Solved

How to search for a string in a text file without loading it into memory? I need a function that seeks directly in the file.

Posted on 2003-11-18
6
288 Views
Last Modified: 2010-04-03
Does anybody here know how to search for a string in a text file without loading it into memory (i.e. without using TStringList, TStringStream, etc.)? I would be grateful if somebody could give me an example or two.

Thanks,
Edwin
0
Comment
Question by:edwinaceron
[X]
Welcome to Experts Exchange

Add your voice to the tech community where 5M+ people just like you are talking about what matters.

  • Help others & share knowledge
  • Earn cash & points
  • Learn & ask questions
  • 4
  • 2
6 Comments
 
LVL 27

Accepted Solution

by:
kretzschmar earned 250 total points
ID: 9769860
A high-performance search using a 64 KB memory-map range (as the documentation describes), useful for really large files (> 1 MB up to 4 GB).
(If your files are smaller, use a TStringList and the Pos function.)

from my paq:

http://www.experts-exchange.com/Programming/Programming_Languages/Delphi/Q_20698441.html

---- paste accepted answer

well, was long time ago :-))

found another unit, which encapsulates memory mapped files:
(from Dmitry Streblechenko)

http://www.programmersheaven.com/d/click.aspx?ID=F2883

there are two changes to do in this unit-source

- unit-name didn't match filename
- and one EFCreateError.Create(... must be changed to EFCreateError.CreateFmt(...

I made this sample:

unit mmstream_test_u;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls;

type
  // Demo form: lets the user pick a file, then lists every position at which
  // the text typed into Edit1 occurs in that file (see Button1Click).
  TForm1 = class(TForm)
    Edit1: TEdit;              // search text; read by Button1Click
    ListBox1: TListBox;        // receives the match positions found by Button1Click
    OpenDialog1: TOpenDialog;  // used by Button1Click to choose the file to search
    Button1: TButton;          // presumably wired to Button1Click in the .dfm - confirm
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form1: TForm1;

implementation

{$R *.dfm}
uses mmStream;

var mmf : TMemoryMappedFileStream;

//BinPosEx from Madshi
//
// Binary (byte-wise) substring search over a raw buffer.
//
//   subStr, subStrLen : pattern to look for and its length in bytes
//   str, strLen       : buffer to search and its length in bytes
//   fromPos, toPos    : 1-based positions delimiting the search window
//
// Result: 1-based index in str where the match starts, or 0 if nothing was
// found. 0 is also returned when subStr or str is nil, when fromPos or toPos
// is 0, or when subStrLen is 0.
//
// Direction: if fromPos <= toPos the window is scanned forwards (toPos is
// clamped to strLen; fromPos > strLen fails). If fromPos > toPos the window
// is scanned BACKWARDS from fromPos down to toPos (fromPos is clamped to
// strLen; toPos > strLen fails). In both directions the whole match must lie
// inside the window.
//
// EBX, ESI and EDI are saved and restored; the direction flag is set only
// during the backward scan and cleared again on every exit from it.
//
// NOTE(review): the [ESP+n] offsets below are 4 bytes larger than the raw
// "return address + parameters" layout would give - presumably because the
// compiler emits a PUSH EBP frame for asm routines with stack parameters.
// Confirm against the target compiler before porting.
function BinPosEx(subStr,str: pchar; fromPos,toPos,  subStrLen,strLen: cardinal) : cardinal; assembler
asm      //       EAX    EDX         ECX     [ESP+16][ESP+12]  [ESP+8]               EAX
       TEST    EAX,EAX                  // subStr empty ?
       JE      @@noWork                 // EAX is already 0 -> result 0
       TEST    EDX,EDX                  // str empty ?
       JE      @@fail4
       TEST    ECX,ECX                  // fromPos = 0 ?
       JE      @@fail4
       PUSH    EBX
       MOV     EBX,ECX                  // EBX = fromPos
       MOV     ECX,[ESP+20]             // ECX = toPos  (+4 w/ PUSH EBX)
       TEST    ECX,ECX                  // toPos = 0 ?
       JE      @@fail3
       PUSH    ESI
       PUSH    EDI
       MOV     ESI,EAX                  // ESI = substr
       MOV     EDI,EDX                  // EDI = str
       CMP     EBX,ECX                  // fromPos > toPos ?
       JA      @@backwards
@@forwards:
       MOV     EDX,[ESP+20]             // EDX = strLen  (+12 w/ PUSH EBX+ESI+EDI)
       CMP     EBX,EDX                  // fromPos > Length(str) ?
       JA      @@fail2
       CMP     ECX,EDX                  // toPos <= Length(str) ?
       JNA     @@toPosOk
       MOV     ECX,EDX                  // toPos = Length(str)
@@toPosOk:
       MOV     EDX,[ESP+24]             // EDX = subStrLen  (+12 w/ PUSH EBX+ESI+EDI)
       DEC     EDX                      // EDX = Length(substr) - 1
       JS      @@fail2                  // EDX < 0 ?
       PUSH    EDI                      // remember str position to calculate index
       DEC     EBX                      // dec(fromPos)
       ADD     EDI,EBX                  // "Delete (str, 1, fromPos - 1)"
       SUB     ECX,EBX                  // toPos := toPos - fromPos + 1
       SUB     ECX,EDX                  // #positions in str to look at = Length(str) - Length(substr) + 1
       JBE     @@fail1                  // #positions <= 0 ?
       MOV     AL,[ESI]                 // AL = first char of substr
       INC     ESI                      // Point ESI to 2'nd char of substr
@@fwLoop:
       REPNE   SCASB                    // scan for the first char of substr
       JNE     @@fail1
       MOV     EBX,ECX                  // save outer loop counter
       PUSH    ESI                      // save outer loop substr pointer
       PUSH    EDI                      // save outer loop str pointer
       MOV     ECX,EDX
       REPE    CMPSB                    // compare the remaining Length(substr) - 1 chars
       POP     EDI                      // restore outer loop str pointer
       POP     ESI                      // restore outer loop substr pointer
       JE      @@fwFound
       MOV     ECX,EBX                  // restore outer loop counter
       JMP     @@fwLoop
@@fwFound:
       POP     EDX                      // restore pointer to first char of str
       MOV     EAX,EDI                  // EDI points one past the first matched char
       SUB     EAX,EDX                  // the difference is the correct (1-based) index
       POP     EDI
       POP     ESI
       POP     EBX
       JMP     @@noWork
@@backwards:
       MOV     EDX,[ESP+20]             // EDX = strLen  (+12 w/ PUSH EBX+ESI+EDI)
       CMP     ECX,EDX                  // toPos > Length(str) ?
       JA      @@fail2
       CMP     EBX,EDX                  // fromPos <= Length(str) ?
       JNA     @@fromPosOk
       MOV     EBX,EDX                  // fromPos = Length(str)
@@fromPosOk:
       MOV     EDX,[ESP+24]             // EDX = subStrLen  (+12 w/ PUSH EBX+ESI+EDI)
       DEC     EDX                      // EDX = Length(substr) - 1
       JS      @@fail2                  // EDX < 0 ?
       MOV     EAX,EDI                  // remember str position to calculate index
       ADD     EAX,EDX                  // add backwards calculation
       SUB     EAX,2
       PUSH    EAX
       DEC     ECX                      // dec(toPos)
       ADD     EDI,ECX                  // "Delete (str, 1, toPos - 1)"
       SUB     EBX,ECX                  // fromPos := fromPos - toPos + 1
       MOV     ECX,EBX                  // swap (fromPos, lastPos)
       ADD     EDI,ECX
       DEC     EDI                      // EDI now addresses the char at fromPos
       ADD     ESI,EDX                  // ESI = last char of substr
       SUB     ECX,EDX                  // #positions in str to look at = Length(str) - Length(substr) + 1
       JBE     @@fail1                  // #positions <= 0 ?
       MOV     AL,[ESI]                 // AL = last char of substr
       DEC     ESI                      // Point ESI to next-to-last char of substr
       STD                              // string instructions now walk downwards
@@bwLoop:
       REPNE   SCASB                    // scan downwards for the last char of substr
       JNE     @@fail0
       MOV     EBX,ECX                  // save outer loop counter
       PUSH    ESI                      // save outer loop substr pointer
       PUSH    EDI                      // save outer loop str pointer
       MOV     ECX,EDX
       REPE    CMPSB                    // compare the remaining Length(substr) - 1 chars, downwards
       POP     EDI                      // restore outer loop str pointer
       POP     ESI                      // restore outer loop substr pointer
       JE      @@bwFound
       MOV     ECX,EBX                  // restore outer loop counter
       JMP     @@bwLoop
@@bwFound:
       POP     EDX                      // restore pointer to first char of str + backwards calculation
       MOV     EAX,EDI                  // EDI points one below the matched last char
       SUB     EAX,EDX                  // the difference is the correct (1-based) index of the match start
       CLD
       POP     EDI
       POP     ESI
       POP     EBX
       JMP     @@noWork
@@fail0:
       CLD                              // leave the direction flag cleared
@@fail1:
       POP     EDX                      // get rid of saved str pointer
@@fail2:
       POP     EDI
       POP     ESI
@@fail3:
       POP     EBX
@@fail4:
       XOR     EAX,EAX                  // result = 0 (not found / invalid arguments)
@@noWork:
end;


// Lets the user pick a file, memory-maps it, and fills ListBox1 with the
// 1-based position of every occurrence of Edit1.Text in that file.
//
// Sender is not used. Any exception raised while opening/mapping the file
// propagates to the caller; ListBox1 is left untouched in that case.
procedure TForm1.Button1Click(Sender: TObject);
var
  Hits: TStringList;
  Pattern: string;
  PatLen, FileLen, FoundAt: Cardinal;
begin
  if not OpenDialog1.Execute then
    Exit;

  // Keep the search text in a local so the PChar handed to BinPosEx stays
  // valid for the whole search (Edit1.Text returns a temporary string).
  Pattern := Edit1.Text;
  PatLen := Length(Pattern);

  // Create each object BEFORE its own try, so the finally block never frees
  // something that was not successfully constructed.
  Hits := TStringList.Create;
  try
    // NOTE(review): the file is only read here; fmOpenReadWrite is kept as in
    // the original because the modes supported by TMemoryMappedFileStream are
    // not visible from this unit - confirm whether fmOpenRead would work.
    mmf := TMemoryMappedFileStream.Create(OpenDialog1.FileName, '', fmOpenReadWrite);
    try
      FileLen := mmf.Size;
      FoundAt := 0;
      // Stop once the previous hit was at the last byte: asking BinPosEx for
      // fromPos = FileLen + 1 > toPos would switch it to a BACKWARD search,
      // which finds the same hit again and loops forever (this happened with
      // a one-character pattern matching the last byte of the file).
      while (PatLen > 0) and (FoundAt < FileLen) do
      begin
        FoundAt := BinPosEx(PChar(Pattern), PChar(mmf.Memory),
                            FoundAt + 1, FileLen, PatLen, FileLen);
        if FoundAt = 0 then
          Break;
        Hits.Add(IntToStr(FoundAt));
      end;
    finally
      // mmf is a unit-level variable: nil it so no dangling reference remains.
      FreeAndNil(mmf);
    end;
    ListBox1.Items.Assign(Hits);
  finally
    Hits.Free;
  end;
end;

end.

At the very least you should visit madshi's site
www.madshi.net

and get the mad* libraries there

---------- paste end

meikl ;-)
0
 

Expert Comment

by:mece
ID: 9770246
got some errors at mmstream.pas
why ??
0
 
LVL 27

Expert Comment

by:kretzschmar
ID: 9770261
see above

there are two changes to do in this unit-source

- unit-name didn't match filename
- and one EFCreateError.Create(... must be changed to EFCreateError.CreateFmt(...

0
Industry Leaders: We Want Your Opinion!

We value your feedback.

Take our survey and automatically be entered to win any one of the following:
Yeti Cooler, Amazon eGift Card, and Movie eGift Card!

 

Expert Comment

by:mece
ID: 9771133
good example,
thanx
0
 
LVL 27

Expert Comment

by:kretzschmar
ID: 9777166
to edwinaceron,
does it help?
any questions about this?
0
 
LVL 27

Expert Comment

by:kretzschmar
ID: 9796437
to edwinaceron,
are you still alive?
0

Featured Post

Technology Partners: We Want Your Opinion!

We value your feedback.

Take our survey and automatically be entered to win any one of the following:
Yeti Cooler, Amazon eGift Card, and Movie eGift Card!

Question has a verified solution.

If you are experiencing a similar issue, please ask a related question

This article explains how to create forms/units independent of other forms/units object names in a delphi project. Have you ever created a form for user input in a Delphi project and then had the need to have that same form in a other Delphi proj…
In my programming career I have only very rarely run into situations where operator overloading would be of any use in my work.  Normally those situations involved math with either overly large numbers (hundreds of thousands of digits or accuracy re…
Michael from AdRem Software explains how to view the most utilized and worst performing nodes in your network, by accessing the Top Charts view in NetCrunch network monitor (https://www.adremsoft.com/). Top Charts is a view in which you can set seve…
In this video you will find out how to export Office 365 mailboxes using the built in eDiscovery tool. Bear in mind that although this method might be useful in some cases, using PST files as Office 365 backup is troublesome in a long run (more on t…
Suggested Courses
Course of the Month7 days, 15 hours left to enroll

617 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question