Solved

Could someone convert this c source to delphi?

Posted on 2004-03-23
17
465 Views
Last Modified: 2010-04-05
I'm having a problem converting codes from c to delphi. Could someone convert the codes below?


#define KEY_LEN          (1024)
//#ifdef UNIX
//#define LINE_LEN     (1024*25)
//#endif
//#ifdef MAC
//#define LINE_LEN     (1024*8)
//#endif
//#ifdef PC
#define LINE_LEN     (1024*25)
//#endif

static char line[LINE_LEN];
long last_bin_search_offset = 0;

char *bin_search(char *searchkey, FILE *fp)
{
    int c;
    long top, mid, bot, diff;
    char *linep, key[KEY_LEN];
    int length;

    diff=666;
    linep = line;
    line[0] = '\0';

    fseek(fp, 0L, 2);
    top = 0;
    bot = ftell(fp);
    mid = (bot - top) / 2;

    do {
     fseek(fp, mid - 1, 0);
     if(mid != 1)
         while((c = getc(fp)) != '\n' && c != EOF);
        last_bin_search_offset = ftell( fp );
     fgets(linep, LINE_LEN, fp);
     length = (int)(strchr(linep, ' ') - linep);
     strncpy(key, linep, length);
     key[length] = '\0';
     if(strcmp(key, searchkey) < 0) {
         top = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
     if(strcmp(key, searchkey) > 0) {
         bot = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
    } while((strcmp(key, searchkey)) && (diff != 0));
   
    if(!strcmp(key, searchkey))
     return(line);
    else
     return(NULL);
}

I'm going to use the above code in searching a large file (4MB+) without loading it to memory. I tried using TFileStream and found out that it wouldn't load the entire file into memory but the problem is my existing binarysearch uses TStringList to do searching. So anybody has an idea on how to search a string in a large file without compromising the speed?

0
Comment
Question by:edeaux
[X]
Welcome to Experts Exchange

Add your voice to the tech community where 5M+ people just like you are talking about what matters.

  • Help others & share knowledge
  • Earn cash & points
  • Learn & ask questions
  • 9
  • 4
17 Comments
 

Author Comment

by:edeaux
ID: 10664814
This is the complete code. All i need is a binary search function. it would be better if the entire source code is converted with only the points i've gave :)
/*

  binsearch.c - general binary search functions

*/

#include <stdio.h>
#include <string.h>

/* Binary search - looks for the key passed at the start of a line
   in the file associated with open file descriptor fp, and returns
   a buffer containing the line in the file. */

#define KEY_LEN          (1024)
//#ifdef UNIX
//#define LINE_LEN     (1024*25)
//#endif
//#ifdef MAC
//#define LINE_LEN     (1024*8)
//#endif
//#ifdef PC
#define LINE_LEN     (1024*25)
//#endif

static char line[LINE_LEN];
long last_bin_search_offset = 0;

/* General purpose binary search function to search for key as first
   item on line in open file.  Item is delimited by space. */

#undef getc

void main()
{}

char *read_index(long offset, FILE *fp) {
    char *linep;

    linep = line;
    line[0] = '0';

    fseek( fp, offset, SEEK_SET );
    fgets(linep, LINE_LEN, fp);
    return(line);
}
     
char *bin_search(char *searchkey, FILE *fp)
{
    int c;
    long top, mid, bot, diff;
    char *linep, key[KEY_LEN];
    int length;

    diff=666;
    linep = line;
    line[0] = '\0';

    fseek(fp, 0L, 2);
    top = 0;
    bot = ftell(fp);
    mid = (bot - top) / 2;

    do {
     fseek(fp, mid - 1, 0);
     if(mid != 1)
         while((c = getc(fp)) != '\n' && c != EOF);
        last_bin_search_offset = ftell( fp );
     fgets(linep, LINE_LEN, fp);
     length = (int)(strchr(linep, ' ') - linep);
     strncpy(key, linep, length);
     key[length] = '\0';
     if(strcmp(key, searchkey) < 0) {
         top = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
     if(strcmp(key, searchkey) > 0) {
         bot = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
    } while((strcmp(key, searchkey)) && (diff != 0));
   
    if(!strcmp(key, searchkey))
     return(line);
    else
     return(NULL);
}

static long offset;

static int bin_search_key(char *searchkey, FILE *fp)
{
    int c;
    long top, mid, bot, diff;
    char *linep, key[KEY_LEN];
    int length, offset1, offset2;

    /* do binary search to find correct place in file to insert line */

    diff=666;
    linep = line;
    line[0] = '\0';

    fseek(fp, 0L, 2);
    top = 0;
    bot = ftell(fp);
    if (bot == 0) {
     offset = 0;
     return(0);          /* empty file */
    }
    mid = (bot - top) / 2;

    /* If only one line in file, don't work through loop */

    length = 0;
    rewind(fp);
    while((c = getc(fp)) != '\n' && c != EOF)
     line[length++] =  c;
    if (getc(fp) == EOF) {     /* only 1 line in file */
     length = (int)(strchr(linep, ' ') - linep);
     strncpy(key, linep, length);
     key[length] = '\0';
     if(strcmp(key, searchkey) > 0) {
         offset = 0;
         return(0);          /* line with key is not found */
     } else if (strcmp(key, searchkey) < 0) {
         offset = ftell(fp);
         return(0);          /* line with key is not found */
     } else {
         offset = 0;
         return(1);          /* line with key is found */
     }
    }

    do {
     fseek(fp, mid - 1, 0);
     if(mid != 1)
         while((c = getc(fp)) != '\n' && c != EOF);
     offset1 = ftell(fp);     /* offset at start of line */
     if (fgets(linep, LINE_LEN, fp) != NULL) {
           offset2 = ftell(fp); /* offset at start of next line */
         length = (int)(strchr(linep, ' ') - linep);
         strncpy(key, linep, length);
         key[length] = '\0';
         if(strcmp(key, searchkey) < 0) {     /* further in file */
          top = mid;
          diff = (bot - top) / 2;
          mid = top + diff;
          offset = offset2;
         }
         if(strcmp(key, searchkey) > 0) {     /* earlier in file */
          bot = mid;
          diff = (bot - top) / 2;
          mid = top + diff;
          offset = offset1;
         }
     } else {
         bot = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
    } while((strcmp(key, searchkey)) && (diff != 0));

    if(!strcmp(key, searchkey)) {
     offset = offset1;     /* get to start of current line */
     return(1);          /* line with key is found */
    } else
     return(0);          /* line with key is not found */
}

/* Copy contents from one file to another. */

void copyfile(FILE *fromfp, FILE *tofp)
{
    int c;

    while ((c = getc(fromfp)) != EOF)
     putc(c, tofp);
}

/* Function to replace a line in a file.  Returns the original line,
   or NULL in case of error. */

char *replace_line(char *new_line, char *searchkey, FILE *fp)
{
    FILE *tfp;               /* temporary file pointer */

    if (!bin_search_key(searchkey, fp))
     return(NULL);          /* line with key not found */

    if ((tfp = tmpfile()) == NULL)
     return(NULL);          /* could not create temp file */
    fseek(fp, offset, 0);
    fgets(line, LINE_LEN, fp);     /* read original */
    copyfile(fp, tfp);
    if (fseek(fp, offset, 0) == -1)
     return(NULL);          /* could not seek to offset */
    fprintf(fp, new_line);     /* write line */
    rewind(tfp);
    copyfile(tfp, fp);

    fclose(tfp);
    fflush(fp);

    return(line);
}

/* Find location to insert line at in file.  If line with this
   key is already in file, return NULL. */

char *insert_line(char *new_line, char *searchkey, FILE *fp)
{
    FILE *tfp;

    if (bin_search_key(searchkey, fp))
     return(NULL);
   
    if ((tfp = tmpfile()) == NULL)
     return(NULL);          /* could not create temp file */
    if (fseek(fp, offset, 0) == -1)
     return(NULL);          /* could not seek to offset */
    copyfile(fp, tfp);
    if (fseek(fp, offset, 0) == -1)
     return(NULL);          /* could not seek to offset */
    fprintf(fp, new_line);     /* write line */
    rewind(tfp);
    copyfile(tfp, fp);

    fclose(tfp);
    fflush(fp);

    return(new_line);
}
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10672192
Here is the part bin_search part. I implemented using TStream so it really doesn't matter if your are working with files or whatever. Also notable I scan until #10 (so windows #13#10 works) (instead of unix CRLF).

Regards Jacco

(still working on the rest)

uses
  Math;

const
  KeyLen  = 1024;
  LineLen = 25 * 1024;

var
  Line: array[0..LineLen-1] of Char;

function BinSearch(aSearchKey: string; aStream: TStream): Boolean;
var
  liTop, liMid, liBot, liDiff: Int64;
  liData: Byte;
  lsKey: string;
begin

  aStream.Seek(0, soEnd);
  liTop := 0;
  liBot := aStream.Position;
  liMid := (liBot - liTop) div 2;

  repeat
    aStream.Seek(liMid - 1, soBeginning);
    if liMid <> 1 then
      while (aStream.Read(liData, 1) = 1) and (liData <> 10) do ;
    aStream.Read(Line, LineLen);
    lsKey := Copy(Line, 1, Pos(' ', Line) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
    end;
    Result := SameText(aSearchKey, lsKey);
  until Result or (liDiff = 0);
end;

procedure TForm1.Button1Click(Sender: TObject);
var
  FS: TFileStream;
begin
  FS := TFileStream.Create('File1.txt', fmOpenRead);
  try
    if BinSearch('abc', FS) then
      ShowMessage('abc');
    if BinSearch('zksjfd', FS) then
      ShowMessage('zksjfd');
  finally
    FS.Free;
  end;
end;
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10672695
Here is the rest :-)

It tested it a bit and it seems to work correctly. I changed the implementation to a class. It is not a literal translation of the C source you gave but anyway. I did not implement the " bin_search_key" but the "bin_search" does what it did also. The code might need some safeguards still. Let me know if you think this is useful.

Regards Jacco

uses
  Math;

const
  KeyLen  = 1024;
  LineLen = 25 * 1024;

type
  TDictionary = class
  private
    fStream: TStream;
    fLine: array[0..LineLen-1] of Char;
  public
    constructor Create(aStream: TStream);
    function BinSearch(aSearchKey: string): Boolean;
    function DoLine(aLine, aSearchKey: string; aSkipLine: Boolean = False): Boolean;
    function InsertLine(aLine, aSearchKey: string): Boolean;
    function ReplaceLine(aLine, aSearchKey: string): Boolean;
    procedure WriteString(const aString: string);
    procedure SkipLine;
  end;

{ TDictionary }

constructor TDictionary.Create(aStream: TStream);
begin
  fStream := aStream;
end;

procedure TDictionary.WriteString(const aString: string);
begin
  fStream.Write(Pointer(aString)^, Length(aString));
end;

procedure TDictionary.SkipLine;
var
  lChar: Char;
begin
  while (fStream.Read(lChar, 1) = 1) and (lChar <> #10) do ;
end;

function TDictionary.BinSearch(aSearchKey: string): Boolean;
var
  liTop, liMid, liBot, liDiff, liOffset: Int64;
  lsKey: string;
begin
  liTop := 0;
  liBot := fStream.Size;
  liMid := (liBot - liTop) div 2;
  repeat
    fStream.Seek(liMid - 1, soBeginning);
    if liMid <> 1 then
      SkipLine;
    liOffset := fStream.Position;
    fStream.Read(fLine, LineLen);
    lsKey := Copy(fLine, 1, Pos(' ', fLine) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
    end;
    Result := SameText(aSearchKey, lsKey);
  until Result or (liDiff = 0);
  fStream.Position := liOffset;
  if not Result then
    SkipLine;
end;

function TDictionary.DoLine(aLine, aSearchKey: string; aSkipLine: Boolean): Boolean;
var
  TS: TMemoryStream;
  liPos: Int64;
  lChar: Char;
begin
  TS := nil;
  try
    liPos := fStream.Position;
    if aSkipLine then
      SkipLine;
    if fStream.Size - fStream.Position > 0 then
    begin
      TS := TMemoryStream.Create;
      TS.CopyFrom(fStream, fStream.Size - fStream.Position);
    end;
    fStream.Position := liPos;
    WriteString(aSearchKey + #32 + aLine + #13#10);
    if Assigned(TS) then
      fStream.CopyFrom(TS, 0);
  finally
    TS.Free;
  end;
end;

function TDictionary.InsertLine(aLine, aSearchKey: string): Boolean;
begin
  Result := not BinSearch(aSearchKey);
  if Result then
    DoLine(aLine, aSearchKey);
end;

function TDictionary.ReplaceLine(aLine, aSearchKey: string): Boolean;
begin
  Result := BinSearch(aSearchKey);
  if Result then
    DoLine(aLine, aSearchKey, True);
end;

procedure TForm1.Button1Click(Sender: TObject);
var
  FS: TFileStream;
  D: TDictionary;
begin
  FS := TFileStream.Create('File1.txt', fmOpenReadWrite);
  try
    D := TDictionary.Create(FS);
    try
      if D.BinSearch('abc') then
        ShowMessage('abc');
      if D.BinSearch('zksjfd') then
        ShowMessage('zksjfd');
      D.ReplaceLine('jaccoabc', 'zzzmaya');
    finally
      D.Free;
    end;
  finally
    FS.Free;
  end;
end;
0
Industry Leaders: We Want Your Opinion!

We value your feedback.

Take our survey and automatically be enter to win anyone of the following:
Yeti Cooler, Amazon eGift Card, and Movie eGift Card!

 

Author Comment

by:edeaux
ID: 10674544
Thanks for the post :)
I tried your code and it worked. I compared it with my existing binary search using tstringlist and the result was my existing search is faster than yours but mine used almost 10MB of memory (data is (+)(-)10MB). Do you have a could that searches faster using TFileStream or any method that wouldn't require loading all data into memory?
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10674683
I did not optimize the code for speed :-) I merely converted it.

One small optimization is the following (comparing the strings only one time):

function TDictionary.BinSearch(aSearchKey: string): Boolean;
var
  liTop, liMid, liBot, liDiff, liOffset: Int64;
  lsKey: string;
begin
  liTop := 0;
  liBot := fStream.Size;
  liMid := (liBot - liTop) div 2;
  repeat
    fStream.Seek(liMid - 1, soBeginning);
    if liMid <> 1 then
      SkipLine;
    liOffset := fStream.Position;
    fStream.Read(fLine, LineLen);
    lsKey := Copy(fLine, 1, Pos(' ', fLine) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
      else Result := True;
    end;
  until Result or (liDiff = 0);
  fStream.Position := liOffset;
  if not Result then
    SkipLine;
end;

Other optimisations could include buffering 256KB of data. But since we skip "binary" through the file it won't help much. The SkipLine would benefit from it because it does a per character file access. These optimisations will never render a method faster then the TStringList search.

You could parse the file (in one go) and create a hash with the keys and file positions. Then only the total size of the keys would be in-mem. Let me know if you want that :-)

Regards Jacco
0
 

Author Comment

by:edeaux
ID: 10674738
of course i want that :) could u post it?
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10674797
Here is a different approach. The inserting and replacing is not in place yet.

Is this fast enough?

Regards Jacco

const
  LineLen = 25 * 1024; // too much (should be maximum line length)

type
  TFastDictionary = class
  private
    fStream: TStream;
    fHash: TStringHash;
    fLine: array[0..LineLen-1] of Char;
  public
    constructor Create(const aFileName: string);
    destructor Destroy; override;
    function ValueOf(aKey: string): string;
  end;

implementation

uses
  SysUtils;

{ TFastDictionary }

constructor TFastDictionary.Create(const aFileName: string);
var
  liPos: Integer;
  F: TextFile;
  lsLine: string;
  lsKey: string;
begin
  fHash := TStringHash.Create;
  AssignFile(F, aFileName);
  Reset(F);
  while not EOF(F) do
  begin
    ReadLn(F, lsLine);
    lsKey := Copy(lsLine, 1, Pos(' ', lsLine) - 1);
    fHash.Add(lsKey, liPos + Length(lsKey) + 1);
    Inc(liPos, Length(lsLine) + 2);
  end;
  CloseFile(F);
  fStream := TFileStream.Create(aFileName, fmOpenReadWrite);
end;

destructor TFastDictionary.Destroy;
begin
  fStream.Free;
  inherited Destroy;
end;

function TFastDictionary.ValueOf(aKey: string): string;
var
  liPos: Integer;
begin
  liPos := fHash.ValueOf(aKey);
  fStream.Seek(liPos, soBeginning);
  fStream.Read(fLine, LineLen);
  liPos := Pos(#10, fLine);
  if liPos = 0 then
    Result := fLine
  else
    Result := Copy(fLine, 1, liPos-1);
end;
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10674801
Oh yeah IniFiles and Classes need to be in the uses clause.

Testing goes like this:

procedure TForm1.Button2Click(Sender: TObject);
var
  lFD: TFastDictionary;
begin
  lFD := TFastDictionary.Create('File1.txt');
  try
    ShowMessage(lFD.ValueOf('abc'));
    ShowMessage(lFD.ValueOf('zksjfd'));
  finally
    lFD.Free;
  end;
end;

Good luck :)
0
 

Author Comment

by:edeaux
ID: 10683538
good day :)
we tried ur last post and it was great coz the speed is almost the same using tstringlist binary search. We checked the memory and it is also the same allocation with tstringlist bin search (+/-10MB) . I would like to use the Tfilestream coz it saves a lot of memory but slows down the speed of searching a word in our dictionary. could u post a faster tfilestream bin search?
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10684648
I don't understand why it uses the same amount of memory as your method since I only put the keys in the stringlist (or that what you did too?)

I will think about a faster filestream implemetation.

Regards Jacco
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10684661
To optimize the BinSearch I need maximum line/key lengths.
0
 
LVL 10

Accepted Solution

by:
Jacco earned 300 total points
ID: 10684793
Here is a version that only caches the start positions. (It uses best of both worlds). It loads them initially into a TStringList. The then BinSearch uses the FileStream but does not have to Skip to end-of-lines (because it knows where they are :-) ).

Could you test this one?

Regards Jacco

Possible optimizations:
- use a TIntegerList in stead of TStringList to prevent StrToInt/IntToStr conversions (not much)
- use next position in list to exactly load the line

unit Unit2;

interface

uses
  Classes, IniFiles;

const
  LineLen = 1024; // too much (should be maximum line length)

type
  TFastDictionary = class
  private
    fStream: TStream;
    fPositions: TStringList;
    fLine: array[0..LineLen-1] of Char;
  public
    constructor Create(const aFileName: string);
    destructor Destroy; override;
    function BinSearch(aSearchKey: string): Boolean;
  end;

implementation

uses
  Math, SysUtils;

{ TFastDictionary }

constructor TFastDictionary.Create(const aFileName: string);
var
  liPos: Integer;
  F: TextFile;
  lsLine: string;
begin
  fPositions := TStringList.Create;
  liPos := 0;
  AssignFile(F, aFileName);
  Reset(F);
  while not EOF(F) do
  begin
    fPositions.Add(IntToStr(liPos));
    ReadLn(F, lsLine);
    Inc(liPos, Length(lsLine) + 2);
  end;
  CloseFile(F);
  fStream := TFileStream.Create(aFileName, fmOpenReadWrite);
end;

destructor TFastDictionary.Destroy;
begin
  fStream.Free;
  inherited Destroy;
end;

function TFastDictionary.BinSearch(aSearchKey: string): Boolean;
var
  liTop, liMid, liBot, liDiff, liOffset: Int64;
  lsKey: string;
begin
  liTop := 0;
  liBot := fPositions.Count - 1;
  liMid := (liBot - liTop) div 2;
  repeat
    fStream.Seek(StrToInt(fPositions[liMid]), soBeginning);
    liOffset := fStream.Position;
    fStream.Read(fLine, LineLen);
    lsKey := Copy(fLine, 1, Pos(' ', fLine) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
      else Result := True;
    end;
  until Result or (liDiff = 0);
  fStream.Position := liOffset;
  if not Result then
    fStream.Position := liOffset + Pos(#10, fLine);
end;

end.
0
 

Expert Comment

by:edwinaceron
ID: 10694969
thanks, man :)
i'll try it later coz i'm still working on other modules.
once again thank you so much :)
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10729679
Have you got two logins?
0

Featured Post

Free Tool: Site Down Detector

Helpful to verify reports of your own downtime, or to double check a downed website you are trying to access.

One of a set of tools we are providing to everyone as a way of saying thank you for being a part of the community.

Question has a verified solution.

If you are experiencing a similar issue, please ask a related question

Suggested Solutions

Title # Comments Views Activity
Help on project with Soap 10 70
creating threads in delphi 1 202
Microsoft Access 97 and Delphi XE2 9 85
Typecasting TBytes to Integer in Delphi XE8 2 68
Have you ever had your Delphi form/application just hanging while waiting for data to load? This is the article to read if you want to learn some things about adding threads for data loading in the background. First, I'll setup a general applica…
Introduction I have seen many questions in this Delphi topic area where queries in threads are needed or suggested. I know bumped into a similar need. This article will address some of the concepts when dealing with a multithreaded delphi database…
A short tutorial showing how to set up an email signature in Outlook on the Web (previously known as OWA). For free email signatures designs, visit https://www.mail-signatures.com/articles/signature-templates/?sts=6651 If you want to manage em…
Finds all prime numbers in a range requested and places them in a public primes() array. I've demostrated a template size of 30 (2 * 3 * 5) but larger templates can be built such 210  (2 * 3 * 5 * 7) or 2310  (2 * 3 * 5 * 7 * 11). The larger templa…

751 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question