Link to home
Start Free TrialLog in
Avatar of edeaux
edeaux

asked on

Could someone convert this c source to delphi?

I'm having a problem converting codes from c to delphi. Could someone convert the codes below?


#define KEY_LEN          (1024)
//#ifdef UNIX
//#define LINE_LEN     (1024*25)
//#endif
//#ifdef MAC
//#define LINE_LEN     (1024*8)
//#endif
//#ifdef PC
#define LINE_LEN     (1024*25)
//#endif

static char line[LINE_LEN];
long last_bin_search_offset = 0;

char *bin_search(char *searchkey, FILE *fp)
{
    int c;
    long top, mid, bot, diff;
    char *linep, key[KEY_LEN];
    int length;

    diff=666;
    linep = line;
    line[0] = '\0';

    fseek(fp, 0L, 2);
    top = 0;
    bot = ftell(fp);
    mid = (bot - top) / 2;

    do {
     fseek(fp, mid - 1, 0);
     if(mid != 1)
         while((c = getc(fp)) != '\n' && c != EOF);
        last_bin_search_offset = ftell( fp );
     fgets(linep, LINE_LEN, fp);
     length = (int)(strchr(linep, ' ') - linep);
     strncpy(key, linep, length);
     key[length] = '\0';
     if(strcmp(key, searchkey) < 0) {
         top = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
     if(strcmp(key, searchkey) > 0) {
         bot = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
    } while((strcmp(key, searchkey)) && (diff != 0));
   
    if(!strcmp(key, searchkey))
     return(line);
    else
     return(NULL);
}

I'm going to use the above code in searching a large file (4MB+) without loading it to memory. I tried using TFileStream and found out that it wouldn't load the entire file into memory but the problem is my existing binarysearch uses TStringList to do searching. So anybody has an idea on how to search a string in a large file without compromising the speed?

Avatar of edeaux
edeaux

ASKER

This is the complete code. All i need is a binary search function. it would be better if the entire source code is converted with only the points i've gave :)
/*

  binsearch.c - general binary search functions

*/

#include <stdio.h>
#include <string.h>

/* Binary search - looks for the key passed at the start of a line
   in the file associated with open file descriptor fp, and returns
   a buffer containing the line in the file. */

#define KEY_LEN          (1024)
//#ifdef UNIX
//#define LINE_LEN     (1024*25)
//#endif
//#ifdef MAC
//#define LINE_LEN     (1024*8)
//#endif
//#ifdef PC
#define LINE_LEN     (1024*25)
//#endif

static char line[LINE_LEN];
long last_bin_search_offset = 0;

/* General purpose binary search function to search for key as first
   item on line in open file.  Item is delimited by space. */

#undef getc

void main()
{}

char *read_index(long offset, FILE *fp) {
    char *linep;

    linep = line;
    line[0] = '0';

    fseek( fp, offset, SEEK_SET );
    fgets(linep, LINE_LEN, fp);
    return(line);
}
     
char *bin_search(char *searchkey, FILE *fp)
{
    int c;
    long top, mid, bot, diff;
    char *linep, key[KEY_LEN];
    int length;

    diff=666;
    linep = line;
    line[0] = '\0';

    fseek(fp, 0L, 2);
    top = 0;
    bot = ftell(fp);
    mid = (bot - top) / 2;

    do {
     fseek(fp, mid - 1, 0);
     if(mid != 1)
         while((c = getc(fp)) != '\n' && c != EOF);
        last_bin_search_offset = ftell( fp );
     fgets(linep, LINE_LEN, fp);
     length = (int)(strchr(linep, ' ') - linep);
     strncpy(key, linep, length);
     key[length] = '\0';
     if(strcmp(key, searchkey) < 0) {
         top = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
     if(strcmp(key, searchkey) > 0) {
         bot = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
    } while((strcmp(key, searchkey)) && (diff != 0));
   
    if(!strcmp(key, searchkey))
     return(line);
    else
     return(NULL);
}

static long offset;

static int bin_search_key(char *searchkey, FILE *fp)
{
    int c;
    long top, mid, bot, diff;
    char *linep, key[KEY_LEN];
    int length, offset1, offset2;

    /* do binary search to find correct place in file to insert line */

    diff=666;
    linep = line;
    line[0] = '\0';

    fseek(fp, 0L, 2);
    top = 0;
    bot = ftell(fp);
    if (bot == 0) {
     offset = 0;
     return(0);          /* empty file */
    }
    mid = (bot - top) / 2;

    /* If only one line in file, don't work through loop */

    length = 0;
    rewind(fp);
    while((c = getc(fp)) != '\n' && c != EOF)
     line[length++] =  c;
    if (getc(fp) == EOF) {     /* only 1 line in file */
     length = (int)(strchr(linep, ' ') - linep);
     strncpy(key, linep, length);
     key[length] = '\0';
     if(strcmp(key, searchkey) > 0) {
         offset = 0;
         return(0);          /* line with key is not found */
     } else if (strcmp(key, searchkey) < 0) {
         offset = ftell(fp);
         return(0);          /* line with key is not found */
     } else {
         offset = 0;
         return(1);          /* line with key is found */
     }
    }

    do {
     fseek(fp, mid - 1, 0);
     if(mid != 1)
         while((c = getc(fp)) != '\n' && c != EOF);
     offset1 = ftell(fp);     /* offset at start of line */
     if (fgets(linep, LINE_LEN, fp) != NULL) {
           offset2 = ftell(fp); /* offset at start of next line */
         length = (int)(strchr(linep, ' ') - linep);
         strncpy(key, linep, length);
         key[length] = '\0';
         if(strcmp(key, searchkey) < 0) {     /* further in file */
          top = mid;
          diff = (bot - top) / 2;
          mid = top + diff;
          offset = offset2;
         }
         if(strcmp(key, searchkey) > 0) {     /* earlier in file */
          bot = mid;
          diff = (bot - top) / 2;
          mid = top + diff;
          offset = offset1;
         }
     } else {
         bot = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
    } while((strcmp(key, searchkey)) && (diff != 0));

    if(!strcmp(key, searchkey)) {
     offset = offset1;     /* get to start of current line */
     return(1);          /* line with key is found */
    } else
     return(0);          /* line with key is not found */
}

/* Copy contents from one file to another. */

void copyfile(FILE *fromfp, FILE *tofp)
{
    int c;

    while ((c = getc(fromfp)) != EOF)
     putc(c, tofp);
}

/* Function to replace a line in a file.  Returns the original line,
   or NULL in case of error. */

char *replace_line(char *new_line, char *searchkey, FILE *fp)
{
    FILE *tfp;               /* temporary file pointer */

    if (!bin_search_key(searchkey, fp))
     return(NULL);          /* line with key not found */

    if ((tfp = tmpfile()) == NULL)
     return(NULL);          /* could not create temp file */
    fseek(fp, offset, 0);
    fgets(line, LINE_LEN, fp);     /* read original */
    copyfile(fp, tfp);
    if (fseek(fp, offset, 0) == -1)
     return(NULL);          /* could not seek to offset */
    fprintf(fp, new_line);     /* write line */
    rewind(tfp);
    copyfile(tfp, fp);

    fclose(tfp);
    fflush(fp);

    return(line);
}

/* Find location to insert line at in file.  If line with this
   key is already in file, return NULL. */

char *insert_line(char *new_line, char *searchkey, FILE *fp)
{
    FILE *tfp;

    if (bin_search_key(searchkey, fp))
     return(NULL);
   
    if ((tfp = tmpfile()) == NULL)
     return(NULL);          /* could not create temp file */
    if (fseek(fp, offset, 0) == -1)
     return(NULL);          /* could not seek to offset */
    copyfile(fp, tfp);
    if (fseek(fp, offset, 0) == -1)
     return(NULL);          /* could not seek to offset */
    fprintf(fp, new_line);     /* write line */
    rewind(tfp);
    copyfile(tfp, fp);

    fclose(tfp);
    fflush(fp);

    return(new_line);
}
Here is the part bin_search part. I implemented using TStream so it really doesn't matter if your are working with files or whatever. Also notable I scan until #10 (so windows #13#10 works) (instead of unix CRLF).

Regards Jacco

(still working on the rest)

uses
  Math;

const
  KeyLen  = 1024;
  LineLen = 25 * 1024;

var
  Line: array[0..LineLen-1] of Char;

function BinSearch(aSearchKey: string; aStream: TStream): Boolean;
var
  liTop, liMid, liBot, liDiff: Int64;
  liData: Byte;
  lsKey: string;
begin

  aStream.Seek(0, soEnd);
  liTop := 0;
  liBot := aStream.Position;
  liMid := (liBot - liTop) div 2;

  repeat
    aStream.Seek(liMid - 1, soBeginning);
    if liMid <> 1 then
      while (aStream.Read(liData, 1) = 1) and (liData <> 10) do ;
    aStream.Read(Line, LineLen);
    lsKey := Copy(Line, 1, Pos(' ', Line) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
    end;
    Result := SameText(aSearchKey, lsKey);
  until Result or (liDiff = 0);
end;

procedure TForm1.Button1Click(Sender: TObject);
var
  FS: TFileStream;
begin
  FS := TFileStream.Create('File1.txt', fmOpenRead);
  try
    if BinSearch('abc', FS) then
      ShowMessage('abc');
    if BinSearch('zksjfd', FS) then
      ShowMessage('zksjfd');
  finally
    FS.Free;
  end;
end;
Here is the rest :-)

It tested it a bit and it seems to work correctly. I changed the implementation to a class. It is not a literal translation of the C source you gave but anyway. I did not implement the " bin_search_key" but the "bin_search" does what it did also. The code might need some safeguards still. Let me know if you think this is useful.

Regards Jacco

uses
  Math;

const
  KeyLen  = 1024;
  LineLen = 25 * 1024;

type
  TDictionary = class
  private
    fStream: TStream;
    fLine: array[0..LineLen-1] of Char;
  public
    constructor Create(aStream: TStream);
    function BinSearch(aSearchKey: string): Boolean;
    function DoLine(aLine, aSearchKey: string; aSkipLine: Boolean = False): Boolean;
    function InsertLine(aLine, aSearchKey: string): Boolean;
    function ReplaceLine(aLine, aSearchKey: string): Boolean;
    procedure WriteString(const aString: string);
    procedure SkipLine;
  end;

{ TDictionary }

constructor TDictionary.Create(aStream: TStream);
begin
  fStream := aStream;
end;

procedure TDictionary.WriteString(const aString: string);
begin
  fStream.Write(Pointer(aString)^, Length(aString));
end;

procedure TDictionary.SkipLine;
var
  lChar: Char;
begin
  while (fStream.Read(lChar, 1) = 1) and (lChar <> #10) do ;
end;

function TDictionary.BinSearch(aSearchKey: string): Boolean;
var
  liTop, liMid, liBot, liDiff, liOffset: Int64;
  lsKey: string;
begin
  liTop := 0;
  liBot := fStream.Size;
  liMid := (liBot - liTop) div 2;
  repeat
    fStream.Seek(liMid - 1, soBeginning);
    if liMid <> 1 then
      SkipLine;
    liOffset := fStream.Position;
    fStream.Read(fLine, LineLen);
    lsKey := Copy(fLine, 1, Pos(' ', fLine) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
    end;
    Result := SameText(aSearchKey, lsKey);
  until Result or (liDiff = 0);
  fStream.Position := liOffset;
  if not Result then
    SkipLine;
end;

function TDictionary.DoLine(aLine, aSearchKey: string; aSkipLine: Boolean): Boolean;
var
  TS: TMemoryStream;
  liPos: Int64;
  lChar: Char;
begin
  TS := nil;
  try
    liPos := fStream.Position;
    if aSkipLine then
      SkipLine;
    if fStream.Size - fStream.Position > 0 then
    begin
      TS := TMemoryStream.Create;
      TS.CopyFrom(fStream, fStream.Size - fStream.Position);
    end;
    fStream.Position := liPos;
    WriteString(aSearchKey + #32 + aLine + #13#10);
    if Assigned(TS) then
      fStream.CopyFrom(TS, 0);
  finally
    TS.Free;
  end;
end;

function TDictionary.InsertLine(aLine, aSearchKey: string): Boolean;
begin
  Result := not BinSearch(aSearchKey);
  if Result then
    DoLine(aLine, aSearchKey);
end;

function TDictionary.ReplaceLine(aLine, aSearchKey: string): Boolean;
begin
  Result := BinSearch(aSearchKey);
  if Result then
    DoLine(aLine, aSearchKey, True);
end;

procedure TForm1.Button1Click(Sender: TObject);
var
  FS: TFileStream;
  D: TDictionary;
begin
  FS := TFileStream.Create('File1.txt', fmOpenReadWrite);
  try
    D := TDictionary.Create(FS);
    try
      if D.BinSearch('abc') then
        ShowMessage('abc');
      if D.BinSearch('zksjfd') then
        ShowMessage('zksjfd');
      D.ReplaceLine('jaccoabc', 'zzzmaya');
    finally
      D.Free;
    end;
  finally
    FS.Free;
  end;
end;
Avatar of edeaux

ASKER

Thanks for the post :)
I tried your code and it worked. I compared it with my existing binary search using tstringlist and the result was my existing search is faster than yours but mine used almost 10MB of memory (data is (+)(-)10MB). Do you have a could that searches faster using TFileStream or any method that wouldn't require loading all data into memory?
I did not optimize the code for speed :-) I merely converted it.

One small optimization is the following (comparing the strings only one time):

function TDictionary.BinSearch(aSearchKey: string): Boolean;
var
  liTop, liMid, liBot, liDiff, liOffset: Int64;
  lsKey: string;
begin
  liTop := 0;
  liBot := fStream.Size;
  liMid := (liBot - liTop) div 2;
  repeat
    fStream.Seek(liMid - 1, soBeginning);
    if liMid <> 1 then
      SkipLine;
    liOffset := fStream.Position;
    fStream.Read(fLine, LineLen);
    lsKey := Copy(fLine, 1, Pos(' ', fLine) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
      else Result := True;
    end;
  until Result or (liDiff = 0);
  fStream.Position := liOffset;
  if not Result then
    SkipLine;
end;

Other optimisations could include buffering 256KB of data. But since we skip "binary" through the file it won't help much. The SkipLine would benefit from it because it does a per character file access. These optimisations will never render a method faster then the TStringList search.

You could parse the file (in one go) and create a hash with the keys and file positions. Then only the total size of the keys would be in-mem. Let me know if you want that :-)

Regards Jacco
Avatar of edeaux

ASKER

of course i want that :) could u post it?
Here is a different approach. The inserting and replacing is not in place yet.

Is this fast enough?

Regards Jacco

const
  LineLen = 25 * 1024; // too much (should be maximum line length)

type
  TFastDictionary = class
  private
    fStream: TStream;
    fHash: TStringHash;
    fLine: array[0..LineLen-1] of Char;
  public
    constructor Create(const aFileName: string);
    destructor Destroy; override;
    function ValueOf(aKey: string): string;
  end;

implementation

uses
  SysUtils;

{ TFastDictionary }

constructor TFastDictionary.Create(const aFileName: string);
var
  liPos: Integer;
  F: TextFile;
  lsLine: string;
  lsKey: string;
begin
  fHash := TStringHash.Create;
  AssignFile(F, aFileName);
  Reset(F);
  while not EOF(F) do
  begin
    ReadLn(F, lsLine);
    lsKey := Copy(lsLine, 1, Pos(' ', lsLine) - 1);
    fHash.Add(lsKey, liPos + Length(lsKey) + 1);
    Inc(liPos, Length(lsLine) + 2);
  end;
  CloseFile(F);
  fStream := TFileStream.Create(aFileName, fmOpenReadWrite);
end;

destructor TFastDictionary.Destroy;
begin
  fStream.Free;
  inherited Destroy;
end;

function TFastDictionary.ValueOf(aKey: string): string;
var
  liPos: Integer;
begin
  liPos := fHash.ValueOf(aKey);
  fStream.Seek(liPos, soBeginning);
  fStream.Read(fLine, LineLen);
  liPos := Pos(#10, fLine);
  if liPos = 0 then
    Result := fLine
  else
    Result := Copy(fLine, 1, liPos-1);
end;
Oh yeah IniFiles and Classes need to be in the uses clause.

Testing goes like this:

procedure TForm1.Button2Click(Sender: TObject);
var
  lFD: TFastDictionary;
begin
  lFD := TFastDictionary.Create('File1.txt');
  try
    ShowMessage(lFD.ValueOf('abc'));
    ShowMessage(lFD.ValueOf('zksjfd'));
  finally
    lFD.Free;
  end;
end;

Good luck :)
Avatar of edeaux

ASKER

good day :)
we tried ur last post and it was great coz the speed is almost the same using tstringlist binary search. We checked the memory and it is also the same allocation with tstringlist bin search (+/-10MB) . I would like to use the Tfilestream coz it saves a lot of memory but slows down the speed of searching a word in our dictionary. could u post a faster tfilestream bin search?
I don't understand why it uses the same amount of memory as your method since I only put the keys in the stringlist (or that what you did too?)

I will think about a faster filestream implemetation.

Regards Jacco
To optimize the BinSearch I need maximum line/key lengths.
ASKER CERTIFIED SOLUTION
Avatar of Jacco
Jacco
Flag of Netherlands image

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
thanks, man :)
i'll try it later coz i'm still working on other modules.
once again thank you so much :)
Have you got two logins?