Solved

Could someone convert this c source to delphi?

Posted on 2004-03-23
17
431 Views
Last Modified: 2010-04-05
I'm having a problem converting codes from c to delphi. Could someone convert the codes below?


#define KEY_LEN          (1024)
//#ifdef UNIX
//#define LINE_LEN     (1024*25)
//#endif
//#ifdef MAC
//#define LINE_LEN     (1024*8)
//#endif
//#ifdef PC
#define LINE_LEN     (1024*25)
//#endif

static char line[LINE_LEN];
long last_bin_search_offset = 0;

char *bin_search(char *searchkey, FILE *fp)
{
    int c;
    long top, mid, bot, diff;
    char *linep, key[KEY_LEN];
    int length;

    diff=666;
    linep = line;
    line[0] = '\0';

    fseek(fp, 0L, 2);
    top = 0;
    bot = ftell(fp);
    mid = (bot - top) / 2;

    do {
     fseek(fp, mid - 1, 0);
     if(mid != 1)
         while((c = getc(fp)) != '\n' && c != EOF);
        last_bin_search_offset = ftell( fp );
     fgets(linep, LINE_LEN, fp);
     length = (int)(strchr(linep, ' ') - linep);
     strncpy(key, linep, length);
     key[length] = '\0';
     if(strcmp(key, searchkey) < 0) {
         top = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
     if(strcmp(key, searchkey) > 0) {
         bot = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
    } while((strcmp(key, searchkey)) && (diff != 0));
   
    if(!strcmp(key, searchkey))
     return(line);
    else
     return(NULL);
}

I'm going to use the above code in searching a large file (4MB+) without loading it to memory. I tried using TFileStream and found out that it wouldn't load the entire file into memory but the problem is my existing binarysearch uses TStringList to do searching. So anybody has an idea on how to search a string in a large file without compromising the speed?

0
Comment
Question by:edeaux
  • 9
  • 4
17 Comments
 

Author Comment

by:edeaux
ID: 10664814
This is the complete code. All i need is a binary search function. it would be better if the entire source code is converted with only the points i've gave :)
/*

  binsearch.c - general binary search functions

*/

#include <stdio.h>
#include <string.h>

/* Binary search - looks for the key passed at the start of a line
   in the file associated with open file descriptor fp, and returns
   a buffer containing the line in the file. */

#define KEY_LEN          (1024)
//#ifdef UNIX
//#define LINE_LEN     (1024*25)
//#endif
//#ifdef MAC
//#define LINE_LEN     (1024*8)
//#endif
//#ifdef PC
#define LINE_LEN     (1024*25)
//#endif

static char line[LINE_LEN];
long last_bin_search_offset = 0;

/* General purpose binary search function to search for key as first
   item on line in open file.  Item is delimited by space. */

#undef getc

void main()
{}

char *read_index(long offset, FILE *fp) {
    char *linep;

    linep = line;
    line[0] = '0';

    fseek( fp, offset, SEEK_SET );
    fgets(linep, LINE_LEN, fp);
    return(line);
}
     
char *bin_search(char *searchkey, FILE *fp)
{
    int c;
    long top, mid, bot, diff;
    char *linep, key[KEY_LEN];
    int length;

    diff=666;
    linep = line;
    line[0] = '\0';

    fseek(fp, 0L, 2);
    top = 0;
    bot = ftell(fp);
    mid = (bot - top) / 2;

    do {
     fseek(fp, mid - 1, 0);
     if(mid != 1)
         while((c = getc(fp)) != '\n' && c != EOF);
        last_bin_search_offset = ftell( fp );
     fgets(linep, LINE_LEN, fp);
     length = (int)(strchr(linep, ' ') - linep);
     strncpy(key, linep, length);
     key[length] = '\0';
     if(strcmp(key, searchkey) < 0) {
         top = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
     if(strcmp(key, searchkey) > 0) {
         bot = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
    } while((strcmp(key, searchkey)) && (diff != 0));
   
    if(!strcmp(key, searchkey))
     return(line);
    else
     return(NULL);
}

static long offset;

static int bin_search_key(char *searchkey, FILE *fp)
{
    int c;
    long top, mid, bot, diff;
    char *linep, key[KEY_LEN];
    int length, offset1, offset2;

    /* do binary search to find correct place in file to insert line */

    diff=666;
    linep = line;
    line[0] = '\0';

    fseek(fp, 0L, 2);
    top = 0;
    bot = ftell(fp);
    if (bot == 0) {
     offset = 0;
     return(0);          /* empty file */
    }
    mid = (bot - top) / 2;

    /* If only one line in file, don't work through loop */

    length = 0;
    rewind(fp);
    while((c = getc(fp)) != '\n' && c != EOF)
     line[length++] =  c;
    if (getc(fp) == EOF) {     /* only 1 line in file */
     length = (int)(strchr(linep, ' ') - linep);
     strncpy(key, linep, length);
     key[length] = '\0';
     if(strcmp(key, searchkey) > 0) {
         offset = 0;
         return(0);          /* line with key is not found */
     } else if (strcmp(key, searchkey) < 0) {
         offset = ftell(fp);
         return(0);          /* line with key is not found */
     } else {
         offset = 0;
         return(1);          /* line with key is found */
     }
    }

    do {
     fseek(fp, mid - 1, 0);
     if(mid != 1)
         while((c = getc(fp)) != '\n' && c != EOF);
     offset1 = ftell(fp);     /* offset at start of line */
     if (fgets(linep, LINE_LEN, fp) != NULL) {
           offset2 = ftell(fp); /* offset at start of next line */
         length = (int)(strchr(linep, ' ') - linep);
         strncpy(key, linep, length);
         key[length] = '\0';
         if(strcmp(key, searchkey) < 0) {     /* further in file */
          top = mid;
          diff = (bot - top) / 2;
          mid = top + diff;
          offset = offset2;
         }
         if(strcmp(key, searchkey) > 0) {     /* earlier in file */
          bot = mid;
          diff = (bot - top) / 2;
          mid = top + diff;
          offset = offset1;
         }
     } else {
         bot = mid;
         diff = (bot - top) / 2;
         mid = top + diff;
     }
    } while((strcmp(key, searchkey)) && (diff != 0));

    if(!strcmp(key, searchkey)) {
     offset = offset1;     /* get to start of current line */
     return(1);          /* line with key is found */
    } else
     return(0);          /* line with key is not found */
}

/* Copy contents from one file to another. */

void copyfile(FILE *fromfp, FILE *tofp)
{
    int c;

    while ((c = getc(fromfp)) != EOF)
     putc(c, tofp);
}

/* Function to replace a line in a file.  Returns the original line,
   or NULL in case of error. */

char *replace_line(char *new_line, char *searchkey, FILE *fp)
{
    FILE *tfp;               /* temporary file pointer */

    if (!bin_search_key(searchkey, fp))
     return(NULL);          /* line with key not found */

    if ((tfp = tmpfile()) == NULL)
     return(NULL);          /* could not create temp file */
    fseek(fp, offset, 0);
    fgets(line, LINE_LEN, fp);     /* read original */
    copyfile(fp, tfp);
    if (fseek(fp, offset, 0) == -1)
     return(NULL);          /* could not seek to offset */
    fprintf(fp, new_line);     /* write line */
    rewind(tfp);
    copyfile(tfp, fp);

    fclose(tfp);
    fflush(fp);

    return(line);
}

/* Find location to insert line at in file.  If line with this
   key is already in file, return NULL. */

char *insert_line(char *new_line, char *searchkey, FILE *fp)
{
    FILE *tfp;

    if (bin_search_key(searchkey, fp))
     return(NULL);
   
    if ((tfp = tmpfile()) == NULL)
     return(NULL);          /* could not create temp file */
    if (fseek(fp, offset, 0) == -1)
     return(NULL);          /* could not seek to offset */
    copyfile(fp, tfp);
    if (fseek(fp, offset, 0) == -1)
     return(NULL);          /* could not seek to offset */
    fprintf(fp, new_line);     /* write line */
    rewind(tfp);
    copyfile(tfp, fp);

    fclose(tfp);
    fflush(fp);

    return(new_line);
}
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10672192
Here is the part bin_search part. I implemented using TStream so it really doesn't matter if your are working with files or whatever. Also notable I scan until #10 (so windows #13#10 works) (instead of unix CRLF).

Regards Jacco

(still working on the rest)

uses
  Math;

const
  KeyLen  = 1024;
  LineLen = 25 * 1024;

var
  Line: array[0..LineLen-1] of Char;

function BinSearch(aSearchKey: string; aStream: TStream): Boolean;
var
  liTop, liMid, liBot, liDiff: Int64;
  liData: Byte;
  lsKey: string;
begin

  aStream.Seek(0, soEnd);
  liTop := 0;
  liBot := aStream.Position;
  liMid := (liBot - liTop) div 2;

  repeat
    aStream.Seek(liMid - 1, soBeginning);
    if liMid <> 1 then
      while (aStream.Read(liData, 1) = 1) and (liData <> 10) do ;
    aStream.Read(Line, LineLen);
    lsKey := Copy(Line, 1, Pos(' ', Line) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
    end;
    Result := SameText(aSearchKey, lsKey);
  until Result or (liDiff = 0);
end;

procedure TForm1.Button1Click(Sender: TObject);
var
  FS: TFileStream;
begin
  FS := TFileStream.Create('File1.txt', fmOpenRead);
  try
    if BinSearch('abc', FS) then
      ShowMessage('abc');
    if BinSearch('zksjfd', FS) then
      ShowMessage('zksjfd');
  finally
    FS.Free;
  end;
end;
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10672695
Here is the rest :-)

It tested it a bit and it seems to work correctly. I changed the implementation to a class. It is not a literal translation of the C source you gave but anyway. I did not implement the " bin_search_key" but the "bin_search" does what it did also. The code might need some safeguards still. Let me know if you think this is useful.

Regards Jacco

uses
  Math;

const
  KeyLen  = 1024;
  LineLen = 25 * 1024;

type
  TDictionary = class
  private
    fStream: TStream;
    fLine: array[0..LineLen-1] of Char;
  public
    constructor Create(aStream: TStream);
    function BinSearch(aSearchKey: string): Boolean;
    function DoLine(aLine, aSearchKey: string; aSkipLine: Boolean = False): Boolean;
    function InsertLine(aLine, aSearchKey: string): Boolean;
    function ReplaceLine(aLine, aSearchKey: string): Boolean;
    procedure WriteString(const aString: string);
    procedure SkipLine;
  end;

{ TDictionary }

constructor TDictionary.Create(aStream: TStream);
begin
  fStream := aStream;
end;

procedure TDictionary.WriteString(const aString: string);
begin
  fStream.Write(Pointer(aString)^, Length(aString));
end;

procedure TDictionary.SkipLine;
var
  lChar: Char;
begin
  while (fStream.Read(lChar, 1) = 1) and (lChar <> #10) do ;
end;

function TDictionary.BinSearch(aSearchKey: string): Boolean;
var
  liTop, liMid, liBot, liDiff, liOffset: Int64;
  lsKey: string;
begin
  liTop := 0;
  liBot := fStream.Size;
  liMid := (liBot - liTop) div 2;
  repeat
    fStream.Seek(liMid - 1, soBeginning);
    if liMid <> 1 then
      SkipLine;
    liOffset := fStream.Position;
    fStream.Read(fLine, LineLen);
    lsKey := Copy(fLine, 1, Pos(' ', fLine) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
    end;
    Result := SameText(aSearchKey, lsKey);
  until Result or (liDiff = 0);
  fStream.Position := liOffset;
  if not Result then
    SkipLine;
end;

function TDictionary.DoLine(aLine, aSearchKey: string; aSkipLine: Boolean): Boolean;
var
  TS: TMemoryStream;
  liPos: Int64;
  lChar: Char;
begin
  TS := nil;
  try
    liPos := fStream.Position;
    if aSkipLine then
      SkipLine;
    if fStream.Size - fStream.Position > 0 then
    begin
      TS := TMemoryStream.Create;
      TS.CopyFrom(fStream, fStream.Size - fStream.Position);
    end;
    fStream.Position := liPos;
    WriteString(aSearchKey + #32 + aLine + #13#10);
    if Assigned(TS) then
      fStream.CopyFrom(TS, 0);
  finally
    TS.Free;
  end;
end;

function TDictionary.InsertLine(aLine, aSearchKey: string): Boolean;
begin
  Result := not BinSearch(aSearchKey);
  if Result then
    DoLine(aLine, aSearchKey);
end;

function TDictionary.ReplaceLine(aLine, aSearchKey: string): Boolean;
begin
  Result := BinSearch(aSearchKey);
  if Result then
    DoLine(aLine, aSearchKey, True);
end;

procedure TForm1.Button1Click(Sender: TObject);
var
  FS: TFileStream;
  D: TDictionary;
begin
  FS := TFileStream.Create('File1.txt', fmOpenReadWrite);
  try
    D := TDictionary.Create(FS);
    try
      if D.BinSearch('abc') then
        ShowMessage('abc');
      if D.BinSearch('zksjfd') then
        ShowMessage('zksjfd');
      D.ReplaceLine('jaccoabc', 'zzzmaya');
    finally
      D.Free;
    end;
  finally
    FS.Free;
  end;
end;
0
 

Author Comment

by:edeaux
ID: 10674544
Thanks for the post :)
I tried your code and it worked. I compared it with my existing binary search using tstringlist and the result was my existing search is faster than yours but mine used almost 10MB of memory (data is (+)(-)10MB). Do you have a could that searches faster using TFileStream or any method that wouldn't require loading all data into memory?
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10674683
I did not optimize the code for speed :-) I merely converted it.

One small optimization is the following (comparing the strings only one time):

function TDictionary.BinSearch(aSearchKey: string): Boolean;
var
  liTop, liMid, liBot, liDiff, liOffset: Int64;
  lsKey: string;
begin
  liTop := 0;
  liBot := fStream.Size;
  liMid := (liBot - liTop) div 2;
  repeat
    fStream.Seek(liMid - 1, soBeginning);
    if liMid <> 1 then
      SkipLine;
    liOffset := fStream.Position;
    fStream.Read(fLine, LineLen);
    lsKey := Copy(fLine, 1, Pos(' ', fLine) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
      else Result := True;
    end;
  until Result or (liDiff = 0);
  fStream.Position := liOffset;
  if not Result then
    SkipLine;
end;

Other optimisations could include buffering 256KB of data. But since we skip "binary" through the file it won't help much. The SkipLine would benefit from it because it does a per character file access. These optimisations will never render a method faster then the TStringList search.

You could parse the file (in one go) and create a hash with the keys and file positions. Then only the total size of the keys would be in-mem. Let me know if you want that :-)

Regards Jacco
0
 

Author Comment

by:edeaux
ID: 10674738
of course i want that :) could u post it?
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10674797
Here is a different approach. The inserting and replacing is not in place yet.

Is this fast enough?

Regards Jacco

const
  LineLen = 25 * 1024; // too much (should be maximum line length)

type
  TFastDictionary = class
  private
    fStream: TStream;
    fHash: TStringHash;
    fLine: array[0..LineLen-1] of Char;
  public
    constructor Create(const aFileName: string);
    destructor Destroy; override;
    function ValueOf(aKey: string): string;
  end;

implementation

uses
  SysUtils;

{ TFastDictionary }

constructor TFastDictionary.Create(const aFileName: string);
var
  liPos: Integer;
  F: TextFile;
  lsLine: string;
  lsKey: string;
begin
  fHash := TStringHash.Create;
  AssignFile(F, aFileName);
  Reset(F);
  while not EOF(F) do
  begin
    ReadLn(F, lsLine);
    lsKey := Copy(lsLine, 1, Pos(' ', lsLine) - 1);
    fHash.Add(lsKey, liPos + Length(lsKey) + 1);
    Inc(liPos, Length(lsLine) + 2);
  end;
  CloseFile(F);
  fStream := TFileStream.Create(aFileName, fmOpenReadWrite);
end;

destructor TFastDictionary.Destroy;
begin
  fStream.Free;
  inherited Destroy;
end;

function TFastDictionary.ValueOf(aKey: string): string;
var
  liPos: Integer;
begin
  liPos := fHash.ValueOf(aKey);
  fStream.Seek(liPos, soBeginning);
  fStream.Read(fLine, LineLen);
  liPos := Pos(#10, fLine);
  if liPos = 0 then
    Result := fLine
  else
    Result := Copy(fLine, 1, liPos-1);
end;
0
Free Trending Threat Insights Every Day

Enhance your security with threat intelligence from the web. Get trending threat insights on hackers, exploits, and suspicious IP addresses delivered to your inbox with our free Cyber Daily.

 
LVL 10

Expert Comment

by:Jacco
ID: 10674801
Oh yeah IniFiles and Classes need to be in the uses clause.

Testing goes like this:

procedure TForm1.Button2Click(Sender: TObject);
var
  lFD: TFastDictionary;
begin
  lFD := TFastDictionary.Create('File1.txt');
  try
    ShowMessage(lFD.ValueOf('abc'));
    ShowMessage(lFD.ValueOf('zksjfd'));
  finally
    lFD.Free;
  end;
end;

Good luck :)
0
 

Author Comment

by:edeaux
ID: 10683538
good day :)
we tried ur last post and it was great coz the speed is almost the same using tstringlist binary search. We checked the memory and it is also the same allocation with tstringlist bin search (+/-10MB) . I would like to use the Tfilestream coz it saves a lot of memory but slows down the speed of searching a word in our dictionary. could u post a faster tfilestream bin search?
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10684648
I don't understand why it uses the same amount of memory as your method since I only put the keys in the stringlist (or that what you did too?)

I will think about a faster filestream implemetation.

Regards Jacco
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10684661
To optimize the BinSearch I need maximum line/key lengths.
0
 
LVL 10

Accepted Solution

by:
Jacco earned 300 total points
ID: 10684793
Here is a version that only caches the start positions. (It uses best of both worlds). It loads them initially into a TStringList. The then BinSearch uses the FileStream but does not have to Skip to end-of-lines (because it knows where they are :-) ).

Could you test this one?

Regards Jacco

Possible optimizations:
- use a TIntegerList in stead of TStringList to prevent StrToInt/IntToStr conversions (not much)
- use next position in list to exactly load the line

unit Unit2;

interface

uses
  Classes, IniFiles;

const
  LineLen = 1024; // too much (should be maximum line length)

type
  TFastDictionary = class
  private
    fStream: TStream;
    fPositions: TStringList;
    fLine: array[0..LineLen-1] of Char;
  public
    constructor Create(const aFileName: string);
    destructor Destroy; override;
    function BinSearch(aSearchKey: string): Boolean;
  end;

implementation

uses
  Math, SysUtils;

{ TFastDictionary }

constructor TFastDictionary.Create(const aFileName: string);
var
  liPos: Integer;
  F: TextFile;
  lsLine: string;
begin
  fPositions := TStringList.Create;
  liPos := 0;
  AssignFile(F, aFileName);
  Reset(F);
  while not EOF(F) do
  begin
    fPositions.Add(IntToStr(liPos));
    ReadLn(F, lsLine);
    Inc(liPos, Length(lsLine) + 2);
  end;
  CloseFile(F);
  fStream := TFileStream.Create(aFileName, fmOpenReadWrite);
end;

destructor TFastDictionary.Destroy;
begin
  fStream.Free;
  inherited Destroy;
end;

function TFastDictionary.BinSearch(aSearchKey: string): Boolean;
var
  liTop, liMid, liBot, liDiff, liOffset: Int64;
  lsKey: string;
begin
  liTop := 0;
  liBot := fPositions.Count - 1;
  liMid := (liBot - liTop) div 2;
  repeat
    fStream.Seek(StrToInt(fPositions[liMid]), soBeginning);
    liOffset := fStream.Position;
    fStream.Read(fLine, LineLen);
    lsKey := Copy(fLine, 1, Pos(' ', fLine) - 1);
    case Sign(CompareText(aSearchKey, lsKey)) of
      1: begin
        liTop  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
     -1: begin
        liBot  := liMid;
        liDiff := (liBot - liTop) div 2;
        liMid  := liTop + liDiff;
      end;
      else Result := True;
    end;
  until Result or (liDiff = 0);
  fStream.Position := liOffset;
  if not Result then
    fStream.Position := liOffset + Pos(#10, fLine);
end;

end.
0
 

Expert Comment

by:edwinaceron
ID: 10694969
thanks, man :)
i'll try it later coz i'm still working on other modules.
once again thank you so much :)
0
 
LVL 10

Expert Comment

by:Jacco
ID: 10729679
Have you got two logins?
0

Featured Post

Maximize Your Threat Intelligence Reporting

Reporting is one of the most important and least talked about aspects of a world-class threat intelligence program. Here’s how to do it right.

Join & Write a Comment

Suggested Solutions

The uses clause is one of those things that just tends to grow and grow. Most of the time this is in the main form, as it's from this form that all others are called. If you have a big application (including many forms), the uses clause in the in…
In this tutorial I will show you how to use the Windows Speech API in Delphi. I will only cover basic functions such as text to speech and controlling the speed of the speech. SAPI Installation First you need to install the SAPI type library, th…
This demo shows you how to set up the containerized NetScaler CPX with NetScaler Management and Analytics System in a non-routable Mesos/Marathon environment for use with Micro-Services applications.
This video shows how to remove a single email address from the Outlook 2010 Auto Suggestion memory. NOTE: For Outlook 2016 and 2013 perform the exact same steps. Open a new email: Click the New email button in Outlook. Start typing the address: …

747 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question

Need Help in Real-Time?

Connect with top rated Experts

12 Experts available now in Live!

Get 1:1 Help Now