Link to home
Start Free TrialLog in
Avatar of Bianca Pat
Bianca Pat

asked on

How to find string in binary file ?

Hi,
I want to find, extract, and replace a string in a file.

{ Reads the target file in 1 KB chunks, shows every chunk in the memo and then
  saves a copy of the file as "<target>.mod". }
procedure TForm5.Button4Click(Sender: TObject);
var
  Stream: TFileStream;
  MS: TMemoryStream;
  Buffer: array [0 .. 1023] of AnsiChar;
  BytesRead: Integer;
  Chunk: AnsiString;
  i: Integer;
  myKeyword : string;
begin
  Stream := TFileStream.Create(edtTarget.Text, fmOpenRead);
  try
    while Stream.Position < Stream.Size do
    begin
      // Read reports how many bytes were really delivered; the last chunk is
      // usually shorter than the buffer.
      BytesRead := Stream.Read(Buffer, SizeOf(Buffer));
      if BytesRead <= 0 then
        Break;
      // Build the string from exactly the bytes read. Passing the raw array
      // relies on a #0 terminator that a full buffer does not have and would
      // show stale bytes from the previous chunk.
      SetString(Chunk, PAnsiChar(@Buffer[0]), BytesRead);
      m1.Lines.Add(string(Chunk)); // no need, just display to evaluate
      (* 1. Get address of given keyword *)
      // i := Stream.PositionOf(myKeyword);   < how to do this?
      (* 2. Stream Replace *)
      // I want to replace MyKeyword with other string
    end;

    MS := TMemoryStream.Create;
    try
      MS.LoadFromStream(Stream);
      MS.SaveToFile(edtTarget.Text + '.mod');
      // save the edited file, is there any other way than using MS?
    finally
      MS.Free;
    end;

  finally
    Stream.Free;
  end;

end;

Open in new window


  1. I want to have the position of  "mykeyword" on my loaded file.
  2. I want to replace / delete "mykeyword" on my loaded file.
  3. I want to extract "mykeyword" + 1000 or some length.

my loaded file is up to 500 MB.
Avatar of Ferruccio Accalai
Ferruccio Accalai
Flag of Italy image

I've used a StackOverflow solution found here in the past. It should be helpful for you too
Avatar of Bianca Pat
Bianca Pat

ASKER

awesome link, thank you!

Do you have any idea on how to get the pos ? I need to have the stream position as well.
First of all: provide more context and outline your problem. Do you need to use streams? This imposes some limits, especially as you can only do a simple forward search on a stream. Otherwise, when using buffers over streams, you can use Pos(), but you must keep Morris-Pratt in mind (a sliding window at the end of the buffer, to find matches that overlap a buffer boundary). And then, why not use TStringStream? And why limit your solution to AnsiChar? Why not address Unicode as well?

Then: write code top-down and respect the SOLID principles. The weak interpretation of the single responsibility principle requires that you separate UI from logic. This also increases readability, maintainability and testability of code.
Using Ferruccio's hint/example, I built a new one (with all respect to the original author). I added some missing pieces, such as finding and getting a string.
I went with a class-based approach, and it works OK (fast enough).

unit uFindReplace;

interface

uses
  System.Classes,
  System.SysUtils;

type
  { Searches, extracts and replaces text in a single file through file streams. }
  TFileSearchReplace = class(TObject)
  private
    FSourceFile: TFileStream; // the file being processed; opened read/write by Create
    FtmpFile: TFileStream;    // scratch file (source name with a .tmp extension); deleted by Destroy
    FEncoding: TEncoding;     // encoding detected from the leading bytes of the source file
  public
    { Opens AFileName for reading and writing and creates the scratch file. }
    constructor Create(const AFileName: string);
    { Closes both files and deletes the scratch file. }
    destructor Destroy; override;

    { Rewrites the source file with AFrom replaced by ATo. ReplaceFlags are
      passed on to StringReplace (rfReplaceAll, rfIgnoreCase). }
    procedure Replace(const AFrom, ATo: string; ReplaceFlags: TReplaceFlags);
    { Returns the position of the first occurrence of AText, or -1 when it is
      not found. }
    function FindFirst(const AText: string): Int64;
    { Seeks to AFromPos (counted from the beginning of the file), reads up to
      ALength bytes and returns them decoded with the detected encoding. }
    function GetTextFromPos(const AFromPos: Int64; const ALength: Int64): String;
  end;

implementation

uses
  System.IOUtils,
  System.StrUtils;

{ Returns the larger of the two integers (B when both are equal). }
function Max(const A, B: Integer): Integer;
begin
  Result := B;
  if A > B then
    Result := A;
end;

{ TFileSearchReplace }

{ Opens AFileName for reading and writing and creates an empty scratch file
  that has the same name with the extension replaced by ".tmp". }
constructor TFileSearchReplace.Create(const AFileName: string);
var
  ScratchName: string;
begin
  inherited Create;

  // The source is opened first, so a missing source file fails before any
  // scratch file is created.
  FSourceFile := TFileStream.Create(AFileName, fmOpenReadWrite);

  ScratchName := ChangeFileExt(AFileName, '.tmp');
  FtmpFile := TFileStream.Create(ScratchName, fmCreate);
end;

{ Closes both streams and removes the scratch file, if one was created. }
destructor TFileSearchReplace.Destroy;
var
  tmpFileName: string;
begin
  // Remember the name before the stream is freed; it is needed for the delete.
  tmpFileName := '';
  if Assigned(FtmpFile) then
    tmpFileName := FtmpFile.FileName;

  FreeAndNil(FtmpFile);
  FreeAndNil(FSourceFile);

  // Destroy also runs when the constructor raised (for example because the
  // source file could not be opened). In that case no scratch file exists and
  // TFile.Delete must not be called with an empty path, otherwise the
  // resulting error would hide the original exception.
  if tmpFileName <> '' then
    TFile.Delete(tmpFileName);

  inherited;
end;

{ Returns the byte offset (from the beginning of the file, including any
  encoding preamble) of the first occurrence of AText, or -1 when AText does
  not occur. The file is scanned in chunks; consecutive chunks overlap by
  Length(AText) - 1 characters so that an occurrence split across a chunk
  boundary is still found.
  Raises EEncodingError when a chunk cannot be decoded with the detected
  encoding. }
function TFileSearchReplace.FindFirst(const AText: string): Int64;

  // Detects the file encoding from its first bytes and stores it in FEncoding.
  procedure GetEncoding;
  var
    PreambleBuf: TBytes;
  begin
    SetLength(PreambleBuf, 100);
    FSourceFile.Read(PreambleBuf, Length(PreambleBuf));
    FSourceFile.Seek(0, soBeginning);

    TEncoding.GetBufferEncoding(PreambleBuf, FEncoding);
  end;

  // Returns the longest prefix of Buf (at most Count bytes) that the encoding
  // can decode, so that a multi-byte character cut off at the end of the
  // chunk is left for the next read. Returns 0 when nothing can be decoded.
  function DecodableLength(const Buf: TBytes; Count: Integer): Integer;
  var
    i: Integer;
  begin
    Result := 0;
    for i := Count downto 1 do
      if FEncoding.GetCharCount(Buf, 0, i) <> 0 then
        begin
          Result := i;
          Break;
        end;
  end;

var
  Buf: TBytes;
  BufStr: string;
  BytesRead, UsableLen, CharPos, OverlapChars: Integer;
  SourceSize, ChunkStart, NextPos: Int64;
begin
  Result := -1;

  FSourceFile.Seek(0, soBeginning);

  GetEncoding;

  // An empty search text never matches (Pos returns 0 for it).
  if AText = '' then
    Exit;

  SourceSize := FSourceFile.Size;
  SetLength(Buf, Max(FEncoding.GetByteCount(AText) * 5, 2048));
  OverlapChars := Length(AText) - 1;

  while FSourceFile.Position < SourceSize do
    begin
      ChunkStart := FSourceFile.Position;
      BytesRead := FSourceFile.Read(Buf, Length(Buf));
      if BytesRead <= 0 then
        Break;

      UsableLen := DecodableLength(Buf, BytesRead);
      if UsableLen = 0 then
        raise EEncodingError.Create('Cant convert bytes to str');

      BufStr := FEncoding.GetString(Buf, 0, UsableLen);

      CharPos := Pos(AText, BufStr);
      if CharPos > 0 then
        begin
          // Pos yields a 1-based character index. Convert the text in front
          // of the match back to bytes to get a real file offset.
          Result := ChunkStart +
            FEncoding.GetByteCount(Copy(BufStr, 1, CharPos - 1));
          Exit;
        end;

      // Continue right after the decoded part of this chunk ...
      NextPos := ChunkStart + UsableLen;
      // ... but, while more data follows, step back far enough that a match
      // starting near the end of this chunk is fully contained in the next.
      if (NextPos < SourceSize) and (OverlapChars > 0) and
        (Length(BufStr) > OverlapChars) then
        begin
          Dec(NextPos, FEncoding.GetByteCount(
            Copy(BufStr, Length(BufStr) - OverlapChars + 1, OverlapChars)));
          // Safety net: always make progress, even if the re-encoded tail is
          // larger than the bytes it was decoded from.
          if NextPos <= ChunkStart then
            NextPos := ChunkStart + UsableLen;
        end;

      FSourceFile.Seek(NextPos, soBeginning);
    end;
end;

{ Reads up to ALength bytes starting at byte offset AFromPos (counted from the
  beginning of the file) and returns them decoded with the detected encoding.
  Returns an empty string when AFromPos is negative (for example the -1 that
  FindFirst returns for "not found"), when it lies at or behind the end of the
  file, or when ALength is not positive. }
function TFileSearchReplace.GetTextFromPos(const AFromPos: Int64;
  const ALength: Int64): String;

  // Detects the file encoding from its first bytes and stores it in FEncoding.
  procedure GetEncoding;
  var
    PreambleBuf: TBytes;
  begin
    SetLength(PreambleBuf, 100);
    FSourceFile.Read(PreambleBuf, Length(PreambleBuf));
    FSourceFile.Seek(0, soBeginning);

    TEncoding.GetBufferEncoding(PreambleBuf, FEncoding);
  end;

var
  Buf: TBytes;
  BufLen: Integer;
  Remaining, BytesWanted: Int64;
begin
  Result := '';
  FSourceFile.Seek(0, soBeginning);

  GetEncoding;

  if (AFromPos < 0) or (ALength <= 0) then
    Exit;

  Remaining := FSourceFile.Size - AFromPos;
  if Remaining <= 0 then
    Exit;

  // Never allocate more than the file can deliver from this position.
  BytesWanted := ALength;
  if BytesWanted > Remaining then
    BytesWanted := Remaining;
  SetLength(Buf, BytesWanted);

  FSourceFile.Seek(AFromPos, soBeginning);
  BufLen := FSourceFile.Read(Buf, Length(Buf));
  if BufLen <= 0 then
    Exit;

  Result := FEncoding.GetString(Buf, 0, BufLen);
end;

{ Replaces AFrom by ATo in the source file. The source is processed chunk by
  chunk into the scratch file, which is then copied back over the source.
  ReplaceFlags have the StringReplace meaning: rfReplaceAll replaces every
  occurrence instead of only the first, rfIgnoreCase matches case-insensitively.
  Raises EEncodingError when a chunk cannot be decoded with the detected
  encoding. }
procedure TFileSearchReplace.Replace(const AFrom, ATo: string; ReplaceFlags: TReplaceFlags);

  // Detects the encoding and copies its preamble (BOM), if any, unchanged to
  // the scratch file. Leaves the source positioned right after the preamble.
  procedure CopyPreamble;
  var
    PreambleSize: Integer;
    PreambleBuf: TBytes;
  begin
    SetLength(PreambleBuf, 100);
    FSourceFile.Read(PreambleBuf, Length(PreambleBuf));
    FSourceFile.Seek(0, soBeginning);

    PreambleSize := TEncoding.GetBufferEncoding(PreambleBuf, FEncoding);
    if PreambleSize <> 0 then
      FtmpFile.CopyFrom(FSourceFile, PreambleSize);
  end;

  // Returns the 1-based index of the last character of the last occurrence of
  // SubStr in Str, or 0 when SubStr does not occur. Honors rfIgnoreCase.
  function GetLastIndex(const Str, SubStr: string): Integer;
  var
    i: Integer;
    Haystack, Needle: string;
  begin
    if rfIgnoreCase in ReplaceFlags then
      begin
        Haystack := UpperCase(Str);
        Needle := UpperCase(SubStr);
      end
    else
      begin
        Haystack := Str;
        Needle := SubStr;
      end;

    i := Pos(Needle, Haystack);
    Result := i;
    while i > 0 do
      begin
        i := PosEx(Needle, Haystack, i + 1);
        if i > 0 then
          Result := i;
      end;
    if Result > 0 then
      Inc(Result, Length(Needle) - 1);
  end;

  // Processes one chunk: decodes it, replaces inside the part that is safe to
  // flush, writes that part to the scratch file and rewinds the source so the
  // unflushed tail is read again with the next chunk.
  procedure ParseBuffer(Buf: TBytes; var IsReplaced: Boolean);
  var
    i: Integer;
    ReadedBufLen: Integer;
    BufStr: string;
    DestBytes: TBytes;
    LastIndex: Integer;
  begin
    // Single-replace mode and the replacement already happened: the rest of
    // the file is copied through unchanged.
    if IsReplaced and (not(rfReplaceAll in ReplaceFlags)) then
      begin
        FtmpFile.Write(Buf, Length(Buf));
        Exit;
      end;

    // 1. Find the longest prefix that decodes cleanly, so a multi-byte
    //    character cut off at the end of the chunk is left for the next read.
    ReadedBufLen := 0;
    for i := Length(Buf) downto 0 do
      if FEncoding.GetCharCount(Buf, 0, i) <> 0 then
        begin
          ReadedBufLen := i;
          Break;
        end;
    if ReadedBufLen = 0 then
      raise EEncodingError.Create('Cant convert bytes to str');

    FSourceFile.Seek(ReadedBufLen - Length(Buf), soCurrent);

    BufStr := FEncoding.GetString(Buf, 0, ReadedBufLen);
    if rfIgnoreCase in ReplaceFlags then
      IsReplaced := ContainsText(BufStr, AFrom)
    else
      IsReplaced := ContainsStr(BufStr, AFrom);

    // 2. Decide how many characters may be flushed now.
    if IsReplaced then
      begin
        LastIndex := GetLastIndex(BufStr, AFrom);
        LastIndex := Max(LastIndex, Length(BufStr) - Length(AFrom) + 1);
      end
    else
      begin
        LastIndex := Length(BufStr);
        // No match in this chunk, but an occurrence may start in its last
        // Length(AFrom) - 1 characters and finish in the next chunk. While
        // more data follows, hold that tail back so it is re-read together
        // with the next chunk. At least one character is always flushed so
        // the loop makes progress.
        if (Length(AFrom) > 1) and (FSourceFile.Position < FSourceFile.Size) then
          LastIndex := Max(LastIndex - Length(AFrom) + 1, 1);
      end;

    SetLength(BufStr, LastIndex);
    // Rewind the source by the bytes of the part that was held back.
    FSourceFile.Seek(FEncoding.GetByteCount(BufStr) - ReadedBufLen, soCurrent);

    BufStr := StringReplace(BufStr, AFrom, ATo, ReplaceFlags);
    DestBytes := FEncoding.GetBytes(BufStr);
    FtmpFile.Write(DestBytes, Length(DestBytes));
  end;

var
  Buf: TBytes;
  BufLen: Integer;
  bReplaced: Boolean;
  SourceSize: int64;
begin
  FSourceFile.Seek(0, soBeginning);
  FtmpFile.Size := 0;
  CopyPreamble;

  SourceSize := FSourceFile.Size;
  // The chunk is several times larger than either text, so that holding back
  // a tail of Length(AFrom) - 1 characters always leaves data to flush.
  BufLen := Max(FEncoding.GetByteCount(AFrom) * 5, 2048);
  BufLen := Max(FEncoding.GetByteCount(ATo) * 5, BufLen);
  SetLength(Buf, BufLen);

  bReplaced := False;
  while FSourceFile.Position < SourceSize do
    begin
      BufLen := FSourceFile.Read(Buf, Length(Buf));
      SetLength(Buf, BufLen);
      ParseBuffer(Buf, bReplaced);
    end;

  // Replace the source content with the scratch content. CopyFrom with a
  // count of 0 copies the whole scratch stream from its beginning.
  FSourceFile.Size := 0;
  FSourceFile.CopyFrom(FtmpFile, 0);
end;

end.

Open in new window


usage:
var
  fsr: TFileSearchReplace;
  i: Int64;
  s: String;
begin
  fsr:=TFileSearchReplace.Create('H:\Logovi\brinje istok\brinje.sql');
  try
     fsr.Replace('my text','new text', [rfReplaceAll]);
     i := fsr.FindFirst('new text');
     // FindFirst returns -1 when the text does not occur in the file.
     if i >= 0 then
     begin
       s := fsr.GetTextFromPos(i, 34);
       ShowMessage(s);
     end;
  finally
     fsr.Free;
  end;
end;

Open in new window

This question needs an answer!
Become an EE member today
7 DAY FREE TRIAL
Members can start a 7-Day Free trial then enjoy unlimited access to the platform.
View membership options
or
Learn why we charge membership fees
We get it - no one likes a content blocker. Take one extra minute and find out why we block content.