Bianca Pat
asked on
How to find a string in a binary file?
Hi,
I want to find, extract, and replace a string in a file.
My loaded file is up to 500 MB.
I want to find, extract, and replace a string in a file.
{ Reads the file named in edtTarget in 1 KB chunks, shows each chunk in the
  memo m1 for inspection, and then writes an unmodified copy of the file to
  "<target>.mod". The keyword search / replace steps are still placeholders. }
procedure TForm5.Button4Click(Sender: TObject);
var
  Stream: TFileStream;
  Dest: TFileStream;
  Buffer: array [0 .. 1023] of AnsiChar;
  BytesRead: Integer;
  Chunk: AnsiString;
  i: Integer;          // placeholder: will receive the keyword position (step 1)
  myKeyword : string;  // placeholder: the text to look for (step 1)
begin
  Stream := TFileStream.Create(edtTarget.Text, fmOpenRead or fmShareDenyWrite);
  try
    while Stream.Position < Stream.Size do
    begin
      // Read may return fewer bytes than requested (always true for the last
      // chunk), so only the bytes actually read are turned into text.
      BytesRead := Stream.Read(Buffer, SizeOf(Buffer));
      if BytesRead <= 0 then
        Break;
      SetString(Chunk, PAnsiChar(@Buffer[0]), BytesRead);
      m1.Lines.Add(string(Chunk)); // no need, just display to evaluate
      (* 1. Get address of given keyword *)
      // i := Stream.PositionOf(myKeyword); < how to do this?
      (* 2. Stream Replace *)
      // I want to replace MyKeyword with other string
    end;
    // Copy stream-to-stream instead of loading the whole file (up to 500 MB)
    // into a TMemoryStream first. A Count of 0 makes CopyFrom rewind the
    // source and copy all of it.
    Dest := TFileStream.Create(edtTarget.Text + '.mod', fmCreate);
    try
      Dest.CopyFrom(Stream, 0);
    finally
      Dest.Free;
    end;
  finally
    Stream.Free;
  end;
end;
- I want to get the position of "mykeyword" in my loaded file.
- I want to replace / delete "mykeyword" in my loaded file.
- I want to extract "mykeyword" plus the following 1000 bytes (or some other length).
My loaded file is up to 500 MB.
I've used a StackOverflow solution found here in the past. It should be helpful for you too
ASKER
awesome link, thank you!
Do you have any idea how to get the position? I need to have the stream position as well.
Do you have any idea how to get the position? I need to have the stream position as well.
First of all: provide more context and outline your problem. Do you need to use streams? This imposes some limits, especially as you can only do a simple forward search on a stream. Otherwise, when using buffers over streams, you can use Pos(), but keep Morris-Pratt in mind (a sliding window at the end of the buffer for finding matches that straddle two buffers). And then, why not use TStringStream? And why do you limit your solution to AnsiChar? Why not address Unicode as well?
Then: write code top-down and respect the SOLID principles. The weak interpretation of the single responsibility principle requires that you separate UI from logic. This also increases readability, maintainability and testability of code.
Then: write code top-down and respect the SOLID principles. The weak interpretation of the single responsibility principle requires that you separate UI from logic. This also increases readability, maintainability and testability of code.
Using Ferruccio's hint/example, I built a new version (with all respect to the original author). I added some missing features, such as finding a string and getting a string from a position.
I went this way to keep a class-based design, and it works OK (fast enough).
Usage:
I went this way to keep a class-based design, and it works OK (fast enough).
unit uFindReplace;
interface
uses
System.Classes,
System.SysUtils;
type
// Searches, extracts and replaces text in a file by processing it in chunks
// instead of loading it completely into memory.
TFileSearchReplace = class(TObject)
private
// The file being processed; the constructor opens it for reading and writing.
FSourceFile: TFileStream;
// Scratch file created by the constructor next to the source file (same
// name, extension '.tmp'); the destructor deletes it.
FtmpFile: TFileStream;
// Encoding of the source file as reported by TEncoding.GetBufferEncoding
// for the first bytes of the file.
FEncoding: TEncoding;
public
// Opens AFileName and creates the scratch file.
constructor Create(const AFileName: string);
// Closes both files and deletes the scratch file.
destructor Destroy; override;
// Replaces AFrom with ATo in the file, honouring ReplaceFlags
// (rfReplaceAll, rfIgnoreCase). The source file is rewritten in place.
procedure Replace(const AFrom, ATo: string; ReplaceFlags: TReplaceFlags);
// Returns a position in the file for the first occurrence of AText,
// or -1 when AText is not found.
function FindFirst(const AText: string): Int64;
// Reads ALength bytes starting at byte position AFromPos and returns them
// decoded with the detected file encoding.
function GetTextFromPos(const AFromPos: Int64; const ALength: Int64): String;
end;
implementation
uses
System.IOUtils,
System.StrUtils;
{ Returns the larger of the two integers (B when both are equal). }
function Max(const A, B: Integer): Integer;
begin
  Result := B;
  if A > B then
    Result := A;
end;
{ TFileSearchReplace }
{ Opens AFileName for reading and writing and creates the scratch file used
  by Replace: same path and name as AFileName, but with the extension '.tmp'. }
constructor TFileSearchReplace.Create(const AFileName: string);
var
  ScratchName: string;
begin
  inherited Create;
  FSourceFile := TFileStream.Create(AFileName, fmOpenReadWrite);
  ScratchName := ChangeFileExt(AFileName, '.tmp');
  FtmpFile := TFileStream.Create(ScratchName, fmCreate);
end;
{ Closes the source and scratch streams and deletes the scratch file.

  The destructor also runs when the constructor raised (for example because
  the source file could not be opened). In that case FtmpFile was never
  created and there is no scratch file name, so the delete step is skipped:
  TFile.Delete does not accept an empty path and would raise from inside the
  destructor, hiding the original error. }
destructor TFileSearchReplace.Destroy;
var
  tmpFileName: string;
begin
  tmpFileName := '';
  if Assigned(FtmpFile) then
    tmpFileName := FtmpFile.FileName;
  // The streams must be closed before the scratch file can be deleted.
  FreeAndNil(FtmpFile);
  FreeAndNil(FSourceFile);
  try
    if tmpFileName <> '' then
      TFile.Delete(tmpFileName);
  finally
    // Run the inherited destructor even if deleting the scratch file fails.
    inherited;
  end;
end;
{ Searches the source file from its beginning for the first occurrence of
  AText (case-sensitive).

  Returns the byte offset of the match measured from the start of the file
  (a byte-order mark, if present, counts), or -1 when AText does not occur.
  The offset can be passed to GetTextFromPos.

  Raises EEncodingError when a chunk of the file cannot be decoded with the
  detected encoding.

  The file is read in chunks. Consecutive chunks overlap by
  Length(AText) - 1 characters so that a match straddling a chunk boundary is
  still found. }
function TFileSearchReplace.FindFirst(const AText: string): Int64;

  { Detects the encoding from the first bytes of the file, stores it in
    FEncoding and leaves the stream positioned at the start of the file. }
  procedure GetEncoding;
  var
    PreambleBuf: TBytes;
  begin
    SetLength(PreambleBuf, 100);
    FSourceFile.Read(PreambleBuf, Length(PreambleBuf));
    FSourceFile.Seek(0, soBeginning);
    TEncoding.GetBufferEncoding(PreambleBuf, FEncoding);
  end;

  { Decodes the longest decodable prefix of Buf, rewinds the stream over the
    bytes that were not decoded and looks for AText in the decoded text.
    Returns the byte offset of the match inside Buf, or -1. When nothing was
    found and more data follows, the stream is rewound further so that the
    next chunk starts with the last Length(AText) - 1 characters of this one. }
  function FindInBuffer(Buf: TBytes): Integer;
  var
    i, iPos, TailStart: Integer;
    ReadedBufLen: Integer;
    BufStr: string;
  begin
    Result := -1;
    // 1. Get chars from buffer: a multi-byte character may be cut off at the
    //    end of the chunk, so find the longest prefix that decodes.
    ReadedBufLen := 0;
    for i := Length(Buf) downto 1 do
      if FEncoding.GetCharCount(Buf, 0, i) <> 0 then
      begin
        ReadedBufLen := i;
        Break;
      end;
    if ReadedBufLen = 0 then
      raise EEncodingError.Create('Cant convert bytes to str');
    // Give the undecoded trailing bytes back to the stream.
    FSourceFile.Seek(ReadedBufLen - Length(Buf), soCurrent);
    BufStr := FEncoding.GetString(Buf, 0, ReadedBufLen);

    // 2. Search. Pos yields a 1-based character index; the caller needs a
    //    byte offset, so measure the encoded size of the text before the match.
    iPos := Pos(AText, BufStr);
    if iPos > 0 then
    begin
      Result := FEncoding.GetByteCount(Copy(BufStr, 1, iPos - 1));
      Exit;
    end;

    // 3. No match in this chunk: keep the last Length(AText) - 1 characters
    //    for the next read, because a match may start there and continue in
    //    the following chunk.
    if (Length(AText) > 1) and (FSourceFile.Position < FSourceFile.Size) then
    begin
      TailStart := Length(BufStr) - Length(AText) + 2;
      // Only overlap when at least one character is consumed; otherwise the
      // scan would never advance.
      if TailStart > 1 then
      begin
        // Do not start the next chunk in the middle of a surrogate pair.
        if (BufStr[TailStart] >= #$DC00) and (BufStr[TailStart] <= #$DFFF) then
          Inc(TailStart);
        FSourceFile.Seek(
          -Int64(FEncoding.GetByteCount(Copy(BufStr, TailStart, MaxInt))),
          soCurrent);
      end;
    end;
  end;

var
  Buf: TBytes;
  BufLen: Integer;
  SourceSize, FoundIdx, CurrentPos: Int64;
begin
  Result := -1;
  FSourceFile.Seek(0, soBeginning);
  GetEncoding;
  SourceSize := FSourceFile.Size;
  // The chunk is several times larger than the search text so that the
  // overlap between chunks never prevents the scan from advancing.
  BufLen := Max(FEncoding.GetByteCount(AText) * 5, 2048);
  SetLength(Buf, BufLen);
  while FSourceFile.Position < SourceSize do
  begin
    CurrentPos := FSourceFile.Position; // file offset of Buf[0]
    BufLen := FSourceFile.Read(Buf, Length(Buf));
    SetLength(Buf, BufLen);
    FoundIdx := FindInBuffer(Buf);
    if FoundIdx >= 0 then
    begin
      Result := CurrentPos + FoundIdx;
      Break;
    end;
  end;
end;
{ Reads up to ALength bytes starting at byte offset AFromPos of the source
  file and returns them decoded with the detected file encoding.

  AFromPos - byte offset from the start of the file (for example a result of
             FindFirst).
  ALength  - number of bytes (not characters) to read.

  Returns an empty string when AFromPos is negative (FindFirst reports "not
  found" as -1), when ALength is not positive, or when nothing could be read
  at that offset. If the requested range ends in the middle of a multi-byte
  character, the incomplete trailing bytes are dropped. }
function TFileSearchReplace.GetTextFromPos(const AFromPos: Int64;
  const ALength: Int64): String;

  { Detects the encoding from the first bytes of the file, stores it in
    FEncoding and leaves the stream positioned at the start of the file. }
  procedure GetEncoding;
  var
    PreambleBuf: TBytes;
  begin
    SetLength(PreambleBuf, 100);
    FSourceFile.Read(PreambleBuf, Length(PreambleBuf));
    FSourceFile.Seek(0, soBeginning);
    TEncoding.GetBufferEncoding(PreambleBuf, FEncoding);
  end;

var
  Buf: TBytes;
  BufLen, DecodeLen, i: Integer;
begin
  Result := '';
  FSourceFile.Seek(0, soBeginning);
  GetEncoding;
  if (AFromPos < 0) or (ALength <= 0) then
    Exit;
  SetLength(Buf, ALength);
  FSourceFile.Seek(AFromPos, soBeginning);
  BufLen := FSourceFile.Read(Buf, Length(Buf));
  if BufLen <= 0 then
    Exit;
  // The byte range may cut the last character in half. Try dropping up to
  // three trailing bytes until the data decodes; if that does not help, the
  // full range is passed on unchanged and the decoder reports the problem.
  DecodeLen := BufLen;
  for i := BufLen downto Max(BufLen - 3, 1) do
    if FEncoding.GetCharCount(Buf, 0, i) <> 0 then
    begin
      DecodeLen := i;
      Break;
    end;
  Result := FEncoding.GetString(Buf, 0, DecodeLen);
end;
{ Replaces AFrom with ATo in the source file.

  AFrom        - text to look for.
  ATo          - replacement text.
  ReplaceFlags - rfReplaceAll replaces every occurrence (otherwise only the
                 first one); rfIgnoreCase makes the search case-insensitive.

  The file is processed in chunks: each chunk is decoded with the detected
  encoding, replaced, re-encoded and written to the scratch file. Finally the
  source file is truncated and refilled from the scratch file.

  The end of every chunk (Length(AFrom) - 1 characters) is held back and
  re-read together with the following data, so occurrences that straddle a
  chunk boundary are replaced as well.

  Raises EEncodingError when a chunk cannot be decoded with the detected
  encoding. }
procedure TFileSearchReplace.Replace(const AFrom, ATo: string; ReplaceFlags: TReplaceFlags);

  { Detects the encoding (stored in FEncoding) and copies the byte-order
    mark, if any, to the scratch file. Leaves the source stream positioned
    right after the byte-order mark. }
  procedure CopyPreamble;
  var
    PreambleSize: Integer;
    PreambleBuf: TBytes;
  begin
    // Copy Encoding preamble
    SetLength(PreambleBuf, 100);
    FSourceFile.Read(PreambleBuf, Length(PreambleBuf));
    FSourceFile.Seek(0, soBeginning);
    PreambleSize := TEncoding.GetBufferEncoding(PreambleBuf, FEncoding);
    if PreambleSize <> 0 then
      FtmpFile.CopyFrom(FSourceFile, PreambleSize);
  end;

  { Returns the 1-based index of the last character of the last occurrence
    of SubStr in Str, or 0 when SubStr does not occur. Honours rfIgnoreCase
    with the same locale-aware case folding that StringReplace uses. }
  function GetLastIndex(const Str, SubStr: string): Integer;
  var
    i: Integer;
    Haystack, Needle: string;
  begin
    if rfIgnoreCase in ReplaceFlags then
    begin
      Haystack := AnsiUpperCase(Str);
      Needle := AnsiUpperCase(SubStr);
    end
    else
    begin
      Haystack := Str;
      Needle := SubStr;
    end;
    Result := 0;
    i := Pos(Needle, Haystack);
    while i > 0 do
    begin
      Result := i;
      i := PosEx(Needle, Haystack, i + 1);
    end;
    if Result > 0 then
      Inc(Result, Length(Needle) - 1);
  end;

  { Processes one chunk: decodes it, decides how much of it can be flushed
    now, rewinds the source stream over the part that is held back, and
    writes the replaced text to the scratch file. IsReplaced reports whether
    this chunk contained AFrom. }
  procedure ParseBuffer(Buf: TBytes; var IsReplaced: Boolean);
  var
    i: Integer;
    ReadedBufLen: Integer;
    BufStr: string;
    DestBytes: TBytes;
    LastIndex, HoldBackIndex: Integer;
    MoreData: Boolean;
  begin
    // Single replacement already done: copy the rest of the file verbatim.
    if IsReplaced and (not(rfReplaceAll in ReplaceFlags)) then
    begin
      FtmpFile.Write(Buf, Length(Buf));
      Exit;
    end;

    // 1. Get chars from buffer: a multi-byte character may be cut off at the
    //    end of the chunk, so find the longest prefix that decodes.
    ReadedBufLen := 0;
    for i := Length(Buf) downto 1 do
      if FEncoding.GetCharCount(Buf, 0, i) <> 0 then
      begin
        ReadedBufLen := i;
        Break;
      end;
    if ReadedBufLen = 0 then
      raise EEncodingError.Create('Cant convert bytes to str');
    // Give the undecoded trailing bytes back to the stream.
    FSourceFile.Seek(ReadedBufLen - Length(Buf), soCurrent);
    MoreData := FSourceFile.Position < FSourceFile.Size;
    BufStr := FEncoding.GetString(Buf, 0, ReadedBufLen);

    if rfIgnoreCase in ReplaceFlags then
      IsReplaced := ContainsText(BufStr, AFrom)
    else
      IsReplaced := ContainsStr(BufStr, AFrom);

    // 2. Decide how many characters to flush now. Everything after
    //    HoldBackIndex could be the beginning of an occurrence that
    //    continues in the next chunk.
    HoldBackIndex := Length(BufStr) - Length(AFrom) + 1;
    if IsReplaced then
      // Never cut through a complete occurrence inside this chunk.
      LastIndex := Max(GetLastIndex(BufStr, AFrom), HoldBackIndex)
    else if MoreData and (Length(AFrom) > 1) and (HoldBackIndex > 0) then
      LastIndex := HoldBackIndex
    else
      // End of file (or nothing sensible to hold back): flush everything.
      LastIndex := Length(BufStr);
    // Always consume at least one character so the loop makes progress.
    if LastIndex < 1 then
      LastIndex := Length(BufStr);
    // Do not cut between the two halves of a surrogate pair; the byte count
    // used for the rewind below would no longer match the file contents.
    if (LastIndex < Length(BufStr)) and
       (BufStr[LastIndex] >= #$D800) and (BufStr[LastIndex] <= #$DBFF) then
      Inc(LastIndex);

    // 3. Rewind over the held-back part, then replace and write the rest.
    SetLength(BufStr, LastIndex);
    FSourceFile.Seek(FEncoding.GetByteCount(BufStr) - ReadedBufLen, soCurrent);
    BufStr := StringReplace(BufStr, AFrom, ATo, ReplaceFlags);
    DestBytes := FEncoding.GetBytes(BufStr);
    FtmpFile.Write(DestBytes, Length(DestBytes));
  end;

var
  Buf: TBytes;
  BufLen: Integer;
  bReplaced: Boolean;
  SourceSize: int64;
begin
  FSourceFile.Seek(0, soBeginning);
  FtmpFile.Size := 0;
  CopyPreamble;
  SourceSize := FSourceFile.Size;
  // The chunk is several times larger than both texts so that holding back
  // the chunk tail never prevents the scan from advancing.
  BufLen := Max(FEncoding.GetByteCount(AFrom) * 5, 2048);
  BufLen := Max(FEncoding.GetByteCount(ATo) * 5, BufLen);
  SetLength(Buf, BufLen);
  bReplaced := False;
  while FSourceFile.Position < SourceSize do
  begin
    BufLen := FSourceFile.Read(Buf, Length(Buf));
    SetLength(Buf, BufLen);
    ParseBuffer(Buf, bReplaced);
  end;
  // Replace the source contents with the processed copy. A Count of 0 makes
  // CopyFrom rewind the scratch file and copy all of it.
  FSourceFile.Size := 0;
  FSourceFile.CopyFrom(FtmpFile, 0);
end;
end.
usage:
// Usage example: replace a text in a file, locate the replacement and show
// the 34 bytes that start at the position where it was found.
var
  fsr: TFileSearchReplace;
  i: Int64;
  s: String;
begin
  fsr := TFileSearchReplace.Create('H:\Logovi\brinje istok\brinje.sql');
  try
    fsr.Replace('my text', 'new text', [rfReplaceAll]);
    i := fsr.FindFirst('new text');
    // FindFirst returns -1 when the text is not in the file.
    if i >= 0 then
    begin
      // Call the method on fsr, the only instance declared in this snippet.
      s := fsr.GetTextFromPos(i, 34);
      ShowMessage(s);
    end;
  finally
    fsr.Free;
  end;
end;
This question needs an answer!
Become an EE member today
7 DAY FREE TRIALMembers can start a 7-Day Free trial then enjoy unlimited access to the platform.
View membership options
or
Learn why we charge membership fees
We get it - no one likes a content blocker. Take one extra minute and find out why we block content.