Solved

Efficient way to split a file into segments ?

Posted on 2011-09-18
1
414 Views
Last Modified: 2016-09-29
I retrieve a file size from hte internet, lets say the file content (size) is 17mb.. (the file size will vary in future usage, so the algorithm must be compatible)

what is the most efficient way to split it into segments ? i have a specific amout of threads that each one downloads from AOffset to BOffset, my current algorithm is to divide it by the amount of connections that the user chose...

this is the code that makes the split:
 
var
  nLoop,
  nEndDiv,
  nConnections      : Integer;

  i64FileSize,
  i64Start,
  i64End,
  i64End2           : Int64;  

  nConnections := StrToInt( SpinBox1.Text );
    nEndDiv := nConnections;  

    bStartZero := True;

    for nLoop := 1 to nConnections do
    begin 

     if ( bStartZero = True ) then i64Start := 0;
      bStartZero := False;

      i64End :=
       i64FileSize div (nEndDiv);

      dec( nEndDiv );

      i64End2 := i64End;

      with TFetchDataThread.Create(
      alabel[nLoop], apbar[nLoop], hOpenFile[nLoop], hInetFile[nLoop], i64Start, i64End ) do
      begin
        Priority := tpNormal;
        Start;
      end;

      i64Start := i64End2 + 1;
    end;

Open in new window

 

this is the code that downloads:
 
procedure TFetchDataThread.Execute;
type
  TypeByteArray = array [1..1024] of Byte;
var
  Buffer         : TypeByteArray;
  BytesToRead    : DWORD;
  BytesToWrite   : DWORD;

  BufferLen,
  BytesWritten   : DWORD;
  EndProgress    : Cardinal;
  i: Integer;
begin
  FProgressBar.Min := Extended( FStartOffset + 0.0 );
  FProgressBar.Max := Extended( FEndOffset   + 0.0 );

  InternetSetFilePointer( FInetFile, FStartOffset, nil, FILE_BEGIN, 0 );

  EndProgress := SetFilePointer( FDestFile, FEndOffset, nil, FILE_BEGIN );

  SetFilePointer( FDestFile, FStartOffset, nil, FILE_BEGIN );

  BytesToRead := SizeOf( Buffer );
  BytesToWrite := SizeOf(Buffer);

  try
    repeat

      InternetReadFile(
       FInetFile, @Buffer, BytesToRead, BufferLen );

      LockFile(
       FDestFile, FStartOffset, 0, BytesToRead, 0 );

      if ( FCurrentOffset > EndProgress ) then
      WriteFile(
       FDestFile, Buffer, BytesToWrite, BytesWritten, nil )
      else
      WriteFile(
       FDestFile, Buffer, BytesToWrite, BytesWritten, nil );

      UnlockFile(
       FDestFile, FStartOffset, 0, BytesToRead, 0 );

      FCurrentOffset :=
       SetFilePointer( FDestFile, 0, nil, FILE_CURRENT );

      FProgressBar.Value := FCurrentOffset;

      Synchronize( UpdateGUI );
    until FCurrentOffset >= EndProgress;
  finally
    CloseHandle( FDestFile );
    InternetCloseHandle( FInetFile );
  end;
end;

Open in new window

0
Comment
Question by:rotem156
1 Comment
 
LVL 25

Accepted Solution

by:
epasquier earned 500 total points
ID: 36563416
well, it's pretty obvious that each thread should manage differently the last block. In your current code you read 1024 bytes whatever the position in your file is.

for the same example of 40.528.057 bytes per block, that would mean
39578 loops reading 1024 bytes
and one reading 185

I suppose one quick fix would be :
repeat
//== FIX
      BytesToRead:=EndProgress-FCurrentOffset;  
      if BytesToRead>1024 Then BytesToRead:=1024;
//== END FIX
      InternetReadFile(
       FInetFile, @Buffer, BytesToRead, BufferLen );

      LockFile(
       FDestFile, FStartOffset, 0, BytesToRead, 0 );

//== WHAT IS THAT ALL ABOUT ??
//      if ( FCurrentOffset > EndProgress ) then
//      WriteFile(
//       FDestFile, Buffer, BytesToWrite, BytesWritten, nil )
//      else
//== ???
      WriteFile(
       FDestFile, Buffer, BytesToWrite, BytesWritten, nil );

      UnlockFile(
       FDestFile, FStartOffset, 0, BytesToRead, 0 );

      FCurrentOffset :=
       SetFilePointer( FDestFile, 0, nil, FILE_CURRENT );

      FProgressBar.Value := FCurrentOffset;

      Synchronize( UpdateGUI );
    until FCurrentOffset >= EndProgress;

Open in new window

0

Featured Post

Free Tool: Subnet Calculator

The subnet calculator helps you design networks by taking an IP address and network mask and returning information such as network, broadcast address, and host range.

One of a set of tools we're offering as a way of saying thank you for being a part of the community.

Question has a verified solution.

If you are experiencing a similar issue, please ask a related question

Hello everybody This Article will show you how to validate number with TEdit control, What's the TEdit control? TEdit is a standard Windows edit control on a form, it allows to user to write, read and copy/paste single line of text. Usua…
One of Google's most recent algorithm changes affecting local searches is entitled "The Pigeon Update." This update has dramatically enhanced search inquires for the keyword "Yelp." Google searches with the word "Yelp" included will now yield Yelp a…
This video shows how to use Hyena, from SystemTools Software, to bulk import 100 user accounts from an external text file. View in 1080p for best video quality.
I've attached the XLSM Excel spreadsheet I used in the video and also text files containing the macros used below. https://filedb.experts-exchange.com/incoming/2017/03_w12/1151775/Permutations.txt https://filedb.experts-exchange.com/incoming/201…

856 members asked questions and received personalized solutions in the past 7 days.

Join the community of 500,000 technology professionals and ask your questions.

Join & Ask a Question