• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 226
  • Last Modified:

Text Compression

Anyone got a good way to compress text that doesn't add a lot to a programs size? (under 2Kb)

I'm looking for something that can handle just some basic text in the range of A to Z (upper and lowercase) as well as 0 to 9 and perhaps some additional text such as ()$% and # (although they can be optional)

It doesn't need to be really a great compression ratio - around 15% or greater would be good.
0
tayto
Asked:
tayto
1 Solution
 
ZhaawZSoftware DeveloperCommented:
Is this text in some external file?
0
 
wildzeroCommented:
Do you want it so, when they save the text to a file, the text is compressed, and when it's loaded back into the program it's uncompressed?
0
 
ZhaawZSoftware DeveloperCommented:
If so, it shouldn't be a problem to get 25% compression ratio if you need 64 or less different chars (for example - a..z, A..Z, 0..9, space, period). Just use 6 bits instead of 8 for each char.
0
Concerto's Cloud Advisory Services

Want to avoid the missteps to gaining all the benefits of the cloud? Learn more about the different assessment options from our Cloud Advisory team.

 
_Katka_Commented:
Hi tayto,

 I suppose ZhaawZ was talking about making bitwise index
table which will reduce your string 2 bits per character:

a-z: 26 chars
0-9: 10 chars
-----------------
total: 36 chars

normally the ASCII character (0-255) is 8 bits long (00000000b)
but you're going to shrink it to (0-64) so it'll be 6 bits (000000b)

it's compression factor is constant 0.75 (aka 25%) so for the 100
chars long string you'll get 75.

Here is my code:

program CompressString;

{$APPTYPE CONSOLE}

uses
  SysUtils;

type
  TrCODE=record
    ASCII:Char;
    CODE:Byte;
  end;

  TaBIT=array[0..799] of Boolean;
  TaBYTE=array of Byte;

const
      ASCII:set of Char=['A'..'Z'];
      NUM:set of Char=['0'..'9'];
      ASCIItoCODE:array['A'..'Z'] of Byte=(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25);
      NUMtoCODE:array['0'..'9'] of Byte=(26,27,28,29,30,31,32,33,34,35);
      CODEtoASCII:array[0..35] of Char=(
       'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R',
       'S','T','U','V','W','X','Y','Z','0','1','2','3','4','5','6','7','8','9');

var
      A:Byte;
      Result:TaBYTE;
  Original,Restored:AnsiString;

//
// COMPRESSION
//

function Compress(PInput:AnsiString):TaBYTE;
var
  A,SIZE:Smallint;
  BITS:TaBIT;

procedure C8toB6(PCode:Byte;POffset:Smallint;var PField:TaBIT);
var
  A:Byte;
begin
  for A:=5 downto 0 do PField[(5-A)+POffset*6]:=(PCode and (1 shl A))=(1 shl A);
end;

procedure B6toC8(POffset:Byte;PSource:TaBIT;var PDestiny:TaBYTE);
var
  A:Byte;
begin
  for A:=0 to 7 do PDestiny[POffset]:=PDestiny[POffset]+Byte(PSource[POffset*8+A]) shl (7-A);
end;

begin
  SIZE:=(Length(PInput)*6) div 8;
  SetLength(Result,SIZE);
  FillChar(Result[0],SIZE,0);
      FillChar(BITS,800,False);
  for A:=1 to Length(PInput) do
  begin
    if PInput[A] in ASCII then C8toB6(ASCIItoCODE[PInput[A]],A-1,BITS) else
    if PInput[A] in NUM then C8toB6(NUMtoCODE[PInput[A]],A-1,BITS);
  end;
  for A:=0 to SIZE-1 do B6toC8(A,BITS,Result);
end;

//
// DECOMPRESSION
//

function Decompress(PInput:TaBYTE):AnsiString;
var
  A,SIZE:Smallint;
  BITS:TaBIT;
  BYTES:TaBYTE;

procedure C8toB8(PCode:Byte;POffset:Smallint;var PField:TaBIT);
var
  A:Byte;
begin
  for A:=7 downto 0 do PField[(7-A)+POffset*8]:=(PCode and (1 shl A))=(1 shl A);
end;

procedure B8toC6(POffset:Byte;PSource:TaBIT;var PDestiny:TaBYTE);
var
  A:Byte;
begin
  for A:=0 to 5 do PDestiny[POffset]:=PDestiny[POffset]+Byte(PSource[POffset*6+A]) shl (5-A);
end;

begin
  Result:='';
      SIZE:=(Length(PInput)*8) div 6;
  SetLength(BYTES,SIZE);
  FillChar(BYTES[0],SIZE,0);
  FillChar(BITS,800,False);
  for A:=0 to Length(PInput)-1 do C8toB8(PInput[A],A,BITS);
  for A:=0 to SIZE-1 do
  begin
    B8toC6(A,BITS,BYTES);
    Result:=Result+CODEtoASCII[BYTES[A]];
  end;
end;

begin
  Original:='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
  WriteLn('Original (',Length(Original),')     : ',Original);
  Result:=Compress(Original);
  Write('Compressed (',Length(Result),')   : ');
  for A:=0 to Length(Result)-1 do Write(Char(Result[A])); WriteLn;
  Restored:=Decompress(Result);
  WriteLn('Decompressed (',Length(Restored),') : ',Restored);
  ReadLn;
end.
0
 
ZhaawZSoftware DeveloperCommented:
There will be 62 chars, because lower and upper case are needed. I also believe that some space chars also will be used.
0
 
_Katka_Commented:
You're right ZhaawZ, it was just showcase, here is full version without special symbols:

program CompressString;

{$APPTYPE CONSOLE}

uses
  SysUtils;

type
  TaBIT=array of Boolean;
  TaBYTE=array of Byte;

const
  UPPER:set of Char=['A'..'Z'];
  LOWER:set of Char=['a'..'z'];
  NUM:set of Char=['0'..'9'];
  SPACE:set of Char=[#32];
  UPPERtoCODE:array['A'..'Z'] of Byte=(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25);
  LOWERtoCODE:array['a'..'z'] of Byte=(26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51);
  NUMtoCODE:array['0'..'9'] of Byte=(52,53,54,55,56,57,58,59,60,61);
  SPACEtoCODE:Byte=62;
  CODEtoASCII:array[0..62] of Char=(
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R',
    'S','T','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j',
    'k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','0','1',
    '2','3','4','5','6','7','8','9',#32);

var
  A:Byte;
  Result:TaBYTE;
  Original,Restored:AnsiString;

//
// COMPRESSION
//

function Compress(PInput:AnsiString):TaBYTE;
var
  A,SIZE:Smallint;
  BITS:TaBIT;

procedure C8toB6(PCode:Byte;POffset:Smallint;var PField:TaBIT);
var
  A:Byte;
begin
  for A:=5 downto 0 do PField[(5-A)+POffset*6]:=(PCode and (1 shl A))=(1 shl A);
end;

procedure B6toC8(POffset:Byte;PSource:TaBIT;var PDestiny:TaBYTE);
var
  A:Byte;
begin
  for A:=0 to 7 do PDestiny[POffset]:=PDestiny[POffset]+Byte(PSource[POffset*8+A]) shl (7-A);
end;

begin
  SIZE:=Round((Length(PInput)*6)/8);
  SetLength(Result,SIZE);
  FillChar(Result[0],SIZE,0);
  SetLength(BITS,SIZE*6);
  FillChar(BITS[0],SIZE*6,False);
  for A:=1 to Length(PInput) do
  begin
    if PInput[A] in UPPER then C8toB6(UPPERtoCODE[PInput[A]],A-1,BITS) else
    if PInput[A] in LOWER then C8toB6(LOWERtoCODE[PInput[A]],A-1,BITS) else
    if PInput[A] in NUM then C8toB6(NUMtoCODE[PInput[A]],A-1,BITS) else
    if PInput[A] in SPACE then C8toB6(SPACEtoCODE,A-1,BITS);
  end;
  for A:=0 to SIZE do B6toC8(A,BITS,Result);
end;

//
// DECOMPRESSION
//

function Decompress(PInput:TaBYTE):AnsiString;
var
  A,SIZE:Smallint;
  BITS:TaBIT;
  BYTES:TaBYTE;

procedure C8toB8(PCode:Byte;POffset:Smallint;var PField:TaBIT);
var
  A:Byte;
begin
  for A:=7 downto 0 do PField[(7-A)+POffset*8]:=(PCode and (1 shl A))=(1 shl A);
end;

procedure B8toC6(POffset:Byte;PSource:TaBIT;var PDestiny:TaBYTE);
var
  A:Byte;
begin
  for A:=0 to 5 do PDestiny[POffset]:=PDestiny[POffset]+Byte(PSource[POffset*6+A]) shl (5-A);
end;

begin
  Result:='';
  SIZE:=Round((Length(PInput)*8)/6);
  SetLength(BYTES,SIZE);
  FillChar(BYTES[0],SIZE,0);
  SetLength(BITS,SIZE*8);
  FillChar(BITS[0],SIZE*8,False);
  for A:=0 to Length(PInput) do C8toB8(PInput[A],A,BITS);
  for A:=0 to Length(BYTES)-1 do
  begin
    B8toC6(A,BITS,BYTES);
    Result:=Result+CODEtoASCII[BYTES[A]];
  end;
end;

begin
  Original:='ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz0123456789';
  WriteLn('Original (',Length(Original),')     : ',Original);
  Result:=Compress(Original);
  Write('Compressed (',Length(Result),')   : ');
  for A:=0 to Length(Result)-1 do Write(Char(Result[A])); WriteLn;
  Restored:=Decompress(Result);
  WriteLn('Decompressed (',Length(Restored),') : ',Restored);
  ReadLn;
end.

regards,
Kate
0
 
taytoAuthor Commented:
perfect!!

Thanks a lot.
0
 
_Katka_Commented:
You're welcome :)

regard,
Kate
0

Featured Post

Upgrade your Question Security!

Add Premium security features to your question to ensure its privacy or anonymity. Learn more about your ability to control Question Security today.

Tackle projects and never again get stuck behind a technical roadblock.
Join Now