zattz
asked on
Keyword Density
Hi
I'm looking for some code for calculating the keyword density of a text file.
Thanks
I'm looking for some code for calculating the keyword density of a text file.
Thanks
can you explain this a little more? I'm unfamiliar with the term
Do you mean how many times ALL words appear in a file, or how many times a word you supply appears in a file?
ASKER
The measure of how many times a word is repeated compared to the overall content. For example, if a word was listed 5 times out of a word count of 100, the keyword density for that word is 5%.
I want the keyword density for all words in a text file, sorted by highest density descending.
Thanks
I want the keyword density for all words in a text file, sorted by highest density descending.
Thanks
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
And here's another one ;)
2 memos and a button on a form, paste some text into Memo1 and press the button
The function Explode() is taken from here:
http://www.swissdelphicenter.ch/torry/showcode.php?id=1326
unit Unit1;
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls;
type
TExplodeArray = array of String;
type
TWordRec=Record
S:String;
Count:Integer;
end;
type
TForm1 = class(TForm)
Button1: TButton;
Memo1: TMemo;
Memo2: TMemo;
procedure Button1Click(Sender: TObject);
private
WArray: array of TWordRec;
procedure WordOccurence(Text: string);
procedure SortWA;
public
end;
var
Form1: TForm1;
function Explode(const cSeparator, vString: String): TExplodeArray;
implementation
{$R *.dfm}
function Explode(const cSeparator, vString: String): TExplodeArray;
var
i: Integer;
S: String;
begin
S := vString;
SetLength(Result, 0);
i := 0;
while Pos(cSeparator, S) > 0 do begin
SetLength(Result, Length(Result) +1);
Result[i] := Copy(S, 1, Pos(cSeparator, S) -1);
Inc(i);
S := Copy(S, Pos(cSeparator, S) + Length(cSeparator), Length(S));
end;
SetLength(Result, Length(Result) +1);
Result[i] := Copy(S, 1, Length(S));
end;
procedure TForm1.WordOccurence(Text: string);
var
i,j:Integer;
TextWordList: TExplodeArray;
AlreadyThere:Boolean;
tempS:String;
begin
TextWordList := Explode(' ', Text);
for i := 0 to Length(TextWordList)-1 do begin
AlreadyThere:=False;
tempS:=Trim(TextWordList[i ]);
//tempS:=UpperCase(tempS); //only if no case sensitivity required
if not (tempS[Length(tempS)] in ['a'..'z','A'..'Z']) then
Delete(tempS,Length(tempS) ,1);
if length(tempS)>1 then
if not (tempS[1] in ['a'..'z','A'..'Z']) then
Delete(tempS,1,1);
if Length(WArray)>0 then begin
for j := 0 to Length(WArray)-1 do begin
if WArray[j].S = tempS then begin
inc(WArray[j].Count);
AlreadyThere:=True;
end;
end;
end;
if not AlreadyThere then begin
SetLength(WArray,Length(WA rray)+1);
WArray[High(WArray)].S:=te mpS;
WArray[High(WArray)].Count :=1;
end;
end;
end;
procedure TForm1.Button1Click(Sender : TObject);
var
i:Integer;
begin
Memo2.Clear;
WordOccurence(Memo1.Text);
SortWA;
for i:=0 to Length(Warray)-1 do begin
Memo2.Lines.Add(Warray[i]. S+' = '+IntToStr(Warray[i].Count ));
end;
SetLength(WArray,0);
end;
procedure TForm1.SortWA;
var
ok: boolean;
i, n: integer;
temp: TWordRec;
begin
n := Length(WArray);
repeat
ok := true;
for i := 0 to n - 2 do
if WArray[i].Count < WArray[i+1].Count then begin
temp := WArray[i];
WArray[i] := WArray[i+1];
WArray[i+1] := temp;
ok := false;
end;
until ok;
end;
end.
2 memos and a button on a form, paste some text into Memo1 and press the button
The function Explode() is taken from here:
http://www.swissdelphicenter.ch/torry/showcode.php?id=1326
unit Unit1;
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls;
type
TExplodeArray = array of String;
type
TWordRec=Record
S:String;
Count:Integer;
end;
type
TForm1 = class(TForm)
Button1: TButton;
Memo1: TMemo;
Memo2: TMemo;
procedure Button1Click(Sender: TObject);
private
WArray: array of TWordRec;
procedure WordOccurence(Text: string);
procedure SortWA;
public
end;
var
Form1: TForm1;
function Explode(const cSeparator, vString: String): TExplodeArray;
implementation
{$R *.dfm}
function Explode(const cSeparator, vString: String): TExplodeArray;
var
i: Integer;
S: String;
begin
S := vString;
SetLength(Result, 0);
i := 0;
while Pos(cSeparator, S) > 0 do begin
SetLength(Result, Length(Result) +1);
Result[i] := Copy(S, 1, Pos(cSeparator, S) -1);
Inc(i);
S := Copy(S, Pos(cSeparator, S) + Length(cSeparator), Length(S));
end;
SetLength(Result, Length(Result) +1);
Result[i] := Copy(S, 1, Length(S));
end;
procedure TForm1.WordOccurence(Text:
var
i,j:Integer;
TextWordList: TExplodeArray;
AlreadyThere:Boolean;
tempS:String;
begin
TextWordList := Explode(' ', Text);
for i := 0 to Length(TextWordList)-1 do begin
AlreadyThere:=False;
tempS:=Trim(TextWordList[i
//tempS:=UpperCase(tempS);
if not (tempS[Length(tempS)] in ['a'..'z','A'..'Z']) then
Delete(tempS,Length(tempS)
if length(tempS)>1 then
if not (tempS[1] in ['a'..'z','A'..'Z']) then
Delete(tempS,1,1);
if Length(WArray)>0 then begin
for j := 0 to Length(WArray)-1 do begin
if WArray[j].S = tempS then begin
inc(WArray[j].Count);
AlreadyThere:=True;
end;
end;
end;
if not AlreadyThere then begin
SetLength(WArray,Length(WA
WArray[High(WArray)].S:=te
WArray[High(WArray)].Count
end;
end;
end;
procedure TForm1.Button1Click(Sender
var
i:Integer;
begin
Memo2.Clear;
WordOccurence(Memo1.Text);
SortWA;
for i:=0 to Length(Warray)-1 do begin
Memo2.Lines.Add(Warray[i].
end;
SetLength(WArray,0);
end;
procedure TForm1.SortWA;
var
ok: boolean;
i, n: integer;
temp: TWordRec;
begin
n := Length(WArray);
repeat
ok := true;
for i := 0 to n - 2 do
if WArray[i].Count < WArray[i+1].Count then begin
temp := WArray[i];
WArray[i] := WArray[i+1];
WArray[i+1] := temp;
ok := false;
end;
until ok;
end;
end.
BTW, as you probably don't care about case sensitivity, it would be probably better to re-activate this line in the WordOccurence procedure (delete the first "//")
//tempS:=UpperCase(tempS); //only if no case sensitivity required
and replace this
Memo2.Lines.Add(Warray[i]. S+' = '+IntToStr(Warray[i].Count ));
with this (in the Button1's OnClick)
Memo2.Lines.Add(LowerCase( Warray[i]. S)+' = '+IntToStr(Warray[i].Count ));
//tempS:=UpperCase(tempS);
and replace this
Memo2.Lines.Add(Warray[i].
with this (in the Button1's OnClick)
Memo2.Lines.Add(LowerCase(
ASKER
Thanks Pierre, thats an awesome example.
TName, yours didnt work
TName, yours didnt work
Glad to help
>TName, yours didnt work
Would you mind specifying?
Just curious, as I've re-copied the code as it is into delphi and ran it without problems.
Would you mind specifying?
Just curious, as I've re-copied the code as it is into delphi and ran it without problems.
ASKER
I can't remember. Let me copy it back into delphi and I'll let you know just now
ASKER
Input:
dog
dog
cat cat
cow
moo
horse
output:
dog
dog
cat = 1
cat
cow
moo
horse = 1
-------------------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----
What I was looking for was
dog 2, 28%
cat 2, 28%
cow, 1, 14%
moo, 1, 14%
horse, 1, 14%
:)
dog
dog
cat cat
cow
moo
horse
output:
dog
dog
cat = 1
cat
cow
moo
horse = 1
--------------------------
What I was looking for was
dog 2, 28%
cat 2, 28%
cow, 1, 14%
moo, 1, 14%
horse, 1, 14%
:)
You're right, I somehow forgot about the %...