Want to protect your cyber security and still get fast solutions? Ask a secure question today.Go Premium

x
  • Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 688
  • Last Modified:

Retrieve info from HTML

Hi

I desperately need a solution to retrieve a value/string from a table on a certain website.
Parsing the HTML code does not need to be fast.

Example Pages:
http://www.edbpriser.dk/hardware/hardware-top10.asp?ID=1244578020
http://www.edbpriser.dk/hardware/hardware-top10.asp?ID=1364577995

What I need to retrieve is the first number below TOTAL, e.g. 3.122 kr

I bet some of you string-parsing masters have a solution ;)

Thanks
 Michael
0
brainware
Asked:
brainware
1 Solution
 
pjenglundCommented:
Hi,

I recommend you pay a visit to about.delphi.com and search for something called HTML File Scraping. They have a ready-built example of how to retrieve certain elements from an HTML file.

Just try to find distinctive patterns between which you can extract the data you want. For example, you can ditch everything from the beginning to this point:
href="hardware-top10.asp?ID=1244578020&Sort=Total">

First, work in Notepad to find the patterns and then try to implement it following the example from about.com

It's certainly not the best way to do this but it's one way...

Hope this helped you a bit on the way!
0
 
KnightyCommented:
Put the HTML source in Memo1, and the extracted value will appear in Edit1:

// Scans the HTML source held in Memo1 for the first line containing
// 'bedominfo.asp', then extracts the text between the first '>' and the
// first '</' of the line ValueLineOffset lines further down and shows it in
// Edit1. If the marker is not found, or the value line would lie beyond the
// end of the memo, Edit1 is left unchanged.
procedure TForm1.Button1Click(Sender: TObject);
const
  Marker = 'bedominfo.asp';
  ValueLineOffset = 9; // the value sits 9 lines below the marker line
var
  i, gtPos: Integer;
  valueLine: string;
begin
  // Lines is zero-based, so the last valid index is Count - 1. Iterating up
  // to Count raised a list-index error whenever the marker was missing.
  for i := 0 to Memo1.Lines.Count - 1 do
  begin
    if Pos(Marker, Memo1.Lines.Strings[i]) <> 0 then
    begin
      // Guard against a marker that appears too close to the end of the text.
      if i + ValueLineOffset < Memo1.Lines.Count then
      begin
        valueLine := Memo1.Lines.Strings[i + ValueLineOffset];
        gtPos := Pos('>', valueLine);
        // NOTE(review): the "- 2" drops the last character before '</'
        // (presumably a trailing space in the page markup) - confirm
        // against the real HTML before changing it.
        Edit1.Text := Copy(valueLine, gtPos + 1,
          Pos('</', valueLine) - 2 - gtPos);
      end;
      Break; // only the first occurrence of the marker is of interest
    end;
  end;
end;
0
 
mocartsCommented:
I prefer to use a DOM parser to retrieve the desired content from HTML, especially from a table, since it is structured content.
So... I wrote some code for you :)

uses IdHTTP, // Indy HTTP client - download from http://www.indyproject.org/
  htmlParser, Core; // HTML parser - download from http://htmlp.sourceforge.net/

// Helper: downloads URL with Indy and parses the response body into a new
// THTMLDocument.
//
// Returns a document that the caller owns and must free. Any exception
// raised while downloading or parsing propagates to the caller; in that case
// no document is leaked.
//
// The body is decoded as UTF-8 when the response says so, otherwise it is
// converted from the system ANSI code page.
function GetHTMLDom(const URL: string): THTMLDocument;
var
  body: TMemoryStream;
  raw: AnsiString;
  http: TIdHttp;
  isUtf8: Boolean;
begin
  Result := nil;
  body := TMemoryStream.Create;
  try
    http := TIdHttp.Create(nil);
    try
      http.Get(URL, body);

      // Copy the raw bytes into a byte string. The size check avoids
      // indexing raw[1] on an empty string when the body is empty.
      SetLength(raw, body.Size);
      if body.Size > 0 then
        Move(body.Memory^, raw[1], body.Size);

      // The charset is announced in the Content-Type header
      // ("text/html; charset=utf-8"). Content-Encoding carries compression
      // codings, but the original check against it is kept for backward
      // compatibility.
      isUtf8 := SameText(http.Response.ContentEncoding, 'UTF-8') or
        (Pos('utf-8',
          LowerCase(http.Response.RawHeaders.Values['Content-Type'])) > 0);

      // Create the document only after the download succeeded, and release
      // it again if parsing fails.
      Result := THTMLDocument.Create;
      try
        if isUtf8 then
          // Decode the UTF-8 bytes properly. Reinterpreting the buffer
          // through a PWideString treated the first bytes of the download
          // as a string pointer.
          Result.loadHTML(UTF8Decode(raw))
        else
          Result.loadHTML(WideString(raw));
      except
        Result.Free;
        raise;
      end;
    finally
      http.Free;
    end;
  finally
    body.Free;
  end;
end;

// Helper: walks down from nd following a comma-separated path of element
// names and returns the node reached, or nil if any step cannot be resolved.
//
// Each step selects the first child element with that name. To select a
// later one, append a 1-based index in square brackets. Example, for
//   <body><table>..</table><table>..</table>..</body>
// the path 'body,table[2]' returns the second table element (the index is
// NOT zero-based).
//
// An empty path returns nd itself. A non-numeric index raises EConvertError
// from StrToInt.
function FindChildElement(nd: TNode; const sPath: string): TNode;
var
  steps: TStringList;
  stepIdx, bracketPos, remaining: Integer;
  tagName: string;
  current: TNode;
begin
  Result := nil;
  current := nd;
  steps := TStringList.Create;
  try
    steps.CommaText := sPath;
    for stepIdx := 0 to steps.Count - 1 do
    begin
      tagName := Trim(steps[stepIdx]);
      bracketPos := Pos('[', tagName);
      if bracketPos <> 0 then
      begin
        // Step of the form 'name[k]': split off the index, then count
        // same-named siblings starting from the first match.
        remaining := StrToInt(
          Copy(tagName, bracketPos + 1, Length(tagName) - bracketPos - 1));
        tagName := Copy(tagName, 1, bracketPos - 1);
        current := current.childNodes.findElement(tagName);
        while current <> nil do
        begin
          if current.nodeName = tagName then
            Dec(remaining);
          if remaining <= 0 then
            Break;
          current := current.nextSibling;
        end;
      end
      else
        // Plain step: the first child element with this name.
        current := current.childNodes.findElement(tagName);

      // Path cannot be resolved any further; Result is still nil.
      if current = nil then
        Exit;
    end;
    Result := current;
  finally
    steps.Free;
  end;
end;

// Downloads the price-comparison page, locates the row given by the fixed
// path below, and collects the text of the 9th cell of that row and of each
// following sibling element. The values are shown in a message box as
// 'Total, v1, v2, ...'. Collection stops at the first row without a 9th
// cell. The button is disabled while the request runs.
procedure TForm1.btnGetTotalsClick(Sender: TObject);
var
  hd: THTMLDocument;
  ndt, ndr: TNode;
  msg: string;
begin
  btnGetTotals.Enabled := False;
  try
    hd := GetHTMLDom('http://www.edbpriser.dk/hardware/hardware-top10.asp?ID=1244578020');
//   hd := GetHTMLDom('http://www.edbpriser.dk/hardware/hardware-top10.asp?ID=1364577995');
    try
      // A document that failed to parse may have no root element; treat
      // that as "no rows" instead of dereferencing nil inside the lookup.
      if Assigned(hd.documentElement) then
        ndr := FindChildElement(hd.documentElement,
            'body,table,tr,td,table,tr[3],td,table,tr[3]')
      else
        ndr := nil;
      msg := 'Total';
      while ndr <> nil do begin
        ndt := FindChildElement(ndr, 'td[9]');
        if not Assigned(ndt) then Break;
        // The cell's text is held by its first child node. An empty <td>
        // has no child, so append an empty value rather than crash.
        if Assigned(ndt.firstChild) then
          msg := msg + ', ' + ndt.firstChild.nodeValue
        else
          msg := msg + ', ';
        // Advance to the next sibling that is an element, skipping any
        // non-element nodes between rows.
        ndr := ndr.nextSibling;
        while (ndr <> nil) and not (ndr is TElement) do
          ndr := ndr.nextSibling;
      end;
      ShowMessage(msg);
    finally
      hd.Free;
    end;
  finally
    btnGetTotals.Enabled := True;
  end;
end;

// Helper for working out the path to a desired element: downloads the page
// and mirrors its DOM into a tree view so the nesting can be inspected.
// The form must have a TTreeView named TreeView1.
procedure TForm1.btnTreeClick(Sender: TObject);

  // Adds domNode, followed recursively by its children, beneath parentItem.
  // Text nodes are labelled with their value, every other node with its
  // name. Childless nodes whose value is blank are skipped.
  procedure AddBranch(domNode: TNode; parentItem: TTreeNode);
  var
    child: TNode;
    item: TTreeNode;
    isLeaf: Boolean;
  begin
    isLeaf := not domNode.hasChildNodes;
    if isLeaf and (Trim(domNode.nodeValue) = '') then
      Exit;

    if domNode is TTextNode then
      item := TreeView1.Items.AddChild(parentItem, domNode.nodeValue)
    else
      item := TreeView1.Items.AddChild(parentItem, domNode.nodeName);

    if isLeaf then
      Exit;

    child := domNode.firstChild;
    while Assigned(child) do
    begin
      AddBranch(child, item);
      child := child.nextSibling;
    end;
  end;

var
  doc: THTMLDocument;
begin
  btnTree.Enabled := False;
  try
    doc := GetHTMLDom('http://www.edbpriser.dk/hardware/hardware-top10.asp?ID=1244578020');
    // Alternative example page:
    //   http://www.edbpriser.dk/hardware/hardware-top10.asp?ID=1364577995
    try
      // Batch the updates so the tree view repaints only once.
      TreeView1.Items.BeginUpdate;
      try
        TreeView1.Items.Clear;
        AddBranch(doc.documentElement, nil);
      finally
        TreeView1.Items.EndUpdate;
      end;
    finally
      doc.Free;
    end;
  finally
    btnTree.Enabled := True;
  end;
end;

wbr, mo.
0
 
brainwareAuthor Commented:
Thanks all, I'm looking at all of your examples.
0
 
brainwareAuthor Commented:
very nice scanning methods,
can even use this for some other things too, very nice.

Thanks a lot
0

Featured Post

Free Tool: ZipGrep

ZipGrep is a utility that can list and search zip (.war, .ear, .jar, etc) archives for text patterns, without the need to extract the archive's contents.

One of a set of tools we're offering as a way to say thank you for being a part of the community.

Tackle projects and never again get stuck behind a technical roadblock.
Join Now