Jaymol
asked on
View source of internet explorer page with frames
Hi.
I know this is probably a really, really long shot, but does anyone know if it is possible to capture the source of a frame within Internet Explorer? I can specifically name the window by its caption (this will always be the same for my purposes), and I can also specifically name the frame via its HTML object name. I "just" want to capture the source within Delphi so that I can parse it and do other stuff with it.
This question is definitely about this specific scenario, and therefore a "workaround solution" will not suffice.
Thanks in advance,
John.
I know this is probably a really, really long shot, but does anyone know if it is possible to capture the source of a frame within Internet Explorer? I can specifically name the window by its caption (this will always be the same for my purposes), and I can also specifically name the frame via its HTML object name. I "just" want to capture the source within Delphi so that I can parse it and do other stuff with it.
This question is definitely about this specific scenario, and therefore a "workaround solution" will not suffice.
Thanks in advance,
John.
Did you come right?
ASKER
Hi Pierre.
Sorry for the delay. I'm literally just about to try that so I'll get back to you shortly.
Thanks for your help so far. It's greatly appreciated.
Thanks,
John.
Sorry for the delay. I'm literally just about to try that so I'll get back to you shortly.
Thanks for your help so far. It's greatly appreciated.
Thanks,
John.
ASKER
Hi Pierre,
That's a great bit of code, but I'm afraid it doesn't fully do what I want.
It goes as far as getting the HTML from the selected instance of IE, but it doesn't get me the code in the specific frame, which is what I need to do.
Can you help me out with that?
Thanks,
John.
That's a great bit of code, but I'm afraid it doesn't fully do what I want.
It goes as far as getting the HTML from the selected instance of IE, but it doesn't get me the code in the specific frame, which is what I need to do.
Can you help me out with that?
Thanks,
John.
The code does get the frame source as well, I tested it.
See the THtmlSrcObj.GetHtmlSource method. It does, however, dump it all in the memo control so I can see why you thought so. What is the URL you are trying to get the source for, let me try it on my side as well...
Regards
Pierre
See the THtmlSrcObj.GetHtmlSource method. It does, however, dump it all in the memo control so I can see why you thought so. What is the URL you are trying to get the source for, let me try it on my side as well...
Regards
Pierre
ASKER
Hi Pierre,
Sorry - you're absolutely right. It's another frame, within a frame, within a frame, that I'm looking for.
I'm afraid I can't give you the URL that I'm working on as it's a password protected area on a web site that I obviously can't give out access to.
The perfect solution to this would be to make the code search frames recursively so that it finds all frames within all frames within all frames etc..
Would it be difficult to modify it to do that?
Thanks,
John.
Sorry - you're absolutely right. It's another frame, within a frame, within a frame, that I'm looking for.
I'm afraid I can't give you the URL that I'm working on as it's a password protected area on a web site that I obviously can't give out access to.
The perfect solution to this would be to make the code search frames recursively so that it finds all frames within all frames within all frames etc..
Would it be difficult to modify it to do that?
Thanks,
John.
Sure. Just change to the following:
{ Rebuilds FHtmlSource from the browser's current document and returns it.

  The top-level document's markup is added first, followed by the markup of
  every frame; frames are visited recursively, so frames nested inside frames
  are included as well. Each frame is preceded by HTML comment banners giving
  its 1-based index within its parent, its title and its URL.

  Returns: the object's own FHtmlSource list (not a copy). Exceptions raised
  while reading a document are not propagated; their message text is appended
  to the list instead. }
function THtmlSrcObj.GetHtmlSource: TStrings;

  { Appends ADoc's markup to FHtmlSource, then recurses into each of its frames. }
  procedure AddDocSource(ADoc: IHTMLDocument2);
  var
    fi: Integer;
    FrameIndex: OleVariant;
    FrameObj: IDispatch;
    FrameDoc: IHTMLDocument2;
    element: IHTMLElement;
    frames: IHTMLFramesCollection2;
    frameWindow: IHTMLWindow2;
  begin
    // The first entry of the "all" collection is used as the document root.
    element := ADoc.all.item(0, 0) as IHTMLElement;
    if Assigned(element) then
      FHtmlSource.Add(element.outerHTML);

    frames := ADoc.frames;
    for fi := 0 to frames.length - 1 do
    begin
      FHtmlSource.Add('');
      FHtmlSource.Add('<!--' + StringOfChar('*', 5) + ' '
        + 'Frame ' + IntToStr(fi + 1)
        + StringOfChar('*', 5) + ' -->');
      try
        // frames.item takes its index by reference, so it needs a variable.
        FrameIndex := fi;
        FrameObj := frames.item(FrameIndex);
        frameWindow := FrameObj as IHTMLWindow2;
        FrameDoc := frameWindow.document as IHTMLDocument2;
        FHtmlSource.Add('<!--'
          + StringOfChar('*', 5) + ' '
          + 'Title: ' + FrameDoc.title
          + ' URL: ' + FrameDoc.url
          + StringOfChar('*', 5) + ' -->');
        // The recursive call adds this frame's markup and that of its children.
        AddDocSource(FrameDoc);
        FHtmlSource.Add('');
      except
        // A frame that cannot be read (presumably e.g. a cross-domain frame)
        // is reported in the output and the remaining frames are still visited.
        on e: Exception do
          FHtmlSource.Add(e.Message);
      end;
    end;
  end;

begin
  Result := FHtmlSource;
  FHtmlSource.Clear;
  try
    if WBrowser.Document.QueryInterface(IID_IHTMLDocument2, HtmlDoc) = S_OK then
      AddDocSource(HtmlDoc);
  except
    on e: Exception do
      Result.Add(e.Message);
  end;
end;
Regards
Pierre
{ Rebuilds FHtmlSource from the browser's current document and returns it.

  The top-level document's markup is added first, followed by the markup of
  every frame; frames are visited recursively, so frames nested inside frames
  are included as well. Each frame is preceded by HTML comment banners giving
  its 1-based index within its parent, its title and its URL.

  Returns: the object's own FHtmlSource list (not a copy). Exceptions raised
  while reading a document are not propagated; their message text is appended
  to the list instead. }
function THtmlSrcObj.GetHtmlSource: TStrings;

  { Appends ADoc's markup to FHtmlSource, then recurses into each of its frames. }
  procedure AddDocSource(ADoc: IHTMLDocument2);
  var
    fi: Integer;
    FrameIndex: OleVariant;
    FrameObj: IDispatch;
    FrameDoc: IHTMLDocument2;
    element: IHTMLElement;
    frames: IHTMLFramesCollection2;
    frameWindow: IHTMLWindow2;
  begin
    // The first entry of the "all" collection is used as the document root.
    element := ADoc.all.item(0, 0) as IHTMLElement;
    if Assigned(element) then
      FHtmlSource.Add(element.outerHTML);

    frames := ADoc.frames;
    for fi := 0 to frames.length - 1 do
    begin
      FHtmlSource.Add('');
      FHtmlSource.Add('<!--' + StringOfChar('*', 5) + ' '
        + 'Frame ' + IntToStr(fi + 1)
        + StringOfChar('*', 5) + ' -->');
      try
        // frames.item takes its index by reference, so it needs a variable.
        FrameIndex := fi;
        FrameObj := frames.item(FrameIndex);
        frameWindow := FrameObj as IHTMLWindow2;
        FrameDoc := frameWindow.document as IHTMLDocument2;
        FHtmlSource.Add('<!--'
          + StringOfChar('*', 5) + ' '
          + 'Title: ' + FrameDoc.title
          + ' URL: ' + FrameDoc.url
          + StringOfChar('*', 5) + ' -->');
        // The recursive call adds this frame's markup and that of its children.
        AddDocSource(FrameDoc);
        FHtmlSource.Add('');
      except
        // A frame that cannot be read (presumably e.g. a cross-domain frame)
        // is reported in the output and the remaining frames are still visited.
        on e: Exception do
          FHtmlSource.Add(e.Message);
      end;
    end;
  end;

begin
  Result := FHtmlSource;
  FHtmlSource.Clear;
  try
    if WBrowser.Document.QueryInterface(IID_IHTMLDocument2, HtmlDoc) = S_OK then
      AddDocSource(HtmlDoc);
  except
    on e: Exception do
      Result.Add(e.Message);
  end;
end;
Regards
Pierre
ASKER
Thanks Pierre.
I've tried that and it just crashes out. After continuing execution a couple of times when the CPU Debug window pops up, I get the error message "Not enough storage is available to complete this operation."
Any ideas?
Thanks,
John.
I've tried that and it just crashes out. After continuing execution a couple of times when the CPU Debug window pops up, I get the error message "Not enough storage is available to complete this operation."
Any ideas?
Thanks,
John.
Try stepping through the code and let me know where the error occurs. Let me know if you need help with this...
ASKER
Hi Pierre,
Well I've stepped through the code and found that it falls over pretty much immediately, on the following line of code :
sw := CoShellWindows.Create;
I don't know if it'll make any difference to this particular issue, but I'm running Delphi 5 (unfortunately). I did have to change a couple of small things to make it work, but nothing to do with the actual browser interfacing code.
Thanks,
John.
Well I've stepped through the code and found that it falls over pretty much immediately, on the following line of code :
sw := CoShellWindows.Create;
I don't know if it'll make any difference to this particular issue, but I'm running Delphi 5 (unfortunately). I did have to change a couple of small things to make it work, but nothing to do with the actual browser interfacing code.
Thanks,
John.
Not sure if this may help but try it anyway. Change the get browser button event handler as follows and let me know if the same error happens:
{ Click handler for the "get browsers" button (TShellWindows variant).

  Clears lbBrowsers, enumerates the open shell windows through a temporary
  TShellWindows component and, for every window whose document exposes
  IHTMLDocument2, creates a THtmlSrcObj and lists it under the window's
  LocationName. The created objects are also stored in HtmlSourceObjects. }
procedure TForm1.btnGetBrowsersClick(Sender: TObject);
var
  sw: TShellWindows;
  browser: IWebBrowser2;
  i: Integer;
  htmlDoc: IHTMLDocument2;
  obj: THtmlSrcObj;
begin
  lbBrowsers.Clear;
  sw := TShellWindows.Create(nil);
  try
    for i := 0 to sw.Count - 1 do
    begin
      browser := sw.Item(i) as IWebBrowser2;
      // Only windows that currently host an HTML document are listed.
      if browser.Document.QueryInterface(IID_IHTMLDocument2, htmlDoc) = S_OK then
      begin
        obj := THtmlSrcObj.Create;
        HtmlSourceObjects.Add(obj);
        obj.WBrowser := browser;
        obj.HtmlDoc := htmlDoc;
        lbBrowsers.AddItem(browser.LocationName, obj);
      end;
    end;
  finally
    // The wrapper component was created without an owner, so free it here.
    sw.Free;
  end;
end;
{ Click handler for the "get browsers" button (TShellWindows variant).

  Clears lbBrowsers, enumerates the open shell windows through a temporary
  TShellWindows component and, for every window whose document exposes
  IHTMLDocument2, creates a THtmlSrcObj and lists it under the window's
  LocationName. The created objects are also stored in HtmlSourceObjects. }
procedure TForm1.btnGetBrowsersClick(Sender: TObject);
var
  sw: TShellWindows;
  browser: IWebBrowser2;
  i: Integer;
  htmlDoc: IHTMLDocument2;
  obj: THtmlSrcObj;
begin
  lbBrowsers.Clear;
  sw := TShellWindows.Create(nil);
  try
    for i := 0 to sw.Count - 1 do
    begin
      browser := sw.Item(i) as IWebBrowser2;
      // Only windows that currently host an HTML document are listed.
      if browser.Document.QueryInterface(IID_IHTMLDocument2, htmlDoc) = S_OK then
      begin
        obj := THtmlSrcObj.Create;
        HtmlSourceObjects.Add(obj);
        obj.WBrowser := browser;
        obj.HtmlDoc := htmlDoc;
        lbBrowsers.AddItem(browser.LocationName, obj);
      end;
    end;
  finally
    // The wrapper component was created without an owner, so free it here.
    sw.Free;
  end;
end;
ASKER
Hi Pierre,
Yes, I still get the same error. It's quite strange because it worked with the original code and the bit that causes the error hasn't changed!
Thanks,
John.
Yes, I still get the same error. It's quite strange because it worked with the original code and the bit that causes the error hasn't changed!
Thanks,
John.
To be honest with you, using the DOM to retrieve the source for frames is bound to be fraught with
problems due to M$'s new security model. If the frame is cross-domain, which means that the frame's
source is not on the same domain as the hosting code, then you won't be able to get it via the DOM.
My suggestion would be to parse the HTML and get each frame's code using idHTTP, parse that frame's
code and get the src of each contained frame in a recursive manner.
I have some code to do this posted on Delphi Pages but it is currently down for maint.
I will see if I can find it for you.
problems due to M$'s new security model. If the frame is cross-domain, which means that the frame's
source is not on the same domain as the hosting code, then you won't be able to get it via the DOM.
My suggestion would be to parse the HTML and get each frame's code using idHTTP, parse that frame's
code and get the src of each contained frame in a recursive manner.
I have some code to do this posted on Delphi Pages but it is currently down for maint.
I will see if I can find it for you.
ASKER
Thanks Pierre.
I await your code!
John.
I await your code!
John.
I believe a solution was provided.
ASKER
Hi Pierre,
None of the above solutions worked concisely. They gave methods for partially solving my problem, but none were complete and therefore not solutions.
Thanks for the help all the same, but I was clear about looking for a 100% concise solution.
John.
None of the above solutions worked concisely. They gave methods for partially solving my problem, but none were complete and therefore not solutions.
Thanks for the help all the same, but I was clear about looking for a 100% concise solution.
John.
I spent a lot of time responding to your question and feel it was answered. You asked how to capture the source and my solution does that. It recursively lists the source for all frames and frames within frames. I tested it on my side and it worked fine. You also stated in a comment that it did, i.e. "...it worked with the original code ...".
I don't understand why you say that my suggestions partially solve your problem and that they were incomplete and can not be regarded as solutions when you asked how to get the source and my solution does just that. Please explain why you say so. Understand my frustration as we are volunteers (i.e. we don't get paid for this service) here at EE who answer questions and sometimes spend a lot of time on providing them.
I'm not interested to get into an argument here, so if you really feel unsatisfied then fine I suggest the moderator refund your points but suggest this still gets saved as a PAQ as I believe there is value in these posts for future reference.
I don't understand why you say that my suggestions partially solve your problem and that they were incomplete and can not be regarded as solutions when you asked how to get the source and my solution does just that. Please explain why you say so. Understand my frustration as we are volunteers (i.e. we don't get paid for this service) here at EE who answer questions and sometimes spend a lot of time on providing them.
I'm not interested to get into an argument here, so if you really feel unsatisfied then fine I suggest the moderator refund your points but suggest this still gets saved as a PAQ as I believe there is value in these posts for future reference.
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Sorry you had to wait this long for an answer.
I have put together a sample app for you which lists all IE instances and shows the relevant source. You say you can specifically name the window by its caption, so you could then get the handle for the window using the FindWindow API function. You can find your window by comparing this handle with the HWND property of the IWebbrowser2 interface. Simply modify the btnGetBrowsersClick method with something like
...
for i:= 0 to sw.Count-1 do
begin
browser:= sw.Item(i) as IWebBrowser2;
if browser.HWND <> TheHandleYouWant then Continue;
...
Let me know if you need more help.
Regards
Pierre
PAS File
==========================
unit Unit1;
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, ShDocVw, StdCtrls, mshtml, xpman, ExtCtrls;
type
{ Pairs one browser window with its HTML document and exposes the document's
  source text and title. }
THtmlSrcObj = class(TObject)
  private
    // Backing list handed out by the HTMLSource property.
    FHtmlSource: TStrings;
    // Browser window this object reads from.
    WBrowser: IWebBrowser2;
    // Document interface belonging to WBrowser.
    HtmlDoc: IHTMLDocument2;
    function GetHtmlSource: TStrings;
    function GetHtmlTitle: string;
  public
    constructor Create; virtual;
    destructor Destroy; override;
    // Both properties are read-only and computed by their getters.
    property HTMLSource: TStrings read GetHtmlSource;
    property DocTitle: string read GetHtmlTitle;
end;
{ Main form: a list of open browser windows on the left and a memo showing
  the selected window's HTML source on the right. }
TForm1 = class(TForm)
    Memo1: TMemo;
    lbBrowsers: TListBox;
    btnGetBrowsers: TButton;
    Panel1: TPanel;
    Panel2: TPanel;
    Splitter1: TSplitter;
    Panel3: TPanel;
    Panel4: TPanel;
    cbWrap: TCheckBox;
    procedure btnGetBrowsersClick(Sender: TObject);
    procedure lbBrowsersClick(Sender: TObject);
    procedure FormCreate(Sender: TObject);
    procedure FormDestroy(Sender: TObject);
    procedure cbWrapClick(Sender: TObject);
  private
    // Holds every THtmlSrcObj created by btnGetBrowsersClick; the objects
    // are released in FormDestroy.
    HtmlSourceObjects: TList;
end;
var
Form1: TForm1;
implementation
uses VarUtils;
{$R *.dfm}
{ THtmlSrcObj }
{ Creates the object and allocates the string list behind HTMLSource. }
constructor THtmlSrcObj.Create;
begin
  // Chain to the ancestor so the class stays correct if its base ever changes.
  inherited Create;
  FHtmlSource := TStringList.Create;
end;
{ Releases the string list behind HTMLSource and then runs the inherited
  destructor. }
destructor THtmlSrcObj.Destroy;
begin
  FHtmlSource.Free;
  // An overriding destructor should always finish by calling its ancestor.
  inherited Destroy;
end;
{ Rebuilds FHtmlSource from the browser's current document and returns it.

  The top-level document's markup replaces the list contents; the markup of
  each direct frame is then appended, preceded by HTML comment banners with
  the frame's 1-based index, title and URL. Frames nested inside those frames
  are NOT descended into by this version.

  Returns: the object's own FHtmlSource list (not a copy). Exceptions raised
  while reading a document are not propagated; their message text is appended
  to the list instead. }
function THtmlSrcObj.GetHtmlSource: TStrings;
var
  element: IHTMLElement;
  frames: IHTMLFramesCollection2;
  frameWindow: IHTMLWindow2;
  i: Integer;
  FrameIndex: OleVariant;
  FrameObj: IDispatch;
  FrameDoc: IHTMLDocument2;
begin
  Result := FHtmlSource;
  FHtmlSource.Clear;
  try
    if WBrowser.Document.QueryInterface(IID_IHTMLDocument2, HtmlDoc) = S_OK then
    begin
      // The first entry of the "all" collection is used as the document root.
      element := HtmlDoc.all.item(0, 0) as IHTMLElement;
      if Assigned(element) then
        FHtmlSource.Text := element.outerHTML;

      frames := HtmlDoc.frames;
      for i := 0 to frames.length - 1 do
      begin
        FHtmlSource.Add('');
        FHtmlSource.Add('<!--' + StringOfChar('*', 5) + ' '
          + 'Frame ' + IntToStr(i + 1)
          + StringOfChar('*', 5) + ' -->');
        try
          // frames.item takes its index by reference, so it needs a variable.
          FrameIndex := i;
          FrameObj := frames.item(FrameIndex);
          frameWindow := FrameObj as IHTMLWindow2;
          FrameDoc := frameWindow.document as IHTMLDocument2;
          FHtmlSource.Add('<!--'
            + StringOfChar('*', 5) + ' '
            + 'Title: ' + FrameDoc.title
            + ' URL: ' + FrameDoc.url
            + StringOfChar('*', 5) + ' -->');
          element := FrameDoc.all.item(0, 0) as IHTMLElement;
          if Assigned(element) then
            FHtmlSource.Add(element.outerHTML);
          FHtmlSource.Add('');
        except
          // A frame that cannot be read is reported in the output and the
          // remaining frames are still processed.
          on e: Exception do
            FHtmlSource.Add(e.Message);
        end;
      end;
    end;
  except
    on e: Exception do
      Result.Add(e.Message);
  end;
end;
{ Getter for DocTitle: the title of HtmlDoc, or an empty string when no
  document has been assigned. }
function THtmlSrcObj.GetHtmlTitle: string;
begin
  if not Assigned(HtmlDoc) then
    Result := ''
  else
    Result := HtmlDoc.title;
end;
{ Click handler for the "get browsers" button.

  Clears lbBrowsers, enumerates the open shell windows and, for every window
  whose document exposes IHTMLDocument2, creates a THtmlSrcObj and lists it
  under the window's LocationName. The created objects are also stored in
  HtmlSourceObjects, which is what frees them in FormDestroy. }
procedure TForm1.btnGetBrowsersClick(Sender: TObject);
var
  sw: IShellWindows;
  browser: IWebBrowser2;
  i: Integer;
  htmlDoc: IHTMLDocument2;
  obj: THtmlSrcObj;
begin
  lbBrowsers.Clear;
  sw := CoShellWindows.Create;
  for i := 0 to sw.Count - 1 do
  begin
    // Supports tolerates a nil result, so an empty slot in the collection or
    // a window without IWebBrowser2 is skipped instead of raising.
    if not Supports(sw.Item(i), IWebBrowser2, browser) then
      Continue;
    // Likewise skip windows with no document or a non-HTML document rather
    // than calling QueryInterface through a nil reference.
    if not Supports(browser.Document, IHTMLDocument2, htmlDoc) then
      Continue;

    obj := THtmlSrcObj.Create;
    HtmlSourceObjects.Add(obj);
    obj.WBrowser := browser;
    obj.HtmlDoc := htmlDoc;
    lbBrowsers.AddItem(browser.LocationName, obj);
  end;
end;
{ Click handler for the browser list: shows the HTML source of the selected
  entry in Memo1. Does nothing when no item is selected or the item carries
  no THtmlSrcObj. }
procedure TForm1.lbBrowsersClick(Sender: TObject);
var
  obj: TObject;
begin
  if lbBrowsers.ItemIndex = -1 then
    Exit;
  obj := lbBrowsers.Items.Objects[lbBrowsers.ItemIndex];
  // Reading HTMLSource re-queries the browser, so the memo reflects the
  // document as it is at the time of the click.
  if obj is THtmlSrcObj then
    Memo1.Text := (obj as THtmlSrcObj).HTMLSource.Text;
end;
{ OnCreate handler: allocates the list that tracks the THtmlSrcObj instances
  created while the form is alive. }
procedure TForm1.FormCreate(Sender: TObject);
begin
  HtmlSourceObjects := TList.Create;
end;
{ OnDestroy handler: frees every object stored in HtmlSourceObjects and then
  the list itself. }
procedure TForm1.FormDestroy(Sender: TObject);
var
  i: Integer;
begin
  // One reverse pass frees each entry; Free is nil-safe and dispatches to the
  // virtual destructor, so no per-item type check or Delete(0) is needed.
  for i := HtmlSourceObjects.Count - 1 downto 0 do
    TObject(HtmlSourceObjects.Items[i]).Free;
  HtmlSourceObjects.Free;
end;
{ Click handler for the "Wrap text" check box: word wrapping in Memo1 follows
  the check box state. }
procedure TForm1.cbWrapClick(Sender: TObject);
begin
  Memo1.WordWrap := cbWrap.Checked;
end;
end.
DFM File
==========================
object Form1: TForm1
Left = 192
Top = 114
Width = 696
Height = 480
Caption = 'Form1'
Color = clBtnFace
Font.Charset = DEFAULT_CHARSET
Font.Color = clWindowText
Font.Height = -11
Font.Name = 'MS Sans Serif'
Font.Style = []
OldCreateOrder = False
OnCreate = FormCreate
OnDestroy = FormDestroy
PixelsPerInch = 96
TextHeight = 13
object Splitter1: TSplitter
Left = 241
Top = 0
Width = 6
Height = 446
Color = clBackground
ParentColor = False
end
object Panel1: TPanel
Left = 0
Top = 0
Width = 241
Height = 446
Align = alLeft
BevelOuter = bvNone
Caption = 'Panel1'
TabOrder = 0
object Panel2: TPanel
Left = 0
Top = 0
Width = 241
Height = 33
Align = alTop
BevelOuter = bvNone
Caption = 'Panel2'
TabOrder = 0
DesignSize = (
241
33)
object btnGetBrowsers: TButton
Left = 8
Top = 4
Width = 227
Height = 25
Anchors = [akLeft, akTop, akRight]
Caption = 'List open browsers'
TabOrder = 0
OnClick = btnGetBrowsersClick
end
end
object lbBrowsers: TListBox
Left = 0
Top = 33
Width = 241
Height = 413
Align = alClient
BorderStyle = bsNone
ItemHeight = 13
TabOrder = 1
OnClick = lbBrowsersClick
end
end
object Panel3: TPanel
Left = 247
Top = 0
Width = 441
Height = 446
Align = alClient
BevelOuter = bvNone
Caption = 'Panel3'
TabOrder = 1
object Memo1: TMemo
Left = 0
Top = 29
Width = 441
Height = 417
Align = alClient
BevelInner = bvNone
BevelOuter = bvNone
BorderStyle = bsNone
TabOrder = 0
WordWrap = False
end
object Panel4: TPanel
Left = 0
Top = 0
Width = 441
Height = 29
Align = alTop
BevelOuter = bvNone
TabOrder = 1
object cbWrap: TCheckBox
Left = 16
Top = 8
Width = 97
Height = 17
Caption = '&Wrap text'
TabOrder = 0
OnClick = cbWrapClick
end
end
end
end