mistadontplay
asked on
Trying to parse an HTML file
I'm trying to run a book example and I get the following error.
E:\>perl parser.pl
Can't locate HTML/Tagset.pm in @INC (@INC contains: E:/ind/perl/lib E:/ind/perl/
site/lib .) at E:/ind/perl/site/lib/HTML/LinkExtor.pm line 31.
BEGIN failed--compilation aborted at E:/ind/perl/site/lib/HTML/LinkExtor.pm line
31.
Compilation failed in require at parser.pl line 5.
BEGIN failed--compilation aborted at parser.pl line 5.
sourcecode
#!e:/ind/perl/bin/perl -w
use strict;
use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
# Fetch one page and print the absolute URL of every <a>/<img> link in it.

# Page whose links will be listed.
my $url = URI::URL->new('http://www.perl.com/');
# Base URL of the fetched document. Assigned after a successful fetch and
# read by extract_links() to turn relative links into absolute ones.
my $base_url;
# Create new UserAgent object (browser)
my $ua = LWP::UserAgent->new();
# Give our agent a name
$ua->agent("Mozilla/4.7");
# Create HTTP GET request
my $request = HTTP::Request->new(GET => $url);
# Execute HTTP request
my $response = $ua->request($request);
# Check success
if ($response->is_success && $response->content_type eq 'text/html') {
    # Request was successful and is html
    $base_url = $response->base();
    print "Base URL: $base_url\n";
    # Hand the parser our callback; it does the actual printing.
    my $link_extor = HTML::LinkExtor->new(\&extract_links);
    $link_extor->parse($response->content);
    # Signal end of document so the parser flushes anything still buffered.
    $link_extor->eof;
} else {
    # Request failed, or the document was not text/html - print response
    # code and message
    print "Error getting document: ", $response->status_line, "\n";
}
# Callback given to HTML::LinkExtor->new.
# Arguments: a tag name followed by that tag's attribute name/value pairs.
# For <a> and <img> tags, prints each href/src attribute as "LINK: <url>",
# resolved against the file-level $base_url. Other tags are ignored.
sub extract_links {
    my ($tag, %attr) = @_;

    my $is_wanted_tag = ($tag eq 'a' || $tag eq 'img');
    if ($is_wanted_tag) {
        # Keep only the attributes that carry a link target.
        my @link_names = grep { $_ eq 'href' || $_ eq 'src' } keys %attr;
        for my $name (@link_names) {
            my $absolute = URI->new($attr{$name})->abs($base_url);
            print "LINK: $absolute\n";
        }
    }
}
E:\>perl parser.pl
Can't locate HTML/Tagset.pm in @INC (@INC contains: E:/ind/perl/lib E:/ind/perl/
site/lib .) at E:/ind/perl/site/lib/HTML/LinkExtor.pm line 31.
BEGIN failed--compilation aborted at E:/ind/perl/site/lib/HTML/LinkExtor.pm line
31.
Compilation failed in require at parser.pl line 5.
BEGIN failed--compilation aborted at parser.pl line 5.
sourcecode
#!e:/ind/perl/bin/perl -w
use strict;
use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
# Fetch one page and print the absolute URL of every <a>/<img> link in it.

# Page whose links will be listed.
my $url = URI::URL->new('http://www.perl.com/');
# Base URL of the fetched document. Assigned after a successful fetch and
# read by extract_links() to turn relative links into absolute ones.
my $base_url;
# Create new UserAgent object (browser)
my $ua = LWP::UserAgent->new();
# Give our agent a name
$ua->agent("Mozilla/4.7");
# Create HTTP GET request
my $request = HTTP::Request->new(GET => $url);
# Execute HTTP request
my $response = $ua->request($request);
# Check success
if ($response->is_success && $response->content_type eq 'text/html') {
    # Request was successful and is html
    $base_url = $response->base();
    print "Base URL: $base_url\n";
    # Hand the parser our callback; it does the actual printing.
    my $link_extor = HTML::LinkExtor->new(\&extract_links);
    $link_extor->parse($response->content);
    # Signal end of document so the parser flushes anything still buffered.
    $link_extor->eof;
} else {
    # Request failed, or the document was not text/html - print response
    # code and message
    print "Error getting document: ", $response->status_line, "\n";
}
# Callback given to HTML::LinkExtor->new.
# Arguments: a tag name followed by that tag's attribute name/value pairs.
# For <a> and <img> tags, prints each href/src attribute as "LINK: <url>",
# resolved against the file-level $base_url. Other tags are ignored.
sub extract_links {
    my ($tag, %attr) = @_;

    my $is_wanted_tag = ($tag eq 'a' || $tag eq 'img');
    if ($is_wanted_tag) {
        # Keep only the attributes that carry a link target.
        my @link_names = grep { $_ eq 'href' || $_ eq 'src' } keys %attr;
        for my $name (@link_names) {
            my $absolute = URI->new($attr{$name})->abs($base_url);
            print "LINK: $absolute\n";
        }
    }
}
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
My recommendation, which I will post in the Cleanup topic area, is to
split points between fantasy1001 and davorg.
PLEASE DO NOT ACCEPT THIS COMMENT AS AN ANSWER!
jmcg
EE Cleanup Volunteer