asked on
# Looks up a movie on imdbapi.org and returns the details that were found.
#
# Parameters:
#   $movieName - raw movie name; it is passed through nameProcessor() and the
#                result is used as the search title.
#
# Returns: an array reference holding, in this order,
#   [ $imageLink, $genre, $rating, $imdburl, $plot, $year ]
# Fields that could not be obtained are undef, except $genre, which is an
# empty string when no genre was found.
#
# Search strategy: the processed name is searched as-is. If it ends in a
# four digit number and no rating came back, that number is stripped and the
# search is repeated once. As soon as a rating is found the search stops.
# A transfer that fails with curl code 56 is retried a bounded number of times.
sub getMovieInfo {
    my $movieName = shift;
    my $processedMovieName = nameProcessor($movieName);
    my ($imageLink, $rating, $genre, $imdburl, $plot, $year);

    # Upper bound on retries of one and the same URL after curl code 56,
    # so that a permanently failing connection cannot loop forever.
    my $maxInterruptRetries  = 5;
    my $interruptRetriesLeft = $maxInterruptRetries;

    SEARCH: while (1) {
        my $xmlDoc = XML::Mini::Document->new();

        #1. This URL will give us an XML document to parse:
        my $url = "http://imdbapi.org/?title=".uri_escape($processedMovieName)."&type=xml&plot=simple&episode=1&limit=1&yg=0&mt=none&lang=en-US&offset=&aka=simple&release=simple&business=0&tech=0";

        # Genre is rebuilt from scratch on every attempt.
        $genre = "";

        #2. Download the url and extract the information:
        my $response_body = tempfile();
        my $curl = WWW::Curl::Easy->new;
        # NOTE(review): CURLOPT_HEADER = 1 asks libcurl to include the HTTP
        # response headers in the written data, so the XML parser below is
        # handed headers + body - confirm that this is intended.
        $curl->setopt(CURLOPT_HEADER, 1);
        $curl->setopt(CURLOPT_URL, $url);
        $curl->setopt(CURLOPT_WRITEDATA, \$response_body);
        my $return_code = $curl->perform;

        # Code 56 means the transfer was interrupted while receiving data:
        # try the same URL again, but only a limited number of times.
        if ($return_code == 56 && $interruptRetriesLeft > 0) {
            $interruptRetriesLeft--;
            next SEARCH;
        }

        if ($return_code == 0) {
            seek($response_body, 0, SEEK_SET); # reset filehandle to beginning of file
            my $xml;
            while (my $line = <$response_body>) {
                $xml .= $line;
            }
            $xmlDoc->parse($xml);
            my $xmlHash = $xmlDoc->toHash();

            # Walk down to the single result, tolerating a response in which
            # an intermediate node is missing or is not a hash.
            my $list = ref $xmlHash eq 'HASH' ? $xmlHash->{'IMDBDocumentList'} : undef;
            my $item = ref $list eq 'HASH' ? $list->{'item'} : undef;
            $item = {} unless ref $item eq 'HASH';

            $rating = $item->{'rating'};

            # Genres arrive either as a list of items or as a single item.
            my $genreItems = ref $item->{'genres'} eq 'HASH' ? $item->{'genres'}->{'item'} : undef;
            if (ref $genreItems eq 'ARRAY') {
                $genre = join(" ", @{$genreItems});
            }
            elsif (defined $genreItems) {
                $genre = $genreItems;
            }

            # Same for posters. Assign instead of append so that a second
            # attempt does not pile its posters on top of the first attempt's.
            my $poster = $item->{'poster'};
            if (ref $poster eq 'ARRAY') {
                $imageLink = join(" ", @{$poster}) if @{$poster};
            }
            else {
                $imageLink = $poster;
            }

            $imdburl = $item->{'imdb_url'};
            $plot    = $item->{'plot_simple'};
            $year    = $item->{'year'};
        }
        else {
            print ("An error occured: ".$return_code." ".$curl->strerror($return_code)." ".$curl->errbuf." at getMovieInfo() when treating ". $url ."\n");
        }

        # An empty rating counts as "nothing found".
        undef($rating) if (defined($rating) && $rating eq "");

        # Stop as soon as we have a rating; repeating the identical request
        # could never change the outcome.
        last SEARCH if defined($rating);

        # Nothing found and no trailing four digit number left to drop: give up.
        last SEARCH if ($processedMovieName !~ m/\b\d{4}\b$/);

        # Nothing found, but the name ends in a four digit number: drop it and
        # search once more with a fresh retry budget for the new URL.
        $processedMovieName =~ s/\b\d{4}\b$//;
        $interruptRetriesLeft = $maxInterruptRetries;
    }

    $imageLink =~ s/\s+$// if (defined($imageLink)); #remove trailing spaces
    if (defined($genre)) {
        $genre =~ s/\s+$//; #remove trailing spaces
        $genre =~ s/^\s+//; #remove leading spaces
    }

    my @infoArray = ($imageLink, $genre, $rating, $imdburl, $plot, $year);
    return \@infoArray;
}