#!/usr/bin/perl
#Ennio Bozzetti
#S0547650
use DBI;
use strict;
use warnings;

###########################
# Setup the DB connection #
###########################
# RaiseError makes every failing DBI call die, so the calls below need no
# explicit error checks.
# NOTE(review): the database credentials are hard-coded in the source;
# consider reading them from the environment or a config file.
my $dbh = DBI->connect("dbi:mysqlPP:database=crawler;host=localhost",
    "root", "ghh8773v", {'RaiseError' => 1});

###########################################
# Choose the two documents to be compared #
###########################################
# The IDs may be given on the command line: [first_doc_id [second_doc_id]].
# Each one defaults to 58, the value that used to be hard-coded in the SQL.
# NOTE(review): both defaults are the same ID, so by default a document is
# compared with itself -- confirm which second document was intended.
my $first_doc_id  = defined $ARGV[0] ? $ARGV[0] : 58;
my $second_doc_id = defined $ARGV[1] ? $ARGV[1] : 58;
for my $doc_id ($first_doc_id, $second_doc_id) {
    die "Document ID must be a non-negative integer, got '$doc_id'\n"
        unless $doc_id =~ /\A\d+\z/;
}

#####################################################
# Prepare the term query once; the ID is bound late #
#####################################################
my $sSQL = "SELECT tbl_terms.term FROM tbl_doc, tbl_terms WHERE tbl_doc.docID = tbl_terms.docID AND tbl_doc.docID = ?";
# $sth stays at file scope because the clean-up code at the end of the
# script calls finish() on it.
my $sth = $dbh->prepare($sSQL);

##################################################
# Get the first document and store it in a hash  #
##################################################
# fetchall_hashref keys the result by the 'term' column; each value is the
# row hash for that term.  The query is executed exactly once per document.
$sth->execute($first_doc_id);
my $doc = $sth->fetchall_hashref('term');

##################################################
# Get the second document and store it in a hash #
##################################################
$sth->execute($second_doc_id);
my $doc2 = $sth->fetchall_hashref('term');

#####################################
# Compare the documents and report  #
#####################################
my $cosine_result = cosine_sim_1($doc, $doc2);
print "The cosine coefficient between doc1 and doc2 is $cosine_result";
# cosine_sim_1($vec1, $vec2)
#
# Returns the cosine similarity of two documents, each given as a hash
# reference keyed by term.
#
# Only the presence of a term matters: every key counts with weight 1 and
# the hash values are ignored.  This lets the sub work directly on hashes
# whose values are not numbers (for example row hash references returned
# by DBI).  For such binary vectors the cosine reduces to
#
#     |shared terms| / sqrt(|terms in vec1| * |terms in vec2|)
#
# Parameters:
#   $vec1, $vec2 - hash references keyed by term (undef is treated as an
#                  empty document).  Neither hash is modified.
#
# Returns:
#   A number between 0 and 1; 0 when either document has no terms, so the
#   division can never be by zero.
sub cosine_sim_1 {
    my ($vec1, $vec2) = @_;

    $vec1 = {} unless defined $vec1;
    $vec2 = {} unless defined $vec2;

    my $size1 = scalar keys %{$vec1};
    my $size2 = scalar keys %{$vec2};

    # An empty document shares nothing with anything and would make the
    # denominator zero.
    return 0 if $size1 == 0 || $size2 == 0;

    # Walk the smaller hash and probe the larger one: fewer lookups.
    my ($small, $large) = $size1 <= $size2 ? ($vec1, $vec2) : ($vec2, $vec1);

    # With unit weights the dot product is simply the number of shared terms.
    my $shared = scalar grep { exists $large->{$_} } keys %{$small};

    # The sum of squares of a binary vector equals its number of terms.
    return $shared / sqrt($size1 * $size2);
}
##########################################
# Clean up: release the statement handle #
# first, then close the DB connection    #
##########################################
$sth->finish;
$dbh->disconnect;
