Populate a database from an RDF file, excluding banned categories

Hello,

I use the following code to gather URLs from a DMOZ RDF.

(Thanks adrpo and Adam314)

I want it to EXCLUDE items in the "my @banned" array.  The problem is that it includes some URLs from the banned categories.

Thanks for the help!
#!/usr/bin/perl
use DBI;
use XML::Parser;
 
# Thanks adrpo and Adam314
 
# how much I sleep before I attempt a new insert
 
binmode(STDOUT, ":utf8");
 
# Connect to MySQL
my $conn = DBI->connect("dbi:mysql:dbname=db_URL;host=localhost", "user", "password");
 
print "\n\nDeleting all records from the url table\n\nAdding Records...";
my $queryDel = $conn->prepare("delete from url;");
$queryDel->execute(); 
 
my $parser = new XML::Parser(ErrorContext => 2, Style => 'Stream' );
 
$parser->setHandlers(End => \&handle_end,
		     Start=>\&handle_start,
		     Char=>\&handle_char);
$parser->parse(STDIN);
 
my $SavedLink = '';
my $WeAreInTopic = 'YES';
 
sub isBanned
{
    my $topic = shift(@_);
    my @banned = ("Top/Adult/", "Top/Arts/Bodyart/", "Top/Business/Arts_and_Entertainment/Gaming/", "Top/Religion_and_Spirituality/", "Top/Arts/Performing_Arts/Hypnotism/", "Top/Arts/Performing_Arts/Magic/", "Top/Games/Dice/", "Top/Games/Gambling/", "Top/Games/Roleplaying/", "Top/Recreation/Nudism/", "Top/Recreation/Tobacco/", "Top/Health/Men%27s_Health/Sexuality/", "Top/Shopping/Tobacco/", "Top/Society/Gay,_Lesbian,_and_Bisexual/", "Top/Society/Issues/Abortion/", "Top/Society/Sexuality/", "Top/Society/Transgendered/", "Top/Society/Relationships/Alternative_Lifestyles/", "Top/World/");
    foreach $b (@banned)
    {
       # if topic starts with banned or the other way around
    $quotedTopic = quotemeta($topic);
    $quotedBanned = quotemeta($b);
    if (($quotedBanned =~ m/$quotedTopic/) == 1 || 
    ($quotedTopic =~ m/$quotedBanned/))
       {
	   return 1;
       }  
    }
    return 0;
}
 
sub handle_char
{
	if ($WeAreInTopic eq 'YES')
	{
		my ($p, $topic) = @_;
		chomp($topic); # kill the \n
		if ($SavedLink ne '' && $topic ne '') # only when we already have a link
		{
			if(isBanned($topic))
			{
				## print "link: $SavedLink --BANNED-- | topic: $topic.\n";
			}
			else
			{
				$strQuery = "insert into url values(?);";
				my $query = $conn->prepare($strQuery);
				$query->execute($SavedLink);
				## print "link: $SavedLink -INSERTED- | topic: $topic. Now sleep $sleep seconds\n";
			}     
		}
	}
}# End char_handler
 
sub handle_start {
    my( $expat, $element, %attrs ) = @_;
    if ($element eq 'topic')
    {
	$WeAreInTopic = 'YES';
    }
    else
    {
	$WeAreInTopic = 'NO';
    }
    if ($element eq 'ExternalPage')
    {
      # ask the expat object about our position
      my $line = $expat->current_line;
      if( %attrs ) {
          while( my( $key, $value ) = each( %attrs )) 
          {
              if(($key eq 'about') && ($value ne '')) 
	    { 
		# found our ABOUT, and the value is not null 
		# remember it for later because we need to see 
		# what is in the <topic>....</topic>
		$SavedLink = $value;
              }
          }
      }
    }
}
 
# process an end-of-element event
#
sub handle_end {
# just ignore the end of an element.
}
 
print "\n\nDone\n\n";


hankknightAsked:
 
adrpoCommented:

Just so you know what to expect....
It took about 2 hours 40 minutes for my 2.0 GHz Core 2 Duo laptop to push all the DMOZ data into PostgreSQL.
Two and a half million records.

I think you can make this much faster by:
- from Perl, don't insert; instead dump a delimiter-separated values file that looks like:
  link || topic
- then from MySQL use:
  LOAD DATA INFILE 'generated.txt' INTO TABLE url
   FIELDS TERMINATED BY '||';
  http://dev.mysql.com/doc/refman/5.0/en/load-data.html
I leave this alternative implementation to you as a home exercise, as it is quite simple :))

Cheers,
za-k/
adrpo@Kafka ~/dmoz
$ time zcat content.rdf.u8.gz | ./ParseAndInsert-DMOZ.pl > trace.txt 2>&1
real    169m44.919s
user    33m57.295s
sys     8m49.716s
 
assets=# select count(*) from url;
  count  
---------
 2518806
(1 row)

 
Adam314Commented:
On lines 37-38, you have:
    if (($quotedBanned =~ m/$quotedTopic/) == 1 ||
    ($quotedTopic =~ m/$quotedBanned/))
The == 1 on the match result isn't needed; in scalar context the match already returns true or false (1 or an empty string), so just test it directly:
    if (   ($quotedBanned =~ m/$quotedTopic/)
        || ($quotedTopic =~ m/$quotedBanned/) )



You could also move this (lines 60-61):
    $strQuery = "insert into url values(?);";
    my $query = $conn->prepare($strQuery);
to right after you connect on line 13, so the statement is only prepared once.  This will speed up your program.
 
adrpoCommented:

Well, this: ($quotedBanned =~ m/$quotedTopic/) == 1
is my creation, which proves that my Perl skills tend to
asymptotically approach zero :))

Each time I attempt to do something in Perl I need to RTFM :)

Cheers,
za-k/
 
hankknightAuthor Commented:
Thank you both.

Adam314, I followed your suggestion but it still adds the banned items!  Am I missing something?  
#!/usr/bin/perl
use DBI;
use XML::Parser;
 
# Thanks adrpo and Adam314
 
binmode(STDOUT, ":utf8");
 
print "\n\nBegin...\n\n";
 
# Connect to MySQL
my $conn = DBI->connect("dbi:mysql:dbname=name;host=localhost", "user", "password");
 
$strQuery = "insert into url values(?);";
my $query = $conn->prepare($strQuery);
 
my $queryDel = $conn->prepare("delete from url;");
$queryDel->execute(); 
 
my $parser = new XML::Parser(ErrorContext => 2, Style => 'Stream' );
 
$parser->setHandlers(End => \&handle_end,
		     Start=>\&handle_start,
		     Char=>\&handle_char);
$parser->parse(STDIN);
 
my $SavedLink = '';
my $WeAreInTopic = 'YES';
 
sub isBanned
{
    my $topic = shift(@_);
    my @banned = ("Top/Adult/", "Top/Arts/Bodyart/", "Top/Business/Arts_and_Entertainment/Gaming/", "Top/Religion_and_Spirituality/", "Top/Arts/Performing_Arts/Hypnotism/", "Top/Arts/Performing_Arts/Magic/", "Top/Games/Dice/", "Top/Games/Gambling/", "Top/Games/Roleplaying/", "Top/Recreation/Nudism/", "Top/Recreation/Tobacco/", "Top/Health/Men%27s_Health/Sexuality/", "Top/Shopping/Tobacco/", "Top/Society/Gay,_Lesbian,_and_Bisexual/", "Top/Society/Issues/Abortion/", "Top/Society/Sexuality/", "Top/Society/Transgendered/", "Top/Society/Relationships/Alternative_Lifestyles/", "Top/World/");
    foreach $b (@banned)
    {
       # if topic starts with banned or the other way around
    $quotedTopic = quotemeta($topic);
    $quotedBanned = quotemeta($b);
 
 
 
 
    if (   ($quotedBanned =~ m/$quotedTopic/)
        || ($quotedTopic =~ m/$quotedBanned/) )
       {
	   return 1;
       }  
    }
    return 0;
}
 
sub handle_char
{
	if ($WeAreInTopic eq 'YES')
	{
		my ($p, $topic) = @_;
		chomp($topic); # kill the \n
		if ($SavedLink ne '' && $topic ne '') # only when we already have a link
		{
			if(isBanned($topic))
			{
				## print "link: $SavedLink --BANNED-- | topic: $topic.\n";
			}
			else
			{
				$query->execute($SavedLink);
			}     
		}
	}
}# End char_handler
 
sub handle_start {
    my( $expat, $element, %attrs ) = @_;
    if ($element eq 'topic')
    {
	$WeAreInTopic = 'YES';
    }
    else
    {
	$WeAreInTopic = 'NO';
    }
    if ($element eq 'ExternalPage')
    {
      # ask the expat object about our position
      my $line = $expat->current_line;
      if( %attrs ) {
          while( my( $key, $value ) = each( %attrs )) 
          {
              if(($key eq 'about') && ($value ne '')) 
	    { 
		# found our ABOUT, and the value is not null 
		# remember it for later because we need to see 
		# what is in the <topic>....</topic>
		$SavedLink = $value;
              }
          }
      }
    }
}
 
# process an end-of-element event
#
sub handle_end {
# just ignore the end of an element.
}
 
print "\n\nDone\n\n";

 
adrpoCommented:

OK. Here it goes again :)

Alternative solution included.
Add a new column to the url table, say topic (of type text).
Insert both the URL and the topic.
Then, if you still have banned topics, you just say:
delete from url where topic like '/Banned/Topic/%';
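Roughly, that cleanup pass from Perl/DBI could look like the sketch below (just an illustration: the topic column, the shortened banned list and the MySQL DSN/credentials are placeholders, adjust them to your schema):

#!/usr/bin/perl
use strict;
use DBI;
 
# Sketch: add a topic column once, then purge rows whose topic starts
# with a banned prefix.  DSN, credentials and prefixes are placeholders.
my $conn = DBI->connect("dbi:mysql:dbname=db_URL;host=localhost",
                        "user", "password", { RaiseError => 1 });
 
# one-time schema change
$conn->do("ALTER TABLE url ADD COLUMN topic TEXT");
 
# remove everything under the banned prefixes (shortened list here)
my @banned = ("Top/Adult/", "Top/World/");
my $del = $conn->prepare("DELETE FROM url WHERE topic LIKE ?");
$del->execute("$_%") for @banned;
 
$conn->disconnect;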

Cheers,
za-k/
#!/usr/bin/perl -w 
use DBI;
use XML::Parser;
 
# Thanks adrpo and Adam314
 
binmode(STDOUT, ":utf8");
 
print "\n\nBegin...\n\n";
 
# Connect to PostgreSQL
my $conn = DBI->connect("dbi:Pg:dbname=assets;host=localhost", "postgres", "YEAH_RIGHT");
 
$strQuery = "insert into url values(?,?);";
my $query = $conn->prepare($strQuery);
 
my $queryDel = $conn->prepare("delete from url;");
$queryDel->execute(); 
 
my $parser = new XML::Parser(ErrorContext => 2, Style => 'Stream' );
 
$parser->setHandlers(End => \&handle_end,
		     Start=>\&handle_start,
		     Char=>\&handle_char);
$parser->parse(STDIN);
 
my $SavedLink = '';
my $WeAreInTopic = 'YES';
 
sub isBanned
{
    my $topic = shift(@_);
    my @banned = ("Top/Adult/", "Top/Arts/Bodyart/", "Top/Business/Arts_and_Entertainment/Gaming/", "Top/Religion_and_Spirituality/", "Top/Arts/Performing_Arts/Hypnotism/", "Top/Arts/Performing_Arts/Magic/", "Top/Games/Dice/", "Top/Games/Gambling/", "Top/Games/Roleplaying/", "Top/Recreation/Nudism/", "Top/Recreation/Tobacco/", "Top/Health/Men%27s_Health/Sexuality/", "Top/Shopping/Tobacco/", "Top/Society/Gay,_Lesbian,_and_Bisexual/", "Top/Society/Issues/Abortion/", "Top/Society/Sexuality/", "Top/Society/Transgendered/", "Top/Society/Relationships/Alternative_Lifestyles/", "Top/World/");
    foreach $b (@banned)
    {
	# if topic starts with banned or the other way around
	$quotedTopic = quotemeta($topic);
	$quotedBanned = quotemeta($b);
	$i1 = index($quotedTopic, $quotedBanned);
	$i2 = index($quotedBanned, $quotedTopic);
	if ($i1 > -1 || $i2 > -1)
	{
	    # print "Banned: $quotedBanned <-> Topic: $quotedTopic - BANNED\n";
	    return 1;
	}
    }
    
    # print "Topic: $quotedTopic - NOT BANNED\n";
    return 0;
 
}
 
sub handle_char
{
	if ($WeAreInTopic eq 'YES')
	{
		my ($p, $topic) = @_;
		chomp($topic); # kill the \n
		if ($SavedLink ne '' && $topic ne '') # only when we already have a link
		{
			if(isBanned($topic))
			{
			    print "link: $SavedLink --BANNED-- | topic: $topic.\n";
			}
			else
			{
			    print "Insert link: $SavedLink -- INSERTED -- | topic: $topic\n";
			    $query->execute($SavedLink, $topic);
			}     
		}
	}
}# End char_handler
 
sub handle_start {
    my( $expat, $element, %attrs ) = @_;
    if ($element eq 'topic')
    {
	$WeAreInTopic = 'YES';
    }
    else
    {
	$WeAreInTopic = 'NO';
    }
    if ($element eq 'ExternalPage')
    {
      # ask the expat object about our position
      my $line = $expat->current_line;
      if( %attrs ) {
          while( my( $key, $value ) = each( %attrs )) 
          {
              if(($key eq 'about') && ($value ne '')) 
	    { 
		# found our ABOUT, and the value is not null 
		# remember it for later because we need to see 
		# what is in the <topic>....</topic>
		$SavedLink = $value;
              }
          }
      }
    }
}
 
# process an end-of-element event
#
sub handle_end {
# just ignore the end of an element.
}
 
print "\n\nDone\n\n";

 
adrpoCommented:

It might also be good to make the field link in the url table
unique, as there are a lot of duplicates.
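Something like this would do it (a sketch only: MySQL syntax is assumed, and the link/topic column names are placeholders for whatever your table actually uses):

#!/usr/bin/perl
use strict;
use DBI;
 
# Sketch: enforce uniqueness on link and let duplicate inserts be skipped.
# INSERT IGNORE is MySQL-specific; DSN and credentials are placeholders.
my $conn = DBI->connect("dbi:mysql:dbname=db_URL;host=localhost",
                        "user", "password", { RaiseError => 1 });
 
# existing duplicate rows must be removed before this succeeds;
# if link is a TEXT column, MySQL needs a prefix length instead,
# e.g. ADD UNIQUE KEY uniq_link (link(255))
$conn->do("ALTER TABLE url ADD UNIQUE KEY uniq_link (link)");
 
my $query = $conn->prepare(
    "INSERT IGNORE INTO url (link, topic) VALUES (?, ?)");
$query->execute("http://example.com/", "Top/Computers/");  # a second identical link is silently skipped
 
$conn->disconnect;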

Cheers,
za-k/
 
adrpoCommented:

Just a small question if I may:
 What the freaking hell are you trying to do with this? :))

Cheers,
za-k/
 
Adam314Commented:
Here is another version.  It should fix the problem with adding the banned items, and it should run faster.
I did these things to improve speed:
    - Only define the banned topics once
    - Precompile all the banned topics into regular expressions, with the compile-once flag turned on
    - Prepare the insert SQL once
    - Changed the structure a bit to do less processing on unnecessary keys
    - Read from an input file instead of STDIN

Reading from an input file means you first need to uncompress the RDF file.  This takes more disk space, but it will be much faster.  Disk space is generally cheaper than time  :)

To run it, first decompress the RDF file:  gunzip content.rdf.u8.gz
Then run the code:  ./ParseAndInsert-DMOZ.pl

#!/usr/bin/perl -w 
use strict;
use DBI;
use XML::Parser;
 
my @banned = ("Top/Adult/", "Top/Arts/Bodyart/", "Top/Business/Arts_and_Entertainment/Gaming/", "Top/Religion_and_Spirituality/", "Top/Arts/Performing_Arts/Hypnotism/", "Top/Arts/Performing_Arts/Magic/", "Top/Games/Dice/", "Top/Games/Gambling/", "Top/Games/Roleplaying/", "Top/Recreation/Nudism/", "Top/Recreation/Tobacco/", "Top/Health/Men%27s_Health/Sexuality/", "Top/Shopping/Tobacco/", "Top/Society/Gay,_Lesbian,_and_Bisexual/", "Top/Society/Issues/Abortion/", "Top/Society/Sexuality/", "Top/Society/Transgendered/", "Top/Society/Relationships/Alternative_Lifestyles/", "Top/World/");
my @banned_re;
foreach (@banned) {
	push @banned_re, qr/\Q$_\E/o;
}
 
binmode(STDOUT, ":utf8");
 
print "\n\nBegin...\n\n";
 
# Connect to PostgreSQL
my $conn = DBI->connect("dbi:Pg:dbname=assets;host=localhost", "postgres", "YEAH_RIGHT");
 
$strQuery = "insert into url values(?,?);";
my $query = $conn->prepare($strQuery);
 
my $queryDel = $conn->prepare("delete from url;");
$queryDel->execute(); 
 
my $parser = new XML::Parser(ErrorContext => 2, Style => 'Stream' );
 
$parser->setHandlers(
  End => \&handle_end,
  Start=>\&handle_start,
  Char=>\&handle_char,
  );
open(IN, "<content.rdf.u8") or die "input: $!\n";
$parser->parse(*IN);
close(IN);
 
my $inExternalPage = 0;
my $inTopic = 0;
my $Topic;
my $Url;
 
 
sub isBanned
{
	my $topic = shift;
	foreach $b (@banned_re) {
		return 1 if $topic =~ $b;
	}
	return 0;
}
 
sub handle_char
{
	return unless $inTopic;
	$Topic = $_[1];
}# End char_handler
 
sub handle_start {
	if($_[1] eq "ExternalPage") {
		$inExternalPage = 1;
		$Url = $_[3];
		$inTopic=0;
		return;
	}
	elsif( ($inExternalPage) and ($_[1] eq 'topic') ) {
		$inTopic = 1;
	}
	else {
		$inTopic=0;
	}
}
 
 
# process an end-of-element event
sub handle_end {
	$inTopic=0,return if $_[1] eq 'topic';
	
	return unless $_[1] eq 'ExternalPage';
	$inExternalPage = 0;
	$inTopic = 0;
	if(isBanned($Topic)) {
		print "--BANNED-- $Url: $Topic\n";
	}
	else {
		print "--INSERT-- $Url: $Topic\n";
		$query->execute($Url, $Topic);
	}
	
}
 
print "\n\nDone\n\n";

 
hankknightAuthor Commented:
Thank you both.

Here is what I am trying to do with this: I  will be integrating this data with a social-bookmarking website.

za-k/, I like your idea of adding the category too so it can be verified later.  I also like your idea of first creating a text dump, but it might just be an extra step.

Adam314, I tried your code but it scrambles many of the categories.  I have attached a small dump to demonstrate.

Cheers!

dmoz.sql.txt
 
Adam314Commented:
I just looked at that.  It turns out that the parser is passing the text for some of the scrambled topics as two separate reads, instead of one.
e.g.:  It calls the handle_char sub with "abc" then again with "def", instead of one call with "abcdef".

I'm guessing there would have been some scrambled topics with either method, as the parser is exactly the same - only what is done with the results has changed.

I made a small addition so it displays the current record number it is working on.  This is just nice to see that it is still working...

Anyways, here is some updated code that works:
#!/usr/bin/perl -w 
use strict;
use DBI;
use XML::Parser;
use Data::Dumper;
 
my @banned = ("Top/Adult/", "Top/Arts/Bodyart/", "Top/Business/Arts_and_Entertainment/Gaming/", "Top/Religion_and_Spirituality/", "Top/Arts/Performing_Arts/Hypnotism/", "Top/Arts/Performing_Arts/Magic/", "Top/Games/Dice/", "Top/Games/Gambling/", "Top/Games/Roleplaying/", "Top/Recreation/Nudism/", "Top/Recreation/Tobacco/", "Top/Health/Men%27s_Health/Sexuality/", "Top/Shopping/Tobacco/", "Top/Society/Gay,_Lesbian,_and_Bisexual/", "Top/Society/Issues/Abortion/", "Top/Society/Sexuality/", "Top/Society/Transgendered/", "Top/Society/Relationships/Alternative_Lifestyles/", "Top/World/");
my @banned_re;
foreach (@banned) {
	push @banned_re, qr/\Q$_\E/o;
}
 
binmode(STDOUT, ":utf8");
 
print "\n\nBegin...\n\n";
 
# Connect to PostgreSQL
#my $conn = DBI->connect("dbi:Pg:dbname=assets;host=localhost", "postgres", "YEAH_RIGHT");
 
#my $query = $conn->prepare("insert into url values(?,?);");
 
#my $queryDel = $conn->prepare("delete from url;");
#$queryDel->execute(); 
 
my $parser = new XML::Parser(ErrorContext => 2, Style => 'Stream' );
 
$parser->setHandlers(
  End => \&handle_end,
  Start=>\&handle_start,
  Char=>\&handle_char,
  );
open(IN, "<content.rdf.txt") or die "input: $!\n";
$parser->parse(*IN);
close(IN);
 
my $inExternalPage = 0;
my $inTopic = 0;
my $Topic;
my $Url;
 
sub isBanned
{
	my $topic = shift;
	foreach $b (@banned_re) {
		return 1 if $topic =~ $b;
	}
	return 0;
}
 
sub handle_char
{
	return unless $inTopic;
	$Topic .= $_[1];
}# End char_handler
 
sub handle_start {
	if($_[1] eq "ExternalPage") {
		$inExternalPage = 1;
		$Url = $_[3];
		$inTopic=0;
		#print "ExternalPage: $Url\n";
		return;
	}
	elsif( ($inExternalPage) and ($_[1] eq 'topic') ) {
		$inTopic = 1;
		$Topic = '';
	}
	else {
		$inTopic=0;
	}
}
 
 
# process an end-of-element event
#
sub handle_end {
	$inTopic=0,return if $_[1] eq 'topic';
	
	return unless $_[1] eq 'ExternalPage';
	$inExternalPage = 0;
	$inTopic = 0;
	if(isBanned($Topic)) {
		print "--BANNED--$Counter   $Url: $Topic\n";
	}
	else {
		print "--INSERT--$Counter   $Url: $Topic\n";
		#$query->execute($Url, $Topic);
	}
}
 
print "\n\nDone\n\n";

 
Adam314Commented:
Sorry... just realized I didn't post the code that displays the current record number.  If you want, I'll post another update with that added in...
 
adrpoCommented:
Hi,

First of all, you should give the points to Adam314!

OK, I wanted to see how fast it would be to just dump a
generated file (generated.txt) and import that into MySQL using
load data infile:
- 14 minutes to dump the file.
- 2 minutes to load it into MySQL
Pretty fast, about 16 minutes in total :)

adrpo@Kafka ~/dmoz
$ time zcat content.rdf.u8.gz | ./latest.pl > generated.txt
real    14m2.209s
user    13m41.435s
sys     0m8.545s

mysql> load data infile 'c:/bin/cygwin/home/adrpo/dmoz/generated.txt' into table url fields terminated by '\t';
Query OK, 4538478 rows affected (1 min 59.30 sec)
Records: 4538478  Deleted: 0  Skipped: 0  Warnings: 0
mysql> select count(*) from url;
+----------+
| count(*) |
+----------+
|  4538478 |
+----------+
1 row in set (14.61 sec)

Below is the script that generates the file...

Cheers,
za-k/
#!/usr/bin/perl -w 
use XML::Parser;
use Data::Dumper;
 
my @banned = ("Top/Adult/", "Top/Arts/Bodyart/", "Top/Business/Arts_and_Entertainment/Gaming/", "Top/Religion_and_Spirituality/", "Top/Arts/Performing_Arts/Hypnotism/", "Top/Arts/Performing_Arts/Magic/", "Top/Games/Dice/", "Top/Games/Gambling/", "Top/Games/Roleplaying/", "Top/Recreation/Nudism/", "Top/Recreation/Tobacco/", "Top/Health/Men%27s_Health/Sexuality/", "Top/Shopping/Tobacco/", "Top/Society/Gay,_Lesbian,_and_Bisexual/", "Top/Society/Issues/Abortion/", "Top/Society/Sexuality/", "Top/Society/Transgendered/", "Top/Society/Relationships/Alternative_Lifestyles/", "Top/World/");
my @banned_re;
foreach (@banned) {
	push @banned_re, qr/\Q$_\E/o;
}
 
binmode(STDOUT, ":utf8");  
my $parser = new XML::Parser(ErrorContext => 2, Style => 'Stream' );
 
$parser->setHandlers(
  End => \&handle_end,
  Start=>\&handle_start,
  Char=>\&handle_char,
  );
$parser->parse(STDIN);
 
my $inExternalPage = 0;
my $inTopic = 0;
my $Topic;
my $Url;
 
sub isBanned
{
	my $topic = shift;
	foreach $b (@banned_re) {
		return 1 if $topic =~ $b;
	}
	return 0;
}
 
sub handle_char
{
	return unless $inTopic;
	$Topic .= $_[1];
}# End char_handler
 
sub handle_start {
	if($_[1] eq "ExternalPage") {
		$inExternalPage = 1;
		$Url = $_[3];
		$inTopic=0;
		#print "ExternalPage: $Url\n";
		return;
	}
	elsif( ($inExternalPage) and ($_[1] eq 'topic') ) {
		$inTopic = 1;
		$Topic = '';
	}
	else {
		$inTopic=0;
	}
}
 
 
# process an end-of-element event
#
sub handle_end {
	$inTopic=0,return if $_[1] eq 'topic';
	
	return unless $_[1] eq 'ExternalPage';
	$inExternalPage = 0;
	$inTopic = 0;
	if(isBanned($Topic)) {
		#print "--BANNED-- $Url: $Topic\n";
	}
	else {
		print "$Url\t$Topic\r\n";
	}
}

 
Adam314Commented:
Did you test to see how long it takes calling insert for each record with the updated script?

I would guess that it is still faster to use a text file, but just curious...
 
adrpoCommented:

Hi,

I can only test with PostgreSQL, which gives different results
that look pretty pathetic compared to the ~16 minutes for
load data infile:
adrpo@Kafka ~/dmoz
$ time zcat content.rdf.u8.gz | ./latest.pl
real    82m30.701s
user    22m39.154s
sys     12m1.763s

Cheers,
za-k/
 
Adam314Commented:
I figured it'd be much faster with a data file.  If a data file is possible, that'd be the preferred way to go.
I'm guessing MySQL is similar...
 
hankknightAuthor Commented:
The "load data LOCAL infile" way seems to be the way to go.

I have asked a question about that here:
http://www.experts-exchange.com/Programming/Languages/Scripting/Perl/Q_23139463.html
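For reference, here is a minimal sketch of that through DBI (assuming the tab-delimited generated.txt from above, a DBD::mysql built with the mysql_local_infile connect option, and a server that permits LOCAL; the table and credentials are placeholders):

#!/usr/bin/perl -w
use strict;
use DBI;
 
# Sketch: bulk-load the tab-delimited dump with LOAD DATA LOCAL INFILE.
# mysql_local_infile=1 asks the client side to allow LOCAL; the server
# must permit it as well.  File name, table and credentials are placeholders.
my $conn = DBI->connect(
    "dbi:mysql:dbname=db_URL;host=localhost;mysql_local_infile=1",
    "user", "password", { RaiseError => 1 });
 
$conn->do("DELETE FROM url");
$conn->do(q{
    LOAD DATA LOCAL INFILE 'generated.txt'
    INTO TABLE url
    FIELDS TERMINATED BY '\t'
});
$conn->disconnect;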

Thank you guys for everything!