Search Engine Problem

Hi guys hope you can help.

I have the following 2 files:
1) search.html (user inputs what text to search for)
2) search.cgi (page of results returned)
I'm running this on Windows XP with WAMP.

My docroot is c:\wamp\www

At the moment, I can only get the search to look in ONE folder.
What I'd like to do is search my entire website, or at least multiple folders and file types, but I'm not sure how to edit my search.cgi to do that.

Any help greatly appreciated. Here is my code.

================================================================== search.html

<html>
 <head>
  <title>Search Engine</title>
 </head>
 <body bgcolor="#FFFFFF" text="#000000">
  <center>
   <h1>Search Engine</h1>
  </center>

      <p>Use the form below to search through the files!</p>
<hr size="7" width="75%">
<!-- The field names (terms, boolean, case) and the option texts are the
     exact strings search.cgi compares against, so keep them unchanged. -->
<form method="POST" action="search.cgi">
<center><table border>
<tr>
<th>Text to Search For: </th><th><input type="text" name="terms" size="40"><br></th>
</tr><tr>
<th>Boolean: <select name="boolean">
<option>AND</option>
<option>OR</option>
</select> </th> <th>Case <select name="case">
<option>Insensitive</option>
<option>Sensitive</option>
</select><br></th>
</tr><tr>
<th colspan="2"><input type="submit" value="Search!"> <input type="reset"><br></th>
</tr></table></center></form>
<hr size="7" width="75%">
<p>Simple Search<a
href="http://localhost">Archive</a>.</p>
</body></html>

======================================================================= eof

======================================================================== search.cgi
#!c:/perl/bin/perl.exe
# ---- Configuration ----------------------------------------------------------
$basedir    = 'c:/wamp/www/hi/';       # directory whose files are searched
$baseurl    = 'http://localhost/hi/';  # URL prefix put in front of each result link
@files      = ('*.txt');               # file patterns, looked up inside $basedir
$title      = 'Archive';               # name shown in the results heading and footer
$title_url  = 'http://localhost/';     # target of the footer link labelled $title
$search_url = 'search.html';           # link back to the search form
# ---- End of configuration ---------------------------------------------------

# The script is a straight pipeline: decode the submitted form, collect the
# candidate files, test each one against the search terms, then print the
# result page.
parse_form();
get_files();
search();
return_html();


# Decode the submitted search form into the global %FORM hash.
#
# The URL-encoded request body (CONTENT_LENGTH bytes on STDIN) is used when
# present; otherwise QUERY_STRING is used, so the form also works with
# method=GET.  Both the field name and the field value are decoded ('+' to
# space, %XX to the corresponding byte).  A field sent without '=' is stored
# with an empty string as its value; pairs with an empty name are ignored.
# Takes no arguments; the result is delivered through %FORM.
sub parse_form {

   my $buffer = '';
   my $length = $ENV{'CONTENT_LENGTH'} || 0;

   # Get the input
   if ($length > 0) {
      read(STDIN, $buffer, $length);
   }
   elsif (defined $ENV{'QUERY_STRING'}) {
      $buffer = $ENV{'QUERY_STRING'};
   }

   # Split the name-value pairs
   foreach my $pair (split(/&/, $buffer)) {
      # Limit of 2 keeps any literal '=' inside the value intact.
      my ($name, $value) = split(/=/, $pair, 2);
      next unless defined $name && length $name;
      $value = '' unless defined $value;

      # $_ aliases each variable in turn, so both are decoded in place.
      foreach ($name, $value) {
         tr/+/ /;
         s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
      }

      $FORM{$name} = $value;
   }
}

# Collect the files to be searched into the global @FILES.
#
# Changes into $basedir, then expands every entry of @files:
#   * an entry that names a directory contributes the files directly inside
#     that directory (stored as "<entry>/<name>");
#   * any other entry is expanded as a glob pattern such as '*.txt'.
# Only plain files that pass Perl's -T (looks like text) test are kept.
# Names are relative to $basedir.  The expansion is done by Perl itself
# rather than by parsing the output of an external "dir" command, so file
# names containing spaces are handled and no shell is involved.
# Takes no arguments.  If $basedir cannot be entered a warning is written to
# STDERR and @FILES is left untouched.
sub get_files {

   unless (chdir($basedir)) {
      warn "get_files: cannot chdir to '$basedir': $!\n";
      return;
   }

   foreach my $file (@files) {
      my @candidates;

      if (-d $file) {
         # Make sure directory and entry name are joined by a separator.
         my $dir = $file;
         $dir .= '/' unless $dir =~ m{[/\\]$};

         if (opendir(my $dh, $file)) {
            @candidates = map  { "$dir$_" }
                          grep { $_ ne '.' && $_ ne '..' } readdir($dh);
            closedir($dh);
         }
      }
      else {
         @candidates = glob($file);
      }

      push(@FILES, grep { -f $_ && -T $_ } @candidates);
   }
}

# Test every file in @FILES against the submitted search terms.
#
# Reads:  $FORM{'terms'}   whitespace-separated words to look for
#         $FORM{'boolean'} 'AND' (all terms required) or 'OR' (any term)
#         $FORM{'case'}    'Insensitive' or 'Sensitive'
# Writes: @terms           the individual search words
#         $include{$file}  'yes' or 'no' for each file that was examined
#         $titles{$file}   text of the file's <title> element, else its name
#
# Terms are matched as literal text: they are quoted before being used in a
# pattern, so input such as "c++" or "(" can neither break the regex nor be
# used to inject one.  If 'boolean' or 'case' holds any other value, or no
# terms were given, %include is left unset and nothing will be reported.
# Files that cannot be opened are skipped with a warning on STDERR.
sub search {

   # split(' ', ...) ignores leading whitespace, so no empty term (which
   # would match every file) is produced.
   @terms = split(' ', defined $FORM{'terms'} ? $FORM{'terms'} : '');

   my $boolean = defined $FORM{'boolean'} ? $FORM{'boolean'} : '';
   my $case    = defined $FORM{'case'}    ? $FORM{'case'}    : '';

   # Compile each term once instead of once per file.
   my @patterns;
   if ($case eq 'Insensitive') {
      @patterns = map { qr/\Q$_\E/i } @terms;
   }
   elsif ($case eq 'Sensitive') {
      @patterns = map { qr/\Q$_\E/ } @terms;
   }

   foreach my $file (@FILES) {

      my $fh;
      unless (open($fh, '<', $file)) {
         warn "search: cannot open '$file': $!\n";
         next;
      }
      my @lines = <$fh>;
      close($fh);

      # Flatten the file to one line so a match may span line breaks.
      my $string = join(' ', @lines);
      $string =~ s/\n//g;

      if (@patterns) {
         my $hits = grep { $string =~ $_ } @patterns;

         if ($boolean eq 'AND') {
            $include{$file} = ($hits == @patterns) ? 'yes' : 'no';
         }
         elsif ($boolean eq 'OR') {
            $include{$file} = $hits ? 'yes' : 'no';
         }
      }

      # Non-greedy, so only the first <title>...</title> pair is captured.
      if ($string =~ /<title>(.*?)<\/title>/i) {
         $titles{$file} = "$1";
      }
      else {
         $titles{$file} = "$file";
      }
   }
}
     
# Print the complete result page (CGI header included) to the selected
# output handle.
#
# Reads the globals filled in earlier: %include / %titles (per-file verdict
# and link text), @terms and %FORM (echoed back as "Search Information") and
# the configuration values $title, $title_url, $baseurl and $search_url.
# Only files whose %include entry is 'yes' are listed.
#
# Everything that originates from the request (@terms, $FORM{'boolean'},
# $FORM{'case'}) and the file name placed in the href attribute is
# HTML-escaped, so submitted text cannot inject markup or script into the
# page.  Link titles and the configuration values are printed as they are.
# Takes no arguments and returns nothing of interest.
sub return_html {

   # Replace the characters that are significant in HTML text and in
   # double-quoted attribute values; undef is treated as an empty string.
   my $esc = sub {
      my ($text) = @_;
      $text = '' unless defined $text;
      $text =~ s/&/&amp;/g;
      $text =~ s/</&lt;/g;
      $text =~ s/>/&gt;/g;
      $text =~ s/"/&quot;/g;
      $text =~ s/'/&#39;/g;
      return $text;
   };

   print "Content-type: text/html\n\n";
   print "<html>\n <head>\n  <title>Results of Search</title>\n </head>\n";
   print "<body>\n <center>\n  <h1>Results of Search in $title</h1>\n </center>\n";
   print "Below are the results of your Search in no particular order:<p><hr size=7 width=75%><p>\n";
   print "<ul>\n";
   foreach my $key (keys %include) {
      if ($include{$key} eq 'yes') {
         my $href = $baseurl . $esc->($key);
         print "<li><a href=\"$href\">$titles{$key}</a>\n";
      }
   }
   print "</ul>\n";
   print "<hr size=7 width=75%>\n";
   print "Search Information:<p>\n";
   print "<ul>\n";
   print "<li><b>Terms:</b> ";
   print join(', ', map { $esc->($_) } @terms);
   print "\n";
   print "<li><b>Boolean Used:</b> " . $esc->($FORM{'boolean'}) . "\n";
   print "<li><b>Case " . $esc->($FORM{'case'}) . "</b>\n";
   print "</ul><br><hr size=7 width=75%><P>\n";
   print "<ul>\n<li><a href=\"$search_url\">Back to Search Page</a>\n";
   print "<li><a href=\"$title_url\">$title</a>\n";
   print "</ul>\n";
   print "<hr size=7 width=75%>\n";
   print "Engine<a href=\"http://localhost/\">Archive</a>\n";
   print "</body>\n</html>\n";
}
   
======================================================================================= EOF

Important part of search.cgi is the following:

# Settings from the top of search.cgi that decide what is searched and how
# the result links are built.
$basedir = 'c:/wamp/www/hi/';  #the root directory to the files you want to search.
$baseurl = 'http://localhost/hi/';  # URL prefix put in front of each matching file name
@files = ('*.txt');  # file patterns that get_files looks up inside $basedir
$title = "Archive";  # name shown in the results heading and footer link
$title_url = 'http://localhost/';  # target of the footer link labelled $title
$search_url = 'search.html'; #link to the form you set up to do the search.

--------------------------------------------------
So, at the moment, if for example I enter the word "mind" as the text to find in search.html, it will search all text files in the directory c:\wamp\www\hi and return the names of the text files that contain the word "mind".
I'd like to be able to extend this to include all directories under c:\wamp\www.

Thanks guys
LVL 1
Simon336697Asked:
Who is Participating?
 
FishMongerCommented:
Here's a simplified example.

use File::Find;

# Extension (leading dot included) a file name must end with to be searched.
my $ext = '.txt';

# Fill the global @FILES with every matching file found in $basedir or in
# any directory below it.  Takes no arguments.
sub get_files {
   find(\&wanted, $basedir);
}

# File::Find callback, invoked once for every entry met during the walk.
# Keeps plain files whose path ends in $ext.  The extension is quoted with
# \Q...\E so its dot is a literal dot rather than "any character" (otherwise
# a name such as "footxt" would be accepted), and directories that merely
# carry the extension in their name are skipped.
sub wanted {
   return unless -f $_;
   return if $File::Find::name !~ /\Q$ext\E\z/;
   push(@FILES, $File::Find::name);
}
0
 
FishMongerCommented:
The File::Find module is what you're needing.

http://search.cpan.org/~nwclark/perl-5.8.8/lib/File/Find.pm
0
 
Simon336697Author Commented:
Hi Fish!
Thanks for that. I hate to ask, but
how would I incorporate this into my current CGI script, Fish?
0
Upgrade your Question Security!

Your question, your audience. Choose who sees your identity—and your question—with question security.

 
Simon336697Author Commented:
Ive got the following code:
----------------------------------------
use File::Find;
# First pass: report the matching files found under /perl.
find (\&matchPattern, '/perl');
print "Search whole C: drive\n";
# Second pass: walk from the root of the drive.  The root contains /perl, so
# the files reported by the first pass are reported again here.
find (\&matchPattern, '/');

# File::Find callback: print the path of every entry whose name ends in
# ".txt", prefixed with the drive letter "C:".
# The test is anchored to the end of the path and ignores case, so both
# "readme.txt" and "README.TXT" are reported while paths that merely contain
# the letters "TXT" somewhere in the middle are not.
sub matchPattern
{
    if ($File::Find::name =~ m"\.txt\z"i)
    {
         print "C:$File::Find::name\n";
    }
}

----------------------------------------
When executed at a command line, that does what I want, but I'm not sure how to incorporate it into my existing CGI script.
0
 
Simon336697Author Commented:
Thanks Fish that was great! :>)
0
 
Simon336697Author Commented:
I got it to work with the following....

======================================================== search.cgi

#!c:/perl/bin/perl.exe
use File::Find;
##############################################################################
# Simple Search                 Version 1.0                                  #
# Copyright 1996 Matt Wright    mattw@scriptarchive.com                      #
# Created 12/16/95              Last Modified 12/16/95                       #
# Scripts Archive at:           http://www.scriptarchive.com/                #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1996 Matthew M. Wright  All Rights Reserved.                     #
#                                                                            #
# Simple Search may be used and modified free of charge by anyone so long as #
# this copyright notice and the comments above remain intact.  By using this #
# code you agree to indemnify Matthew M. Wright from any liability that      #  
# might arise from it's use.                                                 #  
#                                                                            #
# Selling the code for this program without prior written consent is         #
# expressly forbidden.  In other words, please ask first before you try and  #
# make money off of my program.                                              #
#                                                                            #
# Obtain permission before redistributing this software over the Internet or #
# in any other medium.  In all cases copyright and header must remain intact.#
##############################################################################
# Define Variables                                               #

# Directory tree to search.  The path is relative, so it is resolved against
# the directory the script is running in.
$basedir = 'hi/';
# Prefix for result links; empty because the names collected by get_files
# already start with $basedir.
$baseurl = '';

# File patterns.  NOTE: the File::Find based get_files in this version does
# not read @files; it selects files by the extension held in $ext.
# Earlier experiments, kept for reference:
#   @files = ('*.txt','/gothere/*.txt');
#   @files = ('go.txt');   # works if "mind" is entered as the search term
#   'fun/*.html'
@files = ('*.txt', '*.html');

$title      = 'Simon\'s Script Archive';   # name shown in the results heading and footer
$title_url  = 'http://localhost/';         # target of the footer link labelled $title
$search_url = 'search.html';               # link back to the search form

# Installation notes:
#   - In search.html, set the form's ACTION attribute to the URL of this script.
#   - Place this script in your cgi-bin, or rename it to search.cgi.
#   - After editing search.html you are ready to go.

# ---- End of configuration ---------------------------------------------------

# Pipeline: decode the submitted form, collect the candidate files, test each
# one against the search terms, then print the result page.
parse_form();
get_files();
search();
return_html();


# Decode the submitted search form into the global %FORM hash.
#
# The URL-encoded request body (CONTENT_LENGTH bytes on STDIN) is used when
# present; otherwise QUERY_STRING is used, so the form also works with
# method=GET.  Both the field name and the field value are decoded ('+' to
# space, %XX to the corresponding byte).  A field sent without '=' is stored
# with an empty string as its value; pairs with an empty name are ignored.
# Takes no arguments; the result is delivered through %FORM.
sub parse_form {

   my $buffer = '';
   my $length = $ENV{'CONTENT_LENGTH'} || 0;

   # Get the input
   if ($length > 0) {
      read(STDIN, $buffer, $length);
   }
   elsif (defined $ENV{'QUERY_STRING'}) {
      $buffer = $ENV{'QUERY_STRING'};
   }

   # Split the name-value pairs
   foreach my $pair (split(/&/, $buffer)) {
      # Limit of 2 keeps any literal '=' inside the value intact.
      my ($name, $value) = split(/=/, $pair, 2);
      next unless defined $name && length $name;
      $value = '' unless defined $value;

      # $_ aliases each variable in turn, so both are decoded in place.
      foreach ($name, $value) {
         tr/+/ /;
         s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
      }

      $FORM{$name} = $value;
   }
}



# Extension (leading dot included) a file name must end with to be searched.
# It is assigned in a BEGIN block because get_files() is called further up
# in this file, before execution reaches this line: with a plain run-time
# assignment $ext would still be undef during the directory walk and the
# extension test would accept every entry.
my $ext;
BEGIN { $ext = '.txt'; }

# Fill the global @FILES with every matching file found in $basedir or in
# any directory below it.  Takes no arguments.
sub get_files {
   find(\&wanted, $basedir);
}

# File::Find callback, invoked once for every entry met during the walk.
# Keeps plain files whose path ends in $ext.  The extension is quoted with
# \Q...\E so its dot is a literal dot rather than "any character", and
# directories are skipped so search() is only handed things it can open.
sub wanted {
   return unless -f $_;
   return if $File::Find::name !~ /\Q$ext\E\z/;
   push(@FILES, $File::Find::name);
}


#find (\&matchPattern, '/perl');
#print "Search whole C: drive\n";
#find (\&matchPattern, '/');
#
#sub matchPattern
#{
#    if ($File::Find::name =~ m"\.*.TXT")
#    {
#         print "C:$File::Find::name\n";
#    }
#}


#sub get_files {
#
#   chdir($basedir);
#   foreach $file (@files) {
#      $ls = `dir $file`;
#      @ls = split(/\s+/,$ls);
#      foreach $temp_file (@ls) {
#         if (-d $file) {
#            $filename = "$file$temp_file";
#            if (-T $filename) {
#               push(@FILES,$filename);
#            }
#         }
#         elsif (-T $temp_file) {
#            push(@FILES,$temp_file);
#         }
#      }
#   }
#}

# Test every file in @FILES against the submitted search terms.
#
# Reads:  $FORM{'terms'}   whitespace-separated words to look for
#         $FORM{'boolean'} 'AND' (all terms required) or 'OR' (any term)
#         $FORM{'case'}    'Insensitive' or 'Sensitive'
# Writes: @terms           the individual search words
#         $include{$file}  'yes' or 'no' for each file that was examined
#         $titles{$file}   text of the file's <title> element, else its name
#
# Terms are matched as literal text: they are quoted before being used in a
# pattern, so input such as "c++" or "(" can neither break the regex nor be
# used to inject one.  If 'boolean' or 'case' holds any other value, or no
# terms were given, %include is left unset and nothing will be reported.
# Files that cannot be opened are skipped with a warning on STDERR.
sub search {

   # split(' ', ...) ignores leading whitespace, so no empty term (which
   # would match every file) is produced.
   @terms = split(' ', defined $FORM{'terms'} ? $FORM{'terms'} : '');

   my $boolean = defined $FORM{'boolean'} ? $FORM{'boolean'} : '';
   my $case    = defined $FORM{'case'}    ? $FORM{'case'}    : '';

   # Compile each term once instead of once per file.
   my @patterns;
   if ($case eq 'Insensitive') {
      @patterns = map { qr/\Q$_\E/i } @terms;
   }
   elsif ($case eq 'Sensitive') {
      @patterns = map { qr/\Q$_\E/ } @terms;
   }

   foreach my $file (@FILES) {

      my $fh;
      unless (open($fh, '<', $file)) {
         warn "search: cannot open '$file': $!\n";
         next;
      }
      my @lines = <$fh>;
      close($fh);

      # Flatten the file to one line so a match may span line breaks.
      my $string = join(' ', @lines);
      $string =~ s/\n//g;

      if (@patterns) {
         my $hits = grep { $string =~ $_ } @patterns;

         if ($boolean eq 'AND') {
            $include{$file} = ($hits == @patterns) ? 'yes' : 'no';
         }
         elsif ($boolean eq 'OR') {
            $include{$file} = $hits ? 'yes' : 'no';
         }
      }

      # Non-greedy, so only the first <title>...</title> pair is captured.
      if ($string =~ /<title>(.*?)<\/title>/i) {
         $titles{$file} = "$1";
      }
      else {
         $titles{$file} = "$file";
      }
   }
}
     
# Print the complete result page (CGI header included) to the selected
# output handle.
#
# Reads the globals filled in earlier: %include / %titles (per-file verdict
# and link text), @terms and %FORM (echoed back as "Search Information") and
# the configuration values $title, $title_url, $baseurl and $search_url.
# Only files whose %include entry is 'yes' are listed.
#
# Everything that originates from the request (@terms, $FORM{'boolean'},
# $FORM{'case'}) and the file name placed in the href attribute is
# HTML-escaped, so submitted text cannot inject markup or script into the
# page.  Link titles and the configuration values are printed as they are.
# Takes no arguments and returns nothing of interest.
sub return_html {

   # Replace the characters that are significant in HTML text and in
   # double-quoted attribute values; undef is treated as an empty string.
   my $esc = sub {
      my ($text) = @_;
      $text = '' unless defined $text;
      $text =~ s/&/&amp;/g;
      $text =~ s/</&lt;/g;
      $text =~ s/>/&gt;/g;
      $text =~ s/"/&quot;/g;
      $text =~ s/'/&#39;/g;
      return $text;
   };

   print "Content-type: text/html\n\n";
   print "<html>\n <head>\n  <title>Results of Search</title>\n </head>\n";
   print "<body>\n <center>\n  <h1>Results of Search in $title</h1>\n </center>\n";
   print "Below are the results of your Search in no particular order:<p><hr size=7 width=75%><p>\n";
   print "<ul>\n";
   foreach my $key (keys %include) {
      if ($include{$key} eq 'yes') {
         my $href = $baseurl . $esc->($key);
         print "<li><a href=\"$href\">$titles{$key}</a>\n";
      }
   }
   print "</ul>\n";
   print "<hr size=7 width=75%>\n";
   print "Search Information:<p>\n";
   print "<ul>\n";
   print "<li><b>Terms:</b> ";
   print join(', ', map { $esc->($_) } @terms);
   print "\n";
   print "<li><b>Boolean Used:</b> " . $esc->($FORM{'boolean'}) . "\n";
   print "<li><b>Case " . $esc->($FORM{'case'}) . "</b>\n";
   print "</ul><br><hr size=7 width=75%><P>\n";
   print "<ul>\n<li><a href=\"$search_url\">Back to Search Page</a>\n";
   print "<li><a href=\"$title_url\">$title</a>\n";
   print "</ul>\n";
   print "<hr size=7 width=75%>\n";
   print "Search Script written by Matt Wright and can be found at <a href=\"http://www.scriptarchive.com/\">Matt's Script Archive</a>\n";
   print "</body>\n</html>\n";
}
   

=========================================================================
0
 
FishMongerCommented:
FYI,

Matt's scripts are VERY poorly written.  You really should switch over to the 'Simple Search' script from the nms project.  The nms scripts are well written and drop in replacements for Matt's scripts.

http://nms-cgi.sourceforge.net/scripts.shtml
0
Question has a verified solution.

Are you experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.

All Courses

From novice to tech pro — start learning today.