alexlindley
asked on
WWW Intelligent Search Tool for the internet, it takes the top 5 URLS from 3 search-engines and returns to the user keywords relating to the search term they entered
The question is this i have 5 classes passed below
Searcher = is a GUI class, calls up the other classes in order
Keyword = is the main class that does all the hard work it takes the urls provided by the ask, excite and lycos classes and downloads the HTML code from the webpages extracts the raw text, then removes what are called Noise words there is a file of theses but it doesn't really matter (there words like: it and the put different), and searches through each webpages results to collect the 10 most frequent words, the all the webpages results are stored in a vector and sorted to find the top 5 words overall and then display them in the gui main box.
ask, excite, lycos = are the same they search their respective search engine and remove the URLs
The progam compiles i think everything works up to the Sorting methods Sortindividual() and SortAll() in the Keyword class but i am having trouble getting the connections between the methods to work and return the final answer can anyone help i know its a very big problem
-------------------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----
import javax.swing.*; // Packages used
import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.util.*;
import java.net.*;
import java.lang.*;
public class Searcher extends JFrame implements ActionListener
{
private JLabel prompt = new JLabel("Search Request: ");
private JTextField input = new JTextField(20);
private JTextArea display = new JTextArea(10,20);
private JButton search = new JButton("Search");
private JButton refresh = new JButton("Refresh");
private JButton exit = new JButton("Exit");
/* Searcher() constructor sets the layout and adds
components to the top-level JFrame.
*/
public Searcher()
{
getContentPane().setLayout (new FlowLayout());
getContentPane().add(promp t);
getContentPane().add(input );
getContentPane().add(searc h);
getContentPane().add(refre sh);
getContentPane().add(exit) ;
getContentPane().add(displ ay);
display.setLineWrap(true);
display.setEditable(false) ;
search.addActionListener(t his);
refresh.addActionListener( this);
exit.addActionListener(thi s);
}
/* actionPerformed() handles all action events for the program.
In this case static methods of the MetricConverter class are
called to perform the conversions requested by the user. The
user's input is taken from a JTextField and the results are
appended to a JTextArea.
@param e -- the ActionEvent which prompted this method call
*/
public void actionPerformed(ActionEven t e)
{
Object source = e.getSource();
if (source == search)
{
Vector noiseword = new Vector();
Vector Word = new Vector();
Vector extract = new Vector();
Vector keywords = new Vector();
String userRequest = input.getText();
int j = 0;
Vector Site1 = new Vector();
Vector Site2 = new Vector();
Vector Site3 = new Vector();
Vector Sites = new Vector();
userRequest = userRequest.trim();//Remov e all white spaces
userRequest = userRequest.toLowerCase(); //turn all characters to lower case
userRequest = userRequest.replace(' ','+');//Replaces any spaces in between words with +
while (userRequest.indexOf('"') >= 0)
{
userRequest = userRequest.substring(0, userRequest.indexOf('"')) + "%22" + userRequest.substring(user Request.in dexOf('"') +1);
//replaces " marks with %22
}
ask newask = new ask( userRequest, Site1);
lycos newlycos = new lycos( userRequest, Site2);
excite newexcite = new excite ( userRequest, Site3);
System.out.println("Search ing ask.co.uk:");
newask.SearchRequest();
newask.getAddress();
System.out.println(Site1);
System.out.println("Search ing lycos.co.uk:");
newlycos.SearchRequest();
newask.getAddress();
System.out.println(Site2);
System.out.println("Search ing excite.co.uk");
newexcite.SearchRequest();
newexcite.getAddress();
System.out.println(Site3);
for (int as = 0; as > Site1.size(); as++)
{
Sites.addElement(Site1.ele mentAt(as) );
}
for (int ly =0; ly > Site2.size(); ly++)
{
Sites.addElement(Site2.ele mentAt(ly) );
}
for (int ex = 0; ex > Site3.size(); ex++)
{
Sites.addElement(Site3.ele mentAt(ex) );
}
Keyword newKeyword = new Keyword(Word, Sites);
newKeyword.resultDown();
newKeyword.SortAll();
newKeyword.returnKeywords( );
display.append("This is the List of words: ");
int s;
for (s = 0; s < keywords.size(); s++)
{
display.append(keywords.el ementAt(s) .toString( ));
}
}
else if (source == refresh)
{
display.setText(" ");
input.setText(" ");
}
else if (source == exit)
{
System.exit(0);
}
}
/* main() creates an instance of this (Converter) class and sets
the size and visibility of its JFrame.
An anonymous class is used to create an instance of the
WindowListener class, which handles the window close events
for the application.
*/
public static void main(String args[]) throws IOException
{
try
{
UIManager.setLookAndFeel(" javax.swin g.plaf.met al.MetalLo okAndFeel" );
} catch (Exception e) { }
Searcher f = new Searcher();
f.setSize(400, 300);
f.setVisible(true);
f.addWindowListener(new WindowAdapter()
{ // Quit the application
public void windowClosing(WindowEvent e)
{
System.exit(0);
}
});
}
}
-------------------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----
import java.io.*;
import java.util.*;
import java.lang.*;
import java.net.*;
public class Keyword
{
Vector ENDSites = new Vector();
Vector extract = new Vector();
Vector Words = new Vector();
Vector keywords = new Vector();
Vector Results = new Vector();
String [] Site = new String [20];
String Output;
int resultDown = 0;
public Keyword(Vector Word, Vector Sites)
{
ENDSites = Sites;
Words = Word;
}
public void resultDown()
{
URL u;
InputStream is = null;
DataInputStream dis;
String downHtml;
String [] URL = new String [20];
while (resultDown < ENDSites.size())
{
try
{
u = new URL(URL[resultDown]);
BufferedReader br = new BufferedReader(new InputStreamReader (u.openStream()));
while((downHtml = br.readLine()) !=null)
{
Output = Output + downHtml;
}
}
catch (MalformedURLException mue)
{
System.out.println("Ouch - a MalformedURLException happened.");
mue.printStackTrace();
}
catch (IOException ioe)
{
System.out.println("Oops- an IOException happened.");
ioe.printStackTrace();
}
RemoveHTML();
vecToken();
RemoveNoise();
returnKeywords();
SortIndividual();
resultDown++;
}
}
public void RemoveHTML()
{
System.out.println("We get Here");
StringBuffer returnMessage = new StringBuffer(Output);
int startPosition = Output.indexOf("<"); // encountered the first opening brace
int endPosition = Output.indexOf(">"); // encountered the first closing braces
while( startPosition != -1 )
{
returnMessage.delete( startPosition, endPosition+1 ); // remove the tag
startPosition = (returnMessage.toString()) .indexOf(" <"); // look for the next opening brace
endPosition = (returnMessage.toString()) .indexOf(" >"); // look for the next closing brace
}
}
public void vecToken()
{
try
{
int s = 0;
String delimiters = ";(),. ";
StringTokenizer st = new StringTokenizer(Output, delimiters, true);
while (st.hasMoreTokens()) // make sure there is elements to get
{
extract.addElement(st.next Token());
}
}
catch (Exception e)
{
e.printStackTrace();
}
}
public void RemoveNoise()
{
Vector noise = new Vector();
File f = new File("noise.out");
String data;
try
{
FileInputStream fstream = new FileInputStream(f);
DataInputStream in = new DataInputStream(fstream);
while(in.available() !=0)
{
noise.addElement(in.readLi ne());
}
in.close();
}
catch (IOException e)
{
System.out.println("Check for file 'noise.out'is in the right directory");
}
extract.removeAll(noise);
extract.removeAll(Words);
}
public void SortIndividual()
{
Vector wordList = new Vector();
Vector wordFreq = new Vector();
int[] topTen = new int[10];
Integer counter;
int count = 0;
for (int j = 0; j < extract.size(); j++)
{
count = 0 ;
if (extract.elementAt(j)!="#" )
{
wordList.addElement(extrac t.elementA t(j));
for(int k = j+1; k < extract.size(); k++)
{
if (extract.elementAt(j).equa ls(extract .elementAt (k)))
{
count++;
extract.setElementAt("#",k );
}
}
counter = new Integer(count);
wordFreq.addElement(counte r);
}
}
topTen[0]=1;
for (int l = 0; l < wordFreq.size(); l++)
{
counter = (Integer) wordFreq.elementAt(l);
for (int m = 0; m < 10; m++)
{
if (counter.intValue() > topTen[m])
{
topTen[m] = counter.intValue();
Results.addElement(wordLis t.elementA t(l));
}
}
}
}
public void SortAll()
{
Vector wordList = new Vector();
Vector wordFreq = new Vector();
int[] topFive = new int[5];
Integer counter;
int count = 0;
for (int j = 0; j < Words.size(); j++)
{
count = 0 ;
if (Results.elementAt(j)!="#" )
{
wordList.addElement(Result s.elementA t(j));
for(int k = j+1; k < extract.size(); k++)
{
if (Results.elementAt(j).equa ls(Results .elementAt (k)))
{
count++;
Results.setElementAt("#",k );
}
}
counter = new Integer(count);
wordFreq.addElement(counte r);
}
}
topFive[0]=1;
for (int l = 0; l < wordFreq.size(); l++)
{
counter = (Integer) wordFreq.elementAt(l);
for (int m = 0; m < 10; m++)
{
if (counter.intValue() > topFive[m])
{
topFive[m] = counter.intValue();
keywords.addElement(wordLi st.element At(l));
}
}
}
}
public Vector returnKeywords()
{
int s;
for (s = 0; s < keywords.size(); s++)
{
System.out.println(keyword s.elementA t(s).toStr ing());
}
return(keywords);
}
}//end of class Definition
-------------------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----
import java.io.*;
import java.net.*;
import java.util.*;
public class ask
{
String userRequest, userSearch;
String extract;
Vector S1 = new Vector();
public ask(String userRequest, Vector Site1 )
{
userSearch = userRequest;
S1 = Site1;
}
public void SearchRequest()
{
try
{
URL u = new URL("http://web.ask.com/web?q="+ userSearch + "&ste=0&qsrc=0&o=0");
BufferedReader br = new BufferedReader(new InputStreamReader (u.openStream()));
String theHTML;
while((theHTML = br.readLine()) !=null)
{
extract = extract + theHTML;
}
}
catch (MalformedURLException e)
{
System.err.println(" is not a valid URL");
}
catch (NoRouteToHostException e)
{
System.out.println("No result returned");
SearchRequest();
}
catch (SocketException e)
{
System.out.println("No results from searchengine");
SearchRequest();
}
catch (IOException e)
{
System.err.println(e);
}
}
public void getAddress()
{
String Address;
int Addressindex = 0, startindex, endindex;
while ((Addressindex < 5) && ((extract.indexOf("onmouse over=\"ret urn ss('go to ") >= 0)
&& (extract.indexOf("')\" onmouseout=\"cs()\">") >=0)))
{
startindex = (extract.indexOf("onmouseo ver=\"retu rn ss('go to ")+ 29);
endindex = extract.indexOf("')\" onmouseout=\"cs()\">", startindex);
Address = extract.substring(startind ex, endindex);
extract = extract.substring(endindex );
Address =(Address.trim());
Address =("http://" + Address + "/");
S1.addElement(Address);
System.out.println(Address );
Addressindex++;
}
}
} // end of class definition
-------------------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----
import java.io.*;
import java.net.*;
import java.util.*;
public class excite
{
String userRequest, userSearch;
String extract;
Vector S3 = new Vector();
public excite(String userRequest, Vector Site3 )
{
userSearch = userRequest;
S3 = Site3;
}
public void SearchRequest()
{
try
{
URL u = new URL("http://www.excite.co.uk/search/web/results?q="+ userSearch + "&c=web");
BufferedReader br = new BufferedReader(new InputStreamReader (u.openStream()));
String theHTML;
while((theHTML = br.readLine()) !=null)
{
extract = extract + theHTML;
}
}
catch (MalformedURLException e)
{
System.err.println(" is not a valid URL");
}
catch (NoRouteToHostException e)
{
System.out.println("No result returned");
SearchRequest();
}
catch (SocketException e)
{
System.out.println("No results from searchengine");
SearchRequest();
}
catch (IOException e)
{
System.err.println(e);
}
}
public void getAddress()
{
String Address;
int Addressindex = 0, startindex, endindex;
while ((Addressindex < 5) && ((extract.indexOf("class=' c_grey'>") >= 0)
&& (extract.indexOf("</a></di v>") >=0)))
{
startindex = (extract.indexOf("class='c _grey'>")+ 15);
endindex = extract.indexOf("</a></div >", startindex);
Address = extract.substring(startind ex, endindex);
extract = extract.substring(endindex );
Address = (Address.trim());
Address =("http://" + Address + "/");
S3.addElement(Address);
System.out.println(Address );
Addressindex++;
}
}
} // end of class definition
-------------------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----
import java.io.*;
import java.net.*;
import java.util.*;
public class lycos
{
String userRequest, userSearch;
String extract;
Vector S2 = new Vector();
public lycos(String userRequest, Vector Site2 )
{
userSearch = userRequest;
S2 = Site2;
}
public void SearchRequest()
{
try
{
URL u = new URL("http://search.lycos.co.uk/cgi-bin/pursuit?SITE=uk&query="+ userSearch + "&x=0&y=0&cat=loc&matchmod e=and&SITE =uk&idx=al l&enc=utf- 8");
BufferedReader br = new BufferedReader(new InputStreamReader (u.openStream()));
String theHTML;
while((theHTML = br.readLine()) !=null)
{
extract = extract + theHTML;
}
}
catch (MalformedURLException e)
{
System.err.println(" is not a valid URL");
}
catch (NoRouteToHostException e)
{
System.out.println("No result returned");
SearchRequest();
}
catch (SocketException e)
{
System.out.println("No results from searchengine");
SearchRequest();
}
catch (IOException e)
{
System.err.println(e);
}
}
public void getAddress()
{
String Address;
int Addressindex = 0, startindex, endindex;
while ((Addressindex < 5) && ((extract.indexOf("<span class=\"siteurl\">") >= 0)
&& (extract.indexOf("</span>" ) >=0)))
{
startindex = (extract.indexOf("<span class=\"siteurl\">"));
endindex = extract.indexOf("</span>", startindex);
Address = extract.substring(startind ex, endindex);
extract = extract.substring(endindex );
Address = (Address.trim());
Address =("http://" + Address + "/");
S2.addElement(Address);
System.out.println(Address );
Addressindex++;
}
}
} // end of class definition
Searcher = is a GUI class, calls up the other classes in order
Keyword = is the main class that does all the hard work it takes the urls provided by the ask, excite and lycos classes and downloads the HTML code from the webpages extracts the raw text, then removes what are called Noise words there is a file of theses but it doesn't really matter (there words like: it and the put different), and searches through each webpages results to collect the 10 most frequent words, the all the webpages results are stored in a vector and sorted to find the top 5 words overall and then display them in the gui main box.
ask, excite, lycos = are the same they search their respective search engine and remove the URLs
The progam compiles i think everything works up to the Sorting methods Sortindividual() and SortAll() in the Keyword class but i am having trouble getting the connections between the methods to work and return the final answer can anyone help i know its a very big problem
--------------------------
import javax.swing.*; // Packages used
import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.util.*;
import java.net.*;
import java.lang.*;
public class Searcher extends JFrame implements ActionListener
{
private JLabel prompt = new JLabel("Search Request: ");
private JTextField input = new JTextField(20);
private JTextArea display = new JTextArea(10,20);
private JButton search = new JButton("Search");
private JButton refresh = new JButton("Refresh");
private JButton exit = new JButton("Exit");
/* Searcher() constructor sets the layout and adds
components to the top-level JFrame.
*/
public Searcher()
{
getContentPane().setLayout
getContentPane().add(promp
getContentPane().add(input
getContentPane().add(searc
getContentPane().add(refre
getContentPane().add(exit)
getContentPane().add(displ
display.setLineWrap(true);
display.setEditable(false)
search.addActionListener(t
refresh.addActionListener(
exit.addActionListener(thi
}
/* actionPerformed() handles all action events for the program.
In this case static methods of the MetricConverter class are
called to perform the conversions requested by the user. The
user's input is taken from a JTextField and the results are
appended to a JTextArea.
@param e -- the ActionEvent which prompted this method call
*/
public void actionPerformed(ActionEven
{
Object source = e.getSource();
if (source == search)
{
Vector noiseword = new Vector();
Vector Word = new Vector();
Vector extract = new Vector();
Vector keywords = new Vector();
String userRequest = input.getText();
int j = 0;
Vector Site1 = new Vector();
Vector Site2 = new Vector();
Vector Site3 = new Vector();
Vector Sites = new Vector();
userRequest = userRequest.trim();//Remov
userRequest = userRequest.toLowerCase();
userRequest = userRequest.replace(' ','+');//Replaces any spaces in between words with +
while (userRequest.indexOf('"') >= 0)
{
userRequest = userRequest.substring(0, userRequest.indexOf('"')) + "%22" + userRequest.substring(user
//replaces " marks with %22
}
ask newask = new ask( userRequest, Site1);
lycos newlycos = new lycos( userRequest, Site2);
excite newexcite = new excite ( userRequest, Site3);
System.out.println("Search
newask.SearchRequest();
newask.getAddress();
System.out.println(Site1);
System.out.println("Search
newlycos.SearchRequest();
newask.getAddress();
System.out.println(Site2);
System.out.println("Search
newexcite.SearchRequest();
newexcite.getAddress();
System.out.println(Site3);
for (int as = 0; as > Site1.size(); as++)
{
Sites.addElement(Site1.ele
}
for (int ly =0; ly > Site2.size(); ly++)
{
Sites.addElement(Site2.ele
}
for (int ex = 0; ex > Site3.size(); ex++)
{
Sites.addElement(Site3.ele
}
Keyword newKeyword = new Keyword(Word, Sites);
newKeyword.resultDown();
newKeyword.SortAll();
newKeyword.returnKeywords(
display.append("This is the List of words: ");
int s;
for (s = 0; s < keywords.size(); s++)
{
display.append(keywords.el
}
}
else if (source == refresh)
{
display.setText(" ");
input.setText(" ");
}
else if (source == exit)
{
System.exit(0);
}
}
/* main() creates an instance of this (Converter) class and sets
the size and visibility of its JFrame.
An anonymous class is used to create an instance of the
WindowListener class, which handles the window close events
for the application.
*/
public static void main(String args[]) throws IOException
{
try
{
UIManager.setLookAndFeel("
} catch (Exception e) { }
Searcher f = new Searcher();
f.setSize(400, 300);
f.setVisible(true);
f.addWindowListener(new WindowAdapter()
{ // Quit the application
public void windowClosing(WindowEvent e)
{
System.exit(0);
}
});
}
}
--------------------------
import java.io.*;
import java.util.*;
import java.lang.*;
import java.net.*;
public class Keyword
{
Vector ENDSites = new Vector();
Vector extract = new Vector();
Vector Words = new Vector();
Vector keywords = new Vector();
Vector Results = new Vector();
String [] Site = new String [20];
String Output;
int resultDown = 0;
public Keyword(Vector Word, Vector Sites)
{
ENDSites = Sites;
Words = Word;
}
public void resultDown()
{
URL u;
InputStream is = null;
DataInputStream dis;
String downHtml;
String [] URL = new String [20];
while (resultDown < ENDSites.size())
{
try
{
u = new URL(URL[resultDown]);
BufferedReader br = new BufferedReader(new InputStreamReader (u.openStream()));
while((downHtml = br.readLine()) !=null)
{
Output = Output + downHtml;
}
}
catch (MalformedURLException mue)
{
System.out.println("Ouch - a MalformedURLException happened.");
mue.printStackTrace();
}
catch (IOException ioe)
{
System.out.println("Oops- an IOException happened.");
ioe.printStackTrace();
}
RemoveHTML();
vecToken();
RemoveNoise();
returnKeywords();
SortIndividual();
resultDown++;
}
}
public void RemoveHTML()
{
System.out.println("We get Here");
StringBuffer returnMessage = new StringBuffer(Output);
int startPosition = Output.indexOf("<"); // encountered the first opening brace
int endPosition = Output.indexOf(">"); // encountered the first closing braces
while( startPosition != -1 )
{
returnMessage.delete( startPosition, endPosition+1 ); // remove the tag
startPosition = (returnMessage.toString())
endPosition = (returnMessage.toString())
}
}
public void vecToken()
{
try
{
int s = 0;
String delimiters = ";(),. ";
StringTokenizer st = new StringTokenizer(Output, delimiters, true);
while (st.hasMoreTokens()) // make sure there is elements to get
{
extract.addElement(st.next
}
}
catch (Exception e)
{
e.printStackTrace();
}
}
public void RemoveNoise()
{
Vector noise = new Vector();
File f = new File("noise.out");
String data;
try
{
FileInputStream fstream = new FileInputStream(f);
DataInputStream in = new DataInputStream(fstream);
while(in.available() !=0)
{
noise.addElement(in.readLi
}
in.close();
}
catch (IOException e)
{
System.out.println("Check for file 'noise.out'is in the right directory");
}
extract.removeAll(noise);
extract.removeAll(Words);
}
public void SortIndividual()
{
Vector wordList = new Vector();
Vector wordFreq = new Vector();
int[] topTen = new int[10];
Integer counter;
int count = 0;
for (int j = 0; j < extract.size(); j++)
{
count = 0 ;
if (extract.elementAt(j)!="#"
{
wordList.addElement(extrac
for(int k = j+1; k < extract.size(); k++)
{
if (extract.elementAt(j).equa
{
count++;
extract.setElementAt("#",k
}
}
counter = new Integer(count);
wordFreq.addElement(counte
}
}
topTen[0]=1;
for (int l = 0; l < wordFreq.size(); l++)
{
counter = (Integer) wordFreq.elementAt(l);
for (int m = 0; m < 10; m++)
{
if (counter.intValue() > topTen[m])
{
topTen[m] = counter.intValue();
Results.addElement(wordLis
}
}
}
}
public void SortAll()
{
Vector wordList = new Vector();
Vector wordFreq = new Vector();
int[] topFive = new int[5];
Integer counter;
int count = 0;
for (int j = 0; j < Words.size(); j++)
{
count = 0 ;
if (Results.elementAt(j)!="#"
{
wordList.addElement(Result
for(int k = j+1; k < extract.size(); k++)
{
if (Results.elementAt(j).equa
{
count++;
Results.setElementAt("#",k
}
}
counter = new Integer(count);
wordFreq.addElement(counte
}
}
topFive[0]=1;
for (int l = 0; l < wordFreq.size(); l++)
{
counter = (Integer) wordFreq.elementAt(l);
for (int m = 0; m < 10; m++)
{
if (counter.intValue() > topFive[m])
{
topFive[m] = counter.intValue();
keywords.addElement(wordLi
}
}
}
}
public Vector returnKeywords()
{
int s;
for (s = 0; s < keywords.size(); s++)
{
System.out.println(keyword
}
return(keywords);
}
}//end of class Definition
--------------------------
import java.io.*;
import java.net.*;
import java.util.*;
public class ask
{
String userRequest, userSearch;
String extract;
Vector S1 = new Vector();
public ask(String userRequest, Vector Site1 )
{
userSearch = userRequest;
S1 = Site1;
}
public void SearchRequest()
{
try
{
URL u = new URL("http://web.ask.com/web?q="+ userSearch + "&ste=0&qsrc=0&o=0");
BufferedReader br = new BufferedReader(new InputStreamReader (u.openStream()));
String theHTML;
while((theHTML = br.readLine()) !=null)
{
extract = extract + theHTML;
}
}
catch (MalformedURLException e)
{
System.err.println(" is not a valid URL");
}
catch (NoRouteToHostException e)
{
System.out.println("No result returned");
SearchRequest();
}
catch (SocketException e)
{
System.out.println("No results from searchengine");
SearchRequest();
}
catch (IOException e)
{
System.err.println(e);
}
}
public void getAddress()
{
String Address;
int Addressindex = 0, startindex, endindex;
while ((Addressindex < 5) && ((extract.indexOf("onmouse
&& (extract.indexOf("')\" onmouseout=\"cs()\">") >=0)))
{
startindex = (extract.indexOf("onmouseo
endindex = extract.indexOf("')\" onmouseout=\"cs()\">", startindex);
Address = extract.substring(startind
extract = extract.substring(endindex
Address =(Address.trim());
Address =("http://" + Address + "/");
S1.addElement(Address);
System.out.println(Address
Addressindex++;
}
}
} // end of class definition
--------------------------
import java.io.*;
import java.net.*;
import java.util.*;
public class excite
{
String userRequest, userSearch;
String extract;
Vector S3 = new Vector();
public excite(String userRequest, Vector Site3 )
{
userSearch = userRequest;
S3 = Site3;
}
public void SearchRequest()
{
try
{
URL u = new URL("http://www.excite.co.uk/search/web/results?q="+ userSearch + "&c=web");
BufferedReader br = new BufferedReader(new InputStreamReader (u.openStream()));
String theHTML;
while((theHTML = br.readLine()) !=null)
{
extract = extract + theHTML;
}
}
catch (MalformedURLException e)
{
System.err.println(" is not a valid URL");
}
catch (NoRouteToHostException e)
{
System.out.println("No result returned");
SearchRequest();
}
catch (SocketException e)
{
System.out.println("No results from searchengine");
SearchRequest();
}
catch (IOException e)
{
System.err.println(e);
}
}
public void getAddress()
{
String Address;
int Addressindex = 0, startindex, endindex;
while ((Addressindex < 5) && ((extract.indexOf("class='
&& (extract.indexOf("</a></di
{
startindex = (extract.indexOf("class='c
endindex = extract.indexOf("</a></div
Address = extract.substring(startind
extract = extract.substring(endindex
Address = (Address.trim());
Address =("http://" + Address + "/");
S3.addElement(Address);
System.out.println(Address
Addressindex++;
}
}
} // end of class definition
--------------------------
import java.io.*;
import java.net.*;
import java.util.*;
public class lycos
{
String userRequest, userSearch;
String extract;
Vector S2 = new Vector();
public lycos(String userRequest, Vector Site2 )
{
userSearch = userRequest;
S2 = Site2;
}
public void SearchRequest()
{
try
{
URL u = new URL("http://search.lycos.co.uk/cgi-bin/pursuit?SITE=uk&query="+ userSearch + "&x=0&y=0&cat=loc&matchmod
BufferedReader br = new BufferedReader(new InputStreamReader (u.openStream()));
String theHTML;
while((theHTML = br.readLine()) !=null)
{
extract = extract + theHTML;
}
}
catch (MalformedURLException e)
{
System.err.println(" is not a valid URL");
}
catch (NoRouteToHostException e)
{
System.out.println("No result returned");
SearchRequest();
}
catch (SocketException e)
{
System.out.println("No results from searchengine");
SearchRequest();
}
catch (IOException e)
{
System.err.println(e);
}
}
public void getAddress()
{
String Address;
int Addressindex = 0, startindex, endindex;
while ((Addressindex < 5) && ((extract.indexOf("<span class=\"siteurl\">") >= 0)
&& (extract.indexOf("</span>"
{
startindex = (extract.indexOf("<span class=\"siteurl\">"));
endindex = extract.indexOf("</span>",
Address = extract.substring(startind
extract = extract.substring(endindex
Address = (Address.trim());
Address =("http://" + Address + "/");
S2.addElement(Address);
System.out.println(Address
Addressindex++;
}
}
} // end of class definition
ASKER
Hi RomanRega
I have followed all the sujestion above, which have solved thoughs problems,
i am now having aproblem when i put in a search term e.g. Thomas,
that i get a
java.lang.StringIndexOutOf BoundsExce ption,
know i have seen this error before but
when i looked on the java.sun website i couldn't find any advice on how to get around it?
One idea i had was to limit the size of the string allowed to be taken in, but how would i do that?
and would that work?
any help would be appreciated thanks
I have followed all the sujestion above, which have solved thoughs problems,
i am now having aproblem when i put in a search term e.g. Thomas,
that i get a
java.lang.StringIndexOutOf
know i have seen this error before but
when i looked on the java.sun website i couldn't find any advice on how to get around it?
One idea i had was to limit the size of the string allowed to be taken in, but how would i do that?
and would that work?
any help would be appreciated thanks
That error means that your are trying to access to a character that does not exist in a String.
Could be either beacouse the index youre giving is too big or too small.
I cant see your code, but i have a basic idea where this error could be triggered:
...
int startPosition = Output.indexOf("<"); // encountered the first opening brace
int endPosition;
if (startPosition>=0) endPosition = Output.indexOf(">", startPosition); // encountered the first closing braces
while( startPosition != -1 )
{
returnMessage.delete( startPosition, endPosition+1 ); // remove the tag
startPosition = (returnMessage.toString()) .indexOf(" <"); // look for the next opening brace
if (startPosition>=0) endPosition = Output.indexOf(">", startPosition); // encountered the first closing braces
}
Output=returnMessage.toStr ing();
...
In fact if not open '>' is found startPosition will be -1;
You cant use -1 as a valid index, the String starts from 0.
Anyway alway be sure to pass proper index to calls like "delete( startPosition, endPosition+1 );", "Output.indexOf(">", startPosition);" or similar
Could be either beacouse the index youre giving is too big or too small.
I cant see your code, but i have a basic idea where this error could be triggered:
...
int startPosition = Output.indexOf("<"); // encountered the first opening brace
int endPosition;
if (startPosition>=0) endPosition = Output.indexOf(">", startPosition); // encountered the first closing braces
while( startPosition != -1 )
{
returnMessage.delete( startPosition, endPosition+1 ); // remove the tag
startPosition = (returnMessage.toString())
if (startPosition>=0) endPosition = Output.indexOf(">", startPosition); // encountered the first closing braces
}
Output=returnMessage.toStr
...
In fact if not open '>' is found startPosition will be -1;
You cant use -1 as a valid index, the String starts from 0.
Anyway alway be sure to pass proper index to calls like "delete( startPosition, endPosition+1 );", "Output.indexOf(">", startPosition);" or similar
ASKER
Hi every one
I have solved some of the problems with my program with the help of RomanRega
but i am still having problems with some of my methods
they are the RemoveHTML() method which at the moment doen't remove the html
here is the first version of the method
StringBuffer returnMessage = new StringBuffer(Output);
int startPosition = Output.indexOf("<"); // encountered the first opening brace
int endPosition = Output.indexOf(">");
if (startPosition >= 0) endPosition = Output.indexOf(">", startPosition); // encountered the first closing braces
while( startPosition != -1 )
{
returnMessage.delete( startPosition, endPosition +1 ); // remove the tag
startPosition = (returnMessage.toString()) .indexOf(" <"); // look for the next opening brace
//endPosition = (returnMessage.toString()) .indexOf(" >", startPosition);
if (startPosition >= 0) endPosition = Output.indexOf(">", startPosition); // encountered the first closing braces
}
Output=returnMessage.toStr ing();
System.out.println(Output) ;
but this only removes some of the < or > charcters i need it to remove all the in between stuff
i have another version i have been working on but this seems to do the same
char currentChar;
int startI = 0;
int endI;
int tokNo;
StringTokenizer temp;
boolean flag = false;
for (endI = 0; endI < Output.length(); endI++)
{
currentChar = Output.charAt(endI);
if (currentChar == '<')
{
flag = true;
temp = new StringTokenizer(Output.sub string(sta rtI, endI));
tokNo = temp.countTokens();
for(int words = 0; words < tokNo; words++)
{
extract.addElement(temp.ne xtToken(" \n\t"));
}
}
while(flag && endI < Output.length())
{
currentChar = Output.charAt(endI);
if (currentChar == '>')
{
startI = endI + 1;
flag = false;
}
else
{
endI++;
}
}
}
Can anyone help i have to get the program finished by the end of the week!
I have solved some of the problems with my program with the help of RomanRega
but i am still having problems with some of my methods
they are the RemoveHTML() method which at the moment doen't remove the html
here is the first version of the method
StringBuffer returnMessage = new StringBuffer(Output);
int startPosition = Output.indexOf("<"); // encountered the first opening brace
int endPosition = Output.indexOf(">");
if (startPosition >= 0) endPosition = Output.indexOf(">", startPosition); // encountered the first closing braces
while( startPosition != -1 )
{
returnMessage.delete( startPosition, endPosition +1 ); // remove the tag
startPosition = (returnMessage.toString())
//endPosition = (returnMessage.toString())
if (startPosition >= 0) endPosition = Output.indexOf(">", startPosition); // encountered the first closing braces
}
Output=returnMessage.toStr
System.out.println(Output)
but this only removes some of the < or > charcters i need it to remove all the in between stuff
i have another version i have been working on but this seems to do the same
char currentChar;
int startI = 0;
int endI;
int tokNo;
StringTokenizer temp;
boolean flag = false;
for (endI = 0; endI < Output.length(); endI++)
{
currentChar = Output.charAt(endI);
if (currentChar == '<')
{
flag = true;
temp = new StringTokenizer(Output.sub
tokNo = temp.countTokens();
for(int words = 0; words < tokNo; words++)
{
extract.addElement(temp.ne
}
}
while(flag && endI < Output.length())
{
currentChar = Output.charAt(endI);
if (currentChar == '>')
{
startI = endI + 1;
flag = false;
}
else
{
endI++;
}
}
}
Can anyone help i have to get the program finished by the end of the week!
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
But seems that you got tired and deconcetrated along the way.
There are bugs cluttering the whole file.
First of all:
for (int as = 0; as > Site1.size(); as++)
{
Sites.addElement(Site1.ele
}
is not gonna work. Should be "as < Site1.size()"
In Keyword you should initialize Output to an empty string
Output="";
or you get a NullPointer Exception inside resultDown() when you do "Output = Output + downHtml;"
Always in resultDown(), you need to change in something like this.
int k=0;
while (k < ENDSites.size())
{
try
{
u = new URL(ENDSites.get(k).toStri
...
}catch ....
}finally{
k++;
}
Or at least fill the URL array with the URLs ( and, better, change that name. it's confusing)
RemoveHtml() does a hell of job for nothing.
After removing the unneeded you should assign the result back to Output.
Just add
"Output=returnMessage.toSt
I suggest that you do the search oof the ">" like this
"endPosition = (returnMessage.toString())
so you start to esarch from the startPosition and not again from the beginning.
I notice that you always enclose the url between a "http://" and a "/". It's not always necessary. Check like this:
if (!Address.startsWith("http://")) Address =("http://" + Address );
if (!Address.endsWith("/")) Address = Address + "/";
And one other thing. In Searcher.actionPerformed()
keywords= newKeyword.returnKeywords(
And I stop here because I think there's still a hell of debugging and probably youre were just stucked and willing to finish alone your work.
So I Hope i helped you to get unstucked.
Good work