Java Reference
In-Depth Information
* @param spider
* The spider that will be working with this
* object.
*/
public void init(Spider spider) {
  // No initialization is performed here: the body is empty and the
  // spider argument is neither used nor stored.
}
/**
 * Callback invoked each time the spider encounters a URL.
 *
 * <p>This implementation accepts every URL unconditionally: none of the
 * arguments are inspected and the result is always {@code true}. Because
 * this spider is meant to, in theory, visit every URL on the Internet,
 * no URLs are filtered out.
 *
 * @param url
 *          The URL that the spider found.
 * @param source
 *          The page on which the URL was found.
 * @param type
 *          The kind of link this URL represents.
 * @return Always {@code true}, indicating that the spider should scan
 *         this page for links.
 */
public boolean spiderFoundURL(
    URL url,
    URL source,
    SpiderReportable.URLType type) {
  // Every URL is accepted; no filtering is applied.
  return true;
}
/**
* Called when the spider is about to process a NON-HTML
* URL.
*
* @param url
* The URL that the spider found.
* @param stream
* An InputStream to read the page contents from.
* @throws IOException
* Thrown if an IO error occurs while processing
* the page.
*/
public void spiderProcessURL(URL url, InputStream stream)
throws IOException {
byte[] buffer = new byte[1024];
int length;
String filename = URLUtility.convertFilename(this.path, url,
true);
Search WWH ::




Custom Search