Java Reference
In-Depth Information
* The type of link this URL is.
* @return True if the spider should scan for links on
* this page.
*/
public boolean spiderFoundURL(URL url, URL source,
    SpiderReportable.URLType type) {
  // When no base is set (null), every URL is accepted.
  if (this.base == null) {
    return true;
  }
  // Otherwise accept the URL only when its host equals the base,
  // compared without regard to case.
  return this.base.equalsIgnoreCase(url.getHost());
}
/**
* Called when the spider is about to process a NON-HTML
* URL.
*
* @param url
* The URL that the spider found.
* @param stream
* An InputStream to read the page contents from.
* @throws IOException
* Thrown if an IO error occurs while processing
* the page.
*/
public void spiderProcessURL(URL url, InputStream stream)
throws IOException {
byte[] buffer = new byte[1024];
int length;
String filename =
URLUtility.convertFilename(this.path, url, true);
try {
OutputStream os = new FileOutputStream(filename);
do {
length = stream.read(buffer);
if (length != -1) {
os.write(buffer, 0, length);
}
} while (length != -1);
os.close();
} catch (FileNotFoundException e) {
Search WWH ::




Custom Search