Java Reference
In-Depth Information
/**
 * Creates an HTML parser that reads from the given spider stream. As the
 * HTML is parsed, the spider gathers link information in the background.
 *
 * @param base
 *            The URL of the document being parsed; relative links are
 *            interpreted against it.
 * @param is
 *            The stream supplying the HTML to parse.
 * @param spider
 *            The spider on whose behalf the document is parsed.
 * @throws WorkloadException
 *             If workload management reports an error.
 */
public SpiderParseHTML(URL base, SpiderInputStream is,
        Spider spider)
        throws WorkloadException {
    super(is);
    // Keep references to the collaborators first; none of these
    // assignments depends on another.
    this.base = base;
    this.spider = spider;
    stream = is;
    // The depth of the base URL is looked up from the spider's workload
    // manager.
    depth = spider.getWorkloadManager().getDepth(base);
}
/**
 * Returns the stream this parser was constructed with.
 *
 * @return The SpiderInputStream being parsed.
 */
public SpiderInputStream getStream() {
    return stream;
}
/**
* Read a single character. This function will process any
* tags that the spider needs for navigation, then pass
* the character on to the caller. This allows the spider
* to transparently gather its links.
*
* @return The character read.
* @throws IOException
* I/O error.
*/
@Override
Search WWH ::




Custom Search