Java Reference
In-Depth Information
try {
logger.fine("Processing: " + this.url);
// get the URL's contents
connection = this.url.openConnection();
connection.setConnectTimeout(
this.spider.getOptions().timeout);
connection.setReadTimeout(this.spider.getOptions().timeout);
if (this.spider.getOptions().userAgent != null) {
connection.setRequestProperty("User-Agent",
this.spider.getOptions().userAgent);
}
// read the URL
is = connection.getInputStream();
// parse the URL
if (connection.getContentType().equalsIgnoreCase(
"text/html")) {
SpiderParseHTML parse = new SpiderParseHTML(
connection.getURL(),
new SpiderInputStream(is, null), this.spider);
this.spider.getReport().spiderProcessURL(this.url, parse);
} else {
this.spider.getReport().spiderProcessURL(this.url, is);
}
} catch (IOException e) {
logger.log(Level.INFO,
"I/O error on URL:" + this.url.toString());
try {
this.spider.getWorkloadManager().markError(this.url);
} catch (WorkloadException e1) {
logger.log(Level.WARNING,
"Error marking workload(1).", e);
}
this.spider.getReport().spiderURLError(this.url);
return;
} catch (Throwable e) {
try {
this.spider.getWorkloadManager().markError(this.url);
} catch (WorkloadException e1) {
logger.log(Level.WARNING,
"Error marking workload(2).", e);
}
Search WWH ::




Custom Search