Java Reference
In-Depth Information
/**
 * Read a single value from the underlying stream and, when a tag is
 * encountered, record any URL that tag refers to.
 *
 * A result of 0 from super.read() is treated as "a tag is available":
 * the tag is fetched with getTag() and dispatched on its name
 * (case-insensitive). Anchors go to handleA, img/style/link values are
 * passed to addURL, and a base tag replaces the base URL used by this
 * object.
 *
 * @return The value returned by super.read(), unchanged.
 * @throws IOException
 *             I/O error, or a base href that cannot be resolved
 *             against the current base URL.
 */
public int read() throws IOException {
    int result = super.read();
    if (result == 0) {
        HTMLTag tag = getTag();
        String name = tag.getName();
        if (name.equalsIgnoreCase("a")) {
            String href = tag.getAttributeValue("href");
            handleA(href);
        } else if (name.equalsIgnoreCase("img")) {
            String src = tag.getAttributeValue("src");
            addURL(src, SpiderReportable.URLType.IMAGE);
        } else if (name.equalsIgnoreCase("style")) {
            // NOTE(review): style tags are looked up by "src" here;
            // confirm this is the intended attribute.
            String src = tag.getAttributeValue("src");
            addURL(src, SpiderReportable.URLType.STYLE);
        } else if (name.equalsIgnoreCase("link")) {
            // NOTE(review): link hrefs are reported as SCRIPT, not
            // STYLE; left as-is, confirm this is intentional.
            String href = tag.getAttributeValue("href");
            addURL(href, SpiderReportable.URLType.SCRIPT);
        } else if (name.equalsIgnoreCase("base")) {
            String href = tag.getAttributeValue("href");
            // A base tag may carry only a target attribute. Presumably
            // getAttributeValue returns null in that case (confirm);
            // new URL(context, null) throws MalformedURLException, which
            // would abort parsing of an otherwise valid page, so the
            // current base is kept instead.
            if (href != null) {
                this.base = new URL(this.base, href);
            }
        }
    }
    return result;
}
/**
 * Consume the entire page by calling read() until it reports -1.
 * The values read are thrown away; the point of the call is that
 * read() gets to see every tag and pick up the links in them.
 *
 * @throws IOException
 *             I/O error.
 */
public void readAll() throws IOException {
    int value;
    do {
        // The value itself is not needed, only the end marker.
        value = read();
    } while (value != -1);
}
/**
* Used internally, to add a URL to the spider's workload.
*
* @param u
* The URL to add.
* @param type
* What type of link this is.
Search WWH ::




Custom Search