Accessing Web Sources - Beginning Java Programming: The Object-Oriented Approach

Java Reference

In-Depth Information

That is all the setup you need. Next, take a look at how to extract data from a web page.

Screen Scraping Without Cookies

Create a class called WikipediaGetter with the following content:

import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

import org.jsoup.Jsoup;

import org.jsoup.nodes.Document;

import org.jsoup.nodes.Element;

import org.jsoup.select.Elements;

public class WikipediaGetter {

public static void main(String[] args) throws IOException {

List<String[]> coffee = new ArrayList<String[]>();

Document doc = Jsoup.connect(

Elements wikiTables = doc.select("table.wikitable");

System.out.println(wikiTables.size() + " wikitables found");

for (Element table : wikiTables) {

if (table.html().contains("<th>Arabica</th>")) {

// We've found our table!

Elements rows = table.select("tr");

for (Element row : rows) {

Elements cells = row.select("td");

if (cells.size() == 0)

continue;

String[] line = new String[cells.size()];

for (int i = 0; i < line.length; i++) {

line[i] = cells.get(i).text();

}

coffee.add(line);

}

break;

}

for (String[] variety : coffee) {

System.out.println("----- " + variety[0] + " -----");

System.out.println("Arabica: " + variety[1]);

System.out.println("Region(s): " + variety[2]);

System.out.println("Comments: " + variety[3]);

}

Search WWH ::

Custom Search

Home