Java Reference
In-Depth Information
^([\d.]+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(.+?)" (\d{3}) (\d+) "([^"]+)" "([^"]+)"
You may find it informative to refer back to Table 4-1 and review the full syntax used here.
Note in particular the use of the nongreedy quantifier +? in \"(.+?)\" to match a quoted
string; you can't just use .+ because that would match too much (up to the quote at the end
of the line). Code to extract the various fields such as IP address, request, referrer URL, and
browser version is shown in Example 4-9.
Example 4-9. LogRegExp.java
public
public class
class LogRegExp
LogRegExp {
public
public static
static void
void main ( String argv []) {
String logEntryPattern =
"^([\\d.]+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+-]\\d{4})\\] " +
"\"(.+?)\" (\\d{3}) (\\d+) \"([^\"]+)\" \"([^\"]+)\"" ;
System . out . println ( "RE Pattern:" );
System . out . println ( logEntryPattern );
System . out . println ( "Input line is:" );
String logEntryLine = LogExample . logEntryLine ;
System . out . println ( logEntryLine );
Pattern p = Pattern . compile ( logEntryPattern );
Matcher matcher = p . matcher ( logEntryLine );
iif (! matcher . matches () ||
LogExample . NUM_FIELDS != matcher . groupCount ()) {
System . err . println ( "Bad log entry (or problem with regex):" );
System . err . println ( logEntryLine );
return
return ;
}
System . out . println ( "IP Address: " + matcher . group ( 1 ));
System . out . println ( "UserName: " + matcher . group ( 3 ));
System . out . println ( "Date/Time: " + matcher . group ( 4 ));
System . out . println ( "Request: " + matcher . group ( 5 ));
System . out . println ( "Response: " + matcher . group ( 6 ));
System . out . println ( "Bytes Sent: " + matcher . group ( 7 ));
iif (! matcher . group ( 8 ). equals ( "-" ))
System . out . println ( "Referer: " + matcher . group ( 8 ));
System . out . println ( "User-Agent: " + matcher . group ( 9 ));
}
}
Search WWH ::




Custom Search