import java.net.URL;
import org.apache.http.norobots.NoRobotClient;
// Usage sketch: ask a NoRobotClient whether a given URL may be fetched.
// The constructor must be invoked with `new`; without it Java looks for a
// method named NoRobotClient and the snippet does not compile.
// NOTE(review): the string argument is presumably the user-agent name that
// robots rules are matched against — confirm against NoRobotClient.
NoRobotClient nrc = new NoRobotClient( "googlebot" );
// Give the client the site's base URL before querying individual pages.
// NOTE(review): presumably this retrieves and parses the site's robots.txt
// — confirm against NoRobotClient.parse.
nrc.parse( new URL( "http://www.apache.org/" ) );
// Query a specific URL on that site; the boolean result is kept in `test`.
boolean test = nrc.isUrlAllowed( new URL( "http://www.apache.org/index.html" ) );