// Documents.java
package com.ljs.ootp.extract.html;
import com.github.rholder.retry.RetryException;
import com.github.rholder.retry.Retryer;
import com.github.rholder.retry.RetryerBuilder;
import com.github.rholder.retry.StopStrategies;
import com.github.rholder.retry.WaitStrategies;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Static helpers for loading HTML pages as jsoup {@link Document}s.
 *
 * @author lstephen
 */
public final class Documents {

    /**
     * Attempt limit handed to {@code StopStrategies.stopAfterAttempt} when
     * building the retryer used by {@link #load(String)}.
     */
    private static final int NUMBER_OF_RETRIES = 3;

    private static final Logger LOGGER =
        Logger.getLogger(Documents.class.getName());

    /** Not instantiable: this class only exposes static helpers. */
    private Documents() { }

    /**
     * Opens the given URL, reads its content and parses it with jsoup.
     *
     * <p>The stream is decoded as ISO-8859-1 and parsed with an empty base
     * URI. Any exception thrown while opening or parsing triggers another
     * attempt, with an exponential wait between attempts, until the attempt
     * limit ({@code NUMBER_OF_RETRIES}) is reached.
     *
     * @param url the location of the page to load, in a form accepted by
     *     {@link URL#URL(String)}
     * @return the parsed document
     * @throws RuntimeException wrapping the {@link RetryException} or
     *     {@link ExecutionException} reported by the retryer when the page
     *     could not be loaded; the message names the offending URL
     */
    public static Document load(final String url) {
        LOGGER.log(Level.INFO, "Loading page {0}...", url);

        Retryer<Document> retryer = RetryerBuilder
            .<Document>newBuilder()
            .retryIfException()
            .withStopStrategy(StopStrategies.stopAfterAttempt(NUMBER_OF_RETRIES))
            .withWaitStrategy(WaitStrategies.exponentialWait())
            .build();

        try {
            return retryer.call(new Callable<Document>() {
                @Override
                public Document call() throws IOException {
                    // NOTE(review): openStream() is used as-is, so no explicit
                    // connect/read timeout is configured here - confirm that
                    // is acceptable for the URLs this is called with.
                    try (InputStream in = new URL(url).openStream()) {
                        return Jsoup.parse(in, Charsets.ISO_8859_1.name(), "");
                    }
                }
            });
        } catch (RetryException | ExecutionException e) {
            // Both caught types are checked exceptions, so wrap them in an
            // unchecked one; keep the cause and say which page failed.
            throw new RuntimeException("Unable to load page " + url, e);
        }
    }
}