package org.apache.any23.plugin.crawler;

import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Pattern;

/* loaded from: input_file:org/apache/any23/plugin/crawler/SiteCrawler.class */
/**
 * Thin facade over a crawler4j {@link CrawlController}: it holds the crawl
 * configuration, a set of {@link CrawlerListener}s and, for non-blocking runs,
 * the executor on which the crawl was launched.
 *
 * <p>{@link #start(URL, Pattern, boolean)} and {@link #stop()} are synchronized on
 * this instance. The configuration setters are not.</p>
 */
public class SiteCrawler {
    /**
     * Regular expression matching strings that end in one of a fixed set of
     * file extensions (style sheets, scripts, images, audio/video, archives, ...).
     * NOTE(review): presumably used to exclude such URLs from the crawl; the
     * actual use happens in code outside this class.
     */
    public static final String DEFAULT_PAGE_FILTER_RE = ".*(\\.(css|js|bmp|gif|jpe?g|png|tiff?|mid|mp2|mp3|mp4|wav|wma|avi|mov|mpeg|ram|m4v|wmv|rm|smil|pdf|swf|zip|rar|gz|xml|txt))$";

    /** Number of crawlers used when {@link #setNumOfCrawlers(int)} is never called. */
    public static final int DEFAULT_NUM_OF_CRAWLERS = 10;

    /** Crawler implementation used when {@link #setWebCrawler(Class)} is never called. */
    public static final Class<? extends WebCrawler> DEFAULT_WEB_CRAWLER = DefaultWebCrawler.class;

    private final CrawlController controller;
    private final CrawlConfig crawlConfig;

    /** Executor of the non-blocking crawl; stays {@code null} until such a crawl is started. */
    private ExecutorService service;

    /** Compiled form of {@link #DEFAULT_PAGE_FILTER_RE}, used by {@link #start(URL, boolean)}. */
    public final Pattern defaultFilters = Pattern.compile(DEFAULT_PAGE_FILTER_RE);

    /*
     * The list itself is the synchronized wrapper, so add/remove calls made through
     * this class and accesses made by the code that receives it in start() share
     * one lock.
     */
    private final List<CrawlerListener> listeners =
            Collections.synchronizedList(new ArrayList<CrawlerListener>());

    private int numOfCrawlers = DEFAULT_NUM_OF_CRAWLERS;
    private Class<? extends WebCrawler> webCrawler = DEFAULT_WEB_CRAWLER;

    /**
     * Creates a crawler whose storage folder is the absolute path of {@code file}.
     *
     * @param file crawl storage folder.
     * @throws IllegalArgumentException if anything goes wrong while building the
     *         underlying controller (including a {@code null} {@code file}); the
     *         original exception is preserved as the cause.
     */
    public SiteCrawler(File file) {
        try {
            this.crawlConfig = new CrawlConfig();
            this.crawlConfig.setCrawlStorageFolder(file.getAbsolutePath());
            this.crawlConfig.setUserAgentString("Apache Any23 Web Crawler");

            final PageFetcher pageFetcher = new PageFetcher(this.crawlConfig);
            final RobotstxtServer robotstxtServer = new RobotstxtServer(new RobotstxtConfig(), pageFetcher);
            this.controller = new CrawlController(this.crawlConfig, pageFetcher, robotstxtServer);
        } catch (Exception e) {
            throw new IllegalArgumentException("Error while initializing crawler controller.", e);
        }
    }

    /**
     * @return the number of crawlers handed to the controller on start.
     */
    public int getNumOfCrawlers() {
        return this.numOfCrawlers;
    }

    /**
     * Sets the number of crawlers handed to the controller on start.
     *
     * @param i number of crawlers, must be strictly positive.
     * @throws IllegalArgumentException if {@code i <= 0}.
     */
    public void setNumOfCrawlers(int i) {
        if (i <= 0) {
            throw new IllegalArgumentException("Invalid number of crawlers, must be > 0 .");
        }
        this.numOfCrawlers = i;
    }

    /**
     * @return the crawler class handed to the controller on start.
     */
    public Class<? extends WebCrawler> getWebCrawler() {
        return this.webCrawler;
    }

    /**
     * Sets the crawler class handed to the controller on start.
     *
     * @param cls crawler implementation, not {@code null}.
     * @throws NullPointerException if {@code cls} is {@code null}.
     */
    public void setWebCrawler(Class<? extends WebCrawler> cls) {
        if (cls == null) {
            throw new NullPointerException("c cannot be null.");
        }
        this.webCrawler = cls;
    }

    /**
     * @return the max crawling depth currently stored in the crawl configuration.
     */
    public int getMaxDepth() {
        return this.crawlConfig.getMaxDepthOfCrawling();
    }

    /**
     * Sets the max crawling depth.
     *
     * @param i {@code -1} or a strictly positive value.
     *          NOTE(review): {@code -1} presumably means "no limit" in crawler4j; confirm.
     * @throws IllegalArgumentException if {@code i} is {@code 0} or less than {@code -1}.
     */
    public void setMaxDepth(int i) {
        if (i < -1 || i == 0) {
            throw new IllegalArgumentException("Invalid maxDepth, must be -1 or > 0");
        }
        this.crawlConfig.setMaxDepthOfCrawling(i);
    }

    /**
     * @return the max number of pages to fetch currently stored in the crawl configuration.
     */
    public int getMaxPages() {
        return this.crawlConfig.getMaxPagesToFetch();
    }

    /**
     * Sets the max number of pages to fetch.
     *
     * @param i {@code -1} or a strictly positive value.
     *          NOTE(review): {@code -1} presumably means "no limit" in crawler4j; confirm.
     * @throws IllegalArgumentException if {@code i} is {@code 0} or less than {@code -1}.
     */
    public void setMaxPages(int i) {
        if (i < -1 || i == 0) {
            throw new IllegalArgumentException("Invalid maxPages, must be -1 or > 0");
        }
        this.crawlConfig.setMaxPagesToFetch(i);
    }

    /**
     * @return the politeness delay currently stored in the crawl configuration.
     */
    public int getPolitenessDelay() {
        return this.crawlConfig.getPolitenessDelay();
    }

    /**
     * Sets the politeness delay. Negative values are silently ignored and leave
     * the current setting untouched.
     *
     * @param i delay forwarded to {@code CrawlConfig#setPolitenessDelay}
     *          (NOTE(review): unit is presumably milliseconds; confirm against crawler4j).
     */
    public void setPolitenessDelay(int i) {
        if (i >= 0) {
            this.crawlConfig.setPolitenessDelay(i);
        }
    }

    /**
     * Registers a listener.
     *
     * @param crawlerListener listener to add.
     */
    public void addListener(CrawlerListener crawlerListener) {
        this.listeners.add(crawlerListener);
    }

    /**
     * Deregisters a listener.
     *
     * @param crawlerListener listener to remove.
     */
    public void removeListener(CrawlerListener crawlerListener) {
        this.listeners.remove(crawlerListener);
    }

    /**
     * Starts crawling from {@code url}.
     *
     * @param url     seed URL.
     * @param pattern filter pattern published through {@code SharedData} together with
     *                the seed and the registered listeners.
     * @param z       if {@code true} the crawl runs on the calling thread and this method
     *                returns when the controller's {@code start} call returns; if
     *                {@code false} the crawl is submitted to a newly created
     *                single-thread executor and this method returns immediately.
     * @throws IllegalStateException if a non-blocking crawl is requested while an executor
     *         from a previous non-blocking start is still referenced.
     * @throws Exception declared for compatibility with existing callers.
     */
    public synchronized void start(URL url, Pattern pattern, boolean z) throws Exception {
        // Reject the call before touching shared crawl state, so that a refused
        // start does not leave a stray seed or overwritten SharedData behind.
        if (!z && this.service != null) {
            throw new IllegalStateException("Another service seems to run.");
        }

        final String seed = url.toExternalForm();
        SharedData.setCrawlData(seed, pattern, this.listeners);
        this.controller.addSeed(seed);

        final Runnable crawlTask = new Runnable() {
            @Override
            public void run() {
                SiteCrawler.this.controller.start(SiteCrawler.this.getWebCrawler(), SiteCrawler.this.getNumOfCrawlers());
            }
        };

        if (z) {
            crawlTask.run();
            return;
        }
        this.service = Executors.newSingleThreadExecutor();
        this.service.execute(crawlTask);
    }

    /**
     * Starts crawling from {@code url} using {@link #defaultFilters}.
     *
     * @param url seed URL.
     * @param z   blocking flag, see {@link #start(URL, Pattern, boolean)}.
     * @throws Exception see {@link #start(URL, Pattern, boolean)}.
     */
    public void start(URL url, boolean z) throws Exception {
        start(url, this.defaultFilters, z);
    }

    /**
     * Requests shutdown of the executor running a non-blocking crawl. Does nothing
     * if no non-blocking crawl was ever started on this instance.
     *
     * <p>The executor reference is kept after shutdown, so a later non-blocking
     * {@code start} on the same instance is still rejected.</p>
     */
    public synchronized void stop() {
        if (this.service != null) {
            this.service.shutdownNow();
        }
    }
}
