package cn.edu.hfut.dmic.webcollector.crawler;

import cn.edu.hfut.dmic.webcollector.fetcher.Visitor;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatum;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Links;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.net.HttpRequest;
import cn.edu.hfut.dmic.webcollector.net.HttpResponse;
import cn.edu.hfut.dmic.webcollector.net.Requester;
import cn.edu.hfut.dmic.webcollector.util.RegexRule;
import org.jsoup.nodes.Document;
import org.zlog.Logger;
import org.zlog.LoggerFactory;

/* loaded from: classes.dex */
/**
 * Crawler base class that acts as its own {@link Visitor} and {@link Requester}:
 * the constructor stores {@code this} in both the {@code visitor} and
 * {@code requester} fields.
 *
 * <p>After a page has been visited, links can be collected from its HTML document
 * through the configured {@link RegexRule}; see {@link #afterVisit(Page, CrawlDatums)}.
 */
public abstract class BasicCrawler extends Crawler implements Visitor, Requester {
    public static final Logger LOG = LoggerFactory.getLogger(BasicCrawler.class);

    /** When {@code false}, {@link #afterVisit(Page, CrawlDatums)} does nothing. */
    protected boolean autoParse;

    /** Rule set handed to {@link Links#addByRegex} when links are collected. */
    protected RegexRule regexRule = new RegexRule();

    /**
     * Creates a crawler that uses itself as visitor and requester.
     *
     * @param z initial value of the auto-parse flag
     */
    public BasicCrawler(boolean z) {
        this.visitor = this;
        this.requester = this;
        this.autoParse = z;
    }

    // ----------------------------------------------------------- configuration

    /**
     * Adds one rule to the current {@link RegexRule}.
     *
     * @param str the rule, forwarded unchanged to {@code RegexRule.addRule}
     */
    public void addRegex(String str) {
        regexRule.addRule(str);
    }

    /** @return the rule set currently in use */
    public RegexRule getRegexRule() {
        return regexRule;
    }

    /**
     * Replaces the rule set.
     *
     * @param regexRule the rule set to use from now on
     */
    public void setRegexRule(RegexRule regexRule) {
        this.regexRule = regexRule;
    }

    /** @return the current value of the auto-parse flag */
    public boolean isAutoParse() {
        return autoParse;
    }

    /**
     * Sets the auto-parse flag.
     *
     * @param z new value of the flag
     */
    public void setAutoParse(boolean z) {
        autoParse = z;
    }

    // ------------------------------------------------------- Visitor callbacks

    /**
     * Adds the links selected by the rule set from the page's document to
     * {@code crawlDatums}.
     *
     * <p>Nothing is added unless all of the following hold: auto-parse is on, the
     * rule set is not empty, the response content type is non-null and contains
     * {@code "text/html"}, and the page yields a non-null document.
     *
     * @param page        the page that was just visited
     * @param crawlDatums collection receiving the extracted links
     */
    @Override
    public void afterVisit(Page page, CrawlDatums crawlDatums) {
        if (!autoParse || regexRule.isEmpty()) {
            return;
        }

        // The response is only consulted once link extraction is known to be wanted.
        String contentType = page.getResponse().getContentType();
        boolean html = contentType != null && contentType.contains("text/html");
        if (!html) {
            return;
        }

        Document doc = page.getDoc();
        if (doc != null) {
            crawlDatums.add(new Links().addByRegex(doc, regexRule));
        }
    }

    /**
     * Empty implementation of the {@link Visitor} callback; subclasses may override.
     *
     * @param page        unused here
     * @param crawlDatums unused here
     */
    @Override
    public void fail(Page page, CrawlDatums crawlDatums) {
        // Intentionally empty.
    }

    /**
     * Empty implementation of the {@link Visitor} callback; subclasses may override.
     *
     * @param page        unused here
     * @param crawlDatums unused here
     */
    @Override
    public void notFound(Page page, CrawlDatums crawlDatums) {
        // Intentionally empty.
    }

    // --------------------------------------------------------------- Requester

    /**
     * Builds an {@link HttpRequest} for the given datum and returns its response.
     *
     * @param crawlDatum the datum to request
     * @return whatever {@code HttpRequest.getResponse()} returns for that datum
     * @throws Exception propagated from creating or executing the request
     */
    @Override
    public HttpResponse getResponse(CrawlDatum crawlDatum) throws Exception {
        HttpRequest request = new HttpRequest(crawlDatum);
        return request.getResponse();
    }
}
