package com.dianyou.browser.reading;

import com.umeng.commonsdk.proguard.g;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.select.Selector;

/* loaded from: classes2.dex */
public class ArticleTextExtractor {

    /* renamed from: a, reason: collision with root package name */
    // Tag names that qualify an element as a content candidate; matched in full by p(Document).
    private static final Pattern f7679a = Pattern.compile("p|div|td|h1|h2|article|section");
    // Inline-style fragments that cost an element 50 points in c(Element).
    private static final Pattern h = Pattern.compile("hidden|display: ?none|font-size: ?small");
    // Flag 2 is Pattern.CASE_INSENSITIVE. Not referenced by any method visible in this file.
    private static final Pattern i = Pattern.compile("by|name|author|posted|twitter|handle|news", 2);
    // Lower-cased title fragments dropped by g(String); filled in the static initializer.
    private static final Set<String> j = new LinkedHashSet();
    // Formatter used when the caller does not supply one (see field l).
    private static final OutputFormatter k;
    // Assigned in the static initializer; not referenced by any method visible in this file.
    private static final List<Pattern> m;

    /* renamed from: b, reason: collision with root package name */
    // Source text of f7681c, stored by a(String).
    private String f7680b;

    /* renamed from: c, reason: collision with root package name */
    // Compiled by a(String); a hit on class or id costs 20 points each in c(Element).
    private Pattern f7681c;

    /* renamed from: d, reason: collision with root package name */
    // Source text of e, stored by b(String).
    private String f7682d;
    // Compiled by b(String); hits on class, id and itemprop add points in c(Element).
    private Pattern e;
    // Source text of g, stored by c(String).
    private String f;
    // Compiled by c(String); hits cost 50 points in c(Element) and 30 points in a(Element).
    private Pattern g;
    // Formatter handed to the extraction pipeline by the public a(JResult, String, int).
    private OutputFormatter l = k;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes2.dex */
    /**
     * Sorts image candidates in descending order of their {@code f7694a} value, so the
     * highest-valued entry ends up first.
     */
    public static class a implements Comparator<com.dianyou.browser.reading.a> {
        private a() {
        }

        @Override // java.util.Comparator
        public int compare(com.dianyou.browser.reading.a first, com.dianyou.browser.reading.a second) {
            // The operands are deliberately reversed: comparing second to first yields the
            // descending order.
            return second.f7694a.compareTo(first.f7694a);
        }
    }

    static {
        // Title fragments (already lower-cased) that g(String) removes from a page title.
        Collections.addAll(j, "hacker news", "facebook", "home", "articles");
        k = new OutputFormatter();
        m = Collections.singletonList(Pattern.compile("By\\S*(.*)[\\.,].*"));
    }

    /**
     * Creates an extractor and compiles the three class/id patterns that c(Element) and
     * a(Element) use for scoring: a(String) sets the pattern that costs 20 points per hit,
     * b(String) the pattern that adds points, and c(String) the pattern that costs 50 points.
     */
    public ArticleTextExtractor() {
        // NOTE(review): "sponsora(d|ll|...)" reads like "sponsor" and "a(d|ll|...)" joined without
        // a '|' separator; as written the alternative only matches text such as "sponsorad".
        // Confirm the intended pattern before changing it, because it alters scoring.
        a("com(bx|ment|munity)|dis(qus|cuss)|e(xtra|[-]?mail)|foot|header|menu|re(mark|ply)|rss|sh(are|outbox)|sponsora(d|ll|gegate|rchive|ttachment)|(pag(er|ination))|popup|print|login|si(debar|gn|ngle)");
        b("(^(body|content|h?entry|main|page|post|text|blog|story|haupt))|arti(cle|kel)|instapaper_body");
        c("nav($|igation)|user|com(ment|bx)|(^com-)|contact|foot|masthead|(me(dia|ta))|outbrain|promo|related|scroll|(sho(utbox|pping))|sidebar|sponsor|tags|tool|widget|player|disclaimer|toc|infobox|vcard");
    }

    /**
     * Scores the children and grandchildren of {@code element} by the amount of text they own.
     *
     * <p>Children with fewer than 20 characters of own text are ignored. Longer ones earn a
     * length bonus above 200 characters, 30 points for {@code h1}/{@code h2}, or the result of
     * {@link #a(Element, String)} for {@code div}/{@code p}. Grandchildren are scored the same
     * way but count for one third; a child whose id or class matches pattern {@code g} costs
     * 30 points (before the division) and its children are skipped. A {@code div}/{@code p}
     * child with class "caption" adds 30 points.
     *
     * <p>When at least two {@code p} children own more than 50 characters, every heading child
     * adds 20 points, table-like children have their stored score lowered by 30 and {@code p}
     * children have theirs raised by 30.
     *
     * <p>Side effect: updates the "gravityScore" attribute of the elements it visits.
     *
     * @param element the candidate whose descendants are weighed
     * @return the accumulated weight, which may be negative
     */
    private int a(Element element) {
        List<Element> longParagraphs = new ArrayList<Element>(5);
        Element caption = null;
        int childScore = 0;
        for (Element child : element.children()) {
            String ownText = child.ownText();
            int length = ownText.length();
            if (length < 20) {
                continue;
            }
            if (length > 200) {
                childScore += Math.max(50, length / 10);
            }
            String tag = child.tagName();
            if (tag.equals("h1") || tag.equals("h2")) {
                childScore += 30;
            } else if (tag.equals("div") || tag.equals("p")) {
                childScore += a(child, ownText);
                if (tag.equals("p") && length > 50) {
                    longParagraphs.add(child);
                }
                if (child.className().toLowerCase().equals("caption")) {
                    caption = child;
                }
            }
        }

        int grandChildScore = 0;
        for (Element child : element.children()) {
            if (this.g.matcher(child.id()).find() || this.g.matcher(child.className()).find()) {
                grandChildScore -= 30;
                continue;
            }
            for (Element grandChild : child.children()) {
                String ownText = grandChild.ownText();
                int length = ownText.length();
                if (length < 20) {
                    continue;
                }
                int score = length > 200 ? Math.max(50, length / 10) : 0;
                String tag = grandChild.tagName();
                if (tag.equals("h1") || tag.equals("h2")) {
                    score += 30;
                } else if (tag.equals("div") || tag.equals("p")) {
                    score += a(grandChild, ownText);
                }
                grandChildScore += score;
            }
        }

        int total = childScore + (grandChildScore / 3);
        if (caption != null) {
            total += 30;
        }
        if (longParagraphs.size() >= 2) {
            // Exact tag-name matching. The previous "table;li;td;th".contains(tag) was a substring
            // test, so <a>, <b> and <i> children were penalised as if they were table markup.
            List<String> headingTags = Arrays.asList("h1", "h2", "h3", "h4", "h5", "h6");
            List<String> tabularTags = Arrays.asList("table", "li", "td", "th");
            for (Element child : element.children()) {
                String tag = child.tagName();
                if (headingTags.contains(tag)) {
                    total += 20;
                } else if (tabularTags.contains(tag)) {
                    a(child, -30);
                }
                if ("p".equals(tag)) {
                    a(child, 30);
                }
            }
        }
        return total;
    }

    /**
     * Scores one text-bearing child and adds that score to its stored "gravityScore".
     *
     * <p>If the combined result of {@code b.a} for "&amp;quot;", "&amp;lt;", "&amp;gt;" and "px"
     * exceeds 5 the score is -30; otherwise it is the text length divided by 35, rounded.
     *
     * @param element the element whose stored score is adjusted
     * @param str     the element's own text
     * @return the score that was added to the element
     */
    private static int a(Element element, String str) {
        boolean looksLikeMarkup =
                b.a(str, "&quot;") + b.a(str, "&lt;") + b.a(str, "&gt;") + b.a(str, "px") > 5;
        int score = looksLikeMarkup ? -30 : (int) Math.round(str.length() / 35.0d);
        a(element, score);
        return score;
    }

    /**
     * Computes the total weight of {@code element}: the class/id weight from c(Element), one
     * point per ten characters of own text (rounded), and the descendant weight from a(Element).
     *
     * @param element the candidate element
     * @param z       when true, the value of the first descendant "extragravityscore" attribute
     *                is added as well
     * @return the combined weight
     */
    private int a(Element element, boolean z) {
        int weight = c(element);
        weight += (int) Math.round((element.ownText().length() / 100.0d) * 10.0d);
        weight += a(element);
        if (!z) {
            return weight;
        }
        Element extra = element.select("[extragravityscore]").first();
        if (extra == null) {
            return weight;
        }
        try {
            return weight + Integer.parseInt(extra.attr("extragravityscore"));
        } catch (NumberFormatException ignored) {
            // The attribute comes from the page being parsed; a non-numeric value is treated as
            // "no bonus", consistent with how b(Element) reads "gravityScore".
            return weight;
        }
    }

    /**
     * Stores {@code str} and its compiled form as the pattern that costs 20 points per class or
     * id hit in c(Element).
     *
     * @param str regular expression source
     * @return this extractor, for chaining
     */
    private ArticleTextExtractor a(String str) {
        f7680b = str;
        f7681c = Pattern.compile(str);
        return this;
    }

    /**
     * Parses {@code str} as HTML and runs the extraction on the resulting document.
     *
     * @param jResult         result object to fill
     * @param str             HTML source; must not be empty
     * @param outputFormatter formatter that turns the best element into text
     * @param bool            whether image information is collected
     * @param i2              maximum text size passed on to the extraction
     * @return the result produced by the document-based overload
     * @throws IllegalArgumentException if {@code str} is empty
     */
    private JResult a(JResult jResult, String str, OutputFormatter outputFormatter, Boolean bool, int i2) throws Exception {
        if (!str.isEmpty()) {
            Document parsed = Jsoup.parse(str);
            return a(jResult, parsed, outputFormatter, bool, i2);
        }
        throw new IllegalArgumentException("html string is empty!?");
    }

    /**
     * Runs the extraction twice if necessary: first with the cleanup flag set (n(Document) strips
     * script, noscript and style elements from {@code document}), and, when that leaves the
     * result text empty, again on an untouched copy with the flag cleared.
     *
     * Both passes write into the same {@code jResult}, because the six-argument overload returns
     * the object it was given.
     */
    private JResult a(JResult jResult, Document document, OutputFormatter outputFormatter, Boolean bool, int i2) throws Exception {
        // The copy must be taken before the first pass, which mutates `document`.
        // NOTE(review): mo90clone looks like a decompiler-assigned name for Document.clone() - confirm.
        Document mo90clone = document.mo90clone();
        JResult a2 = a(jResult, document, outputFormatter, bool, i2, true);
        return a2.getText().isEmpty() ? a(jResult, mo90clone, outputFormatter, bool, i2, false) : a2;
    }

    /**
     * Fills {@code jResult} from {@code document}: metadata first, then the text, images and
     * links of the best-scoring element, then the remaining URLs and keywords, and finally a
     * few length and duplication clean-ups.
     *
     * @param jResult         result object to fill; also the return value
     * @param document        parsed page; must not be null
     * @param outputFormatter formatter that turns the best element into text
     * @param bool            whether image information is collected
     * @param i2              when positive, longer text is cut by a(String, int) with this limit
     * @param z               whether n(Document) cleans the document before scoring
     * @return {@code jResult}
     * @throws NullPointerException if {@code document} is null
     */
    private JResult a(JResult jResult, Document document, OutputFormatter outputFormatter, Boolean bool, int i2, boolean z) {
        if (document == null) {
            throw new NullPointerException("missing document");
        }

        // Metadata.
        jResult.setTitle(a(document));
        jResult.setDescription(c(document));
        jResult.setCanonicalUrl(b(document));
        jResult.setType(k(document));
        jResult.setSitename(l(document));
        jResult.setLanguage(m(document));
        jResult.setAuthorName(e(document));
        jResult.setAuthorDescription(a(document, jResult.getAuthorName()));
        Date published = d(document);
        if (published != null) {
            jResult.setDate(published);
        } else {
            jResult.setDate(e(b.n(jResult.getUrl())));
        }

        if (z) {
            n(document);
        }

        // Content of the best-scoring candidate.
        Element bestMatch = a(p(document));
        if (bestMatch != null) {
            if (bool.booleanValue()) {
                ArrayList images = new ArrayList();
                Element topImage = a(bestMatch, images);
                if (topImage != null) {
                    jResult.setImageUrl(b.a(topImage.attr("src")));
                    jResult.setImages(images);
                }
            }

            String text = a(outputFormatter.a(bestMatch), jResult.getTitle());
            // Text is only accepted when it is longer than the title.
            if (text.length() > jResult.getTitle().length()) {
                if (i2 > 0 && text.length() > i2) {
                    text = a(text, i2);
                }
                jResult.setText(text);
            }

            // Record each link with the offset of its markup inside the element's HTML; every
            // search starts at the offset found for the previous link.
            String html = bestMatch.toString();
            Integer position = 0;
            for (Element link : bestMatch.select("a[href]")) {
                position = Integer.valueOf(html.indexOf(link.toString(), position.intValue()));
                jResult.addLink(link.attr("abs:href"), link.text(), position);
            }
        }

        if (bool.booleanValue() && jResult.getImageUrl().isEmpty()) {
            jResult.setImageUrl(g(document));
        }
        jResult.setRssUrl(h(document));
        jResult.setVideoUrl(i(document));
        jResult.setFaviconUrl(j(document));
        jResult.setKeywords(f(document));

        // Clean-ups.
        if (jResult.getAuthorName().length() > 255) {
            jResult.setAuthorName(a(jResult.getAuthorName(), 255));
        }
        String descriptionHead = d(jResult.getAuthorDescription());
        boolean repeatsText = d(jResult.getText()).equals(descriptionHead);
        if (repeatsText || d(jResult.getDescription()).equals(descriptionHead)) {
            jResult.setAuthorDescription("");
        } else if (jResult.getAuthorDescription().length() > 1000) {
            jResult.setAuthorDescription(a(jResult.getAuthorDescription(), 1000));
        }
        if (jResult.getImageUrl().length() > 255) {
            jResult.setImageUrl("");
        }
        return jResult;
    }

    /**
     * Returns the longest prefix of {@code str} whose UTF-8 encoding fits into {@code i2} bytes.
     *
     * <p>The text is walked by code point, so a surrogate pair counts as four bytes and is
     * either kept whole or dropped whole. An unpaired surrogate is counted as three bytes.
     *
     * @param str text to shorten
     * @param i2  byte budget; zero or a negative value yields the empty string
     * @return a prefix of {@code str}, never ending in the first half of a surrogate pair
     */
    private static String a(String str, int i2) {
        int limit = Math.max(0, i2);
        // The result can never be longer than the input, so do not pre-allocate the whole budget.
        StringBuilder out = new StringBuilder(Math.min(limit, str.length()));
        int used = 0;
        int index = 0;
        while (index < str.length()) {
            int codePoint = str.codePointAt(index);
            int width;
            if (codePoint <= 0x7F) {
                width = 1;
            } else if (codePoint <= 0x7FF) {
                width = 2;
            } else if (codePoint <= 0xFFFF) {
                width = 3;
            } else {
                width = 4;
            }
            // Written as a subtraction so that a budget near Integer.MAX_VALUE cannot overflow.
            if (width > limit - used) {
                break;
            }
            out.appendCodePoint(codePoint);
            used += width;
            index += Character.charCount(codePoint);
        }
        return out.toString();
    }

    /**
     * Returns {@code str} unchanged.
     *
     * @param str  the extracted text
     * @param str2 ignored by this implementation (the caller passes the title)
     * @return {@code str}
     */
    private static String a(String str, String str2) {
        // Intentionally a pass-through: the second argument is never consulted.
        return str;
    }

    /**
     * Determines the page title, trying each source in turn until one is non-empty: the cleaned
     * document title, {@code head title}, the {@code title}, {@code og:title} and
     * {@code twitter:title} meta tags, and finally the first {@code h1} of its type.
     *
     * <p>Meta values are read from the attribute named by {@code com.umeng.analytics.pro.b.W}
     * (presumably "content" - confirm against that class).
     *
     * @param document parsed page
     * @return the title, or whatever the last fallback yields when every earlier source is empty
     */
    private static String a(Document document) {
        String title = g(document.title());
        if (title.isEmpty()) {
            title = b.b(document.select("head title").text());
        }
        if (title.isEmpty()) {
            title = b.b(document.select("head meta[name=title]").attr(com.umeng.analytics.pro.b.W));
        }
        if (title.isEmpty()) {
            title = b.b(document.select("head meta[property=og:title]").attr(com.umeng.analytics.pro.b.W));
        }
        if (title.isEmpty()) {
            title = b.b(document.select("head meta[name=twitter:title]").attr(com.umeng.analytics.pro.b.W));
        }
        if (title.isEmpty()) {
            title = b.b(document.select("h1:first-of-type").text());
        }
        return title;
    }

    /**
     * Looks for a description of the author named {@code str}.
     *
     * <p>Tries {@code .byline > .bio}, then {@code .byline span[class*=teaser]}, and finally the
     * best-scoring element whose own text contains the author name.
     *
     * @param document parsed page
     * @param str      author name; an empty name short-circuits to ""
     * @return the description text, or "" when nothing is found or the name cannot be used in
     *         a selector
     */
    private String a(Document document, String str) {
        if (str.isEmpty()) {
            return "";
        }
        String[] bylineSelectors = {".byline > .bio", ".byline span[class*=teaser]"};
        for (String selector : bylineSelectors) {
            Elements hits = document.select(selector);
            if (hits != null && !hits.isEmpty()) {
                return hits.first().text();
            }
        }
        try {
            Element best = a(document.select(":containsOwn(" + str + ')'));
            if (best == null) {
                return "";
            }
            return best.text();
        } catch (Selector.SelectorParseException unused) {
            // The author name is spliced into the selector and may not parse.
            return "";
        }
    }

    /**
     * Picks the element with the highest weight according to {@code a(element, false)}.
     *
     * @param collection candidate elements
     * @return the first element whose weight is strictly greater than every earlier one and
     *         than -200, or null when no candidate exceeds -200
     */
    private Element a(Collection<Element> collection) {
        Element best = null;
        int bestWeight = -200;
        Iterator<Element> candidates = collection.iterator();
        while (candidates.hasNext()) {
            Element candidate = candidates.next();
            int weight = a(candidate, false);
            if (weight > bestWeight) {
                bestWeight = weight;
                best = candidate;
            }
        }
        return best;
    }

    /**
     * Weighs every {@code img} below {@code element} (or below its parent when it has none),
     * appends one entry per usable image to {@code list}, sorts the list with the nested
     * comparator and returns the best image.
     *
     * <p>Images with an empty {@code src}, or for which f(String) returns true, are skipped.
     * Height and width of at least 50 add 20 each, smaller values subtract 20, unparsable values
     * count as 0. {@code alt} and {@code title} longer than 35 characters add 20 each; a parent
     * whose {@code rel} contains "nofollow" subtracts 40. The weight is multiplied by a factor
     * that halves every time a new best image is found.
     *
     * @param element element to search
     * @param list    receives one entry per usable image; sorted in place before returning
     * @return the image with the highest positive scaled weight, or null if there is none
     */
    private static Element a(Element element, List<com.dianyou.browser.reading.a> list) {
        Elements images = element.select("img");
        if (images.isEmpty()) {
            images = element.parent().select("img");
        }

        Element best = null;
        int bestWeight = 0;
        double scale = 1.0d;
        for (Element img : images) {
            String src = img.attr("src");
            if (src.isEmpty() || f(src)) {
                continue;
            }

            int weight = 0;
            int height = 0;
            try {
                height = Integer.parseInt(img.attr("height"));
                weight = height >= 50 ? 20 : -20;
            } catch (Exception unused) {
                // No usable height: leaves height and weight at 0.
            }

            int width = 0;
            try {
                width = Integer.parseInt(img.attr("width"));
                weight += width >= 50 ? 20 : -20;
            } catch (Exception unused2) {
                // No usable width: leaves width at 0 and the weight untouched.
            }

            String alt = img.attr("alt");
            if (alt.length() > 35) {
                weight += 20;
            }
            String title = img.attr("title");
            if (title.length() > 35) {
                weight += 20;
            }

            boolean noFollow = false;
            Element parent = img.parent();
            if (parent != null) {
                String rel = parent.attr("rel");
                if (rel != null && rel.contains("nofollow")) {
                    weight -= 40;
                    noFollow = true;
                }
            }

            int scaled = (int) (weight * scale);
            if (scaled > bestWeight) {
                scale /= 2.0d;
                best = img;
                bestWeight = scaled;
            }
            list.add(new com.dianyou.browser.reading.a(src, Integer.valueOf(scaled), title, height, width, alt, noFollow));
        }
        Collections.sort(list, new a());
        return best;
    }

    /**
     * Adds {@code i2} to the score stored on {@code element}.
     *
     * @param element element whose "gravityScore" attribute is updated
     * @param i2      amount to add; may be negative
     */
    private static void a(Element element, int i2) {
        int current = b(element);
        b(element, current + i2);
    }

    /**
     * Reads the score stored in the element's "gravityScore" attribute.
     *
     * @param element element to inspect
     * @return the stored score, or 0 when reading or parsing it fails for any reason
     */
    private static int b(Element element) {
        int score = 0;
        try {
            score = Integer.parseInt(element.attr("gravityScore"));
        } catch (Exception unused) {
            // Missing or malformed score: keep the default of 0.
        }
        return score;
    }

    /**
     * Stores {@code str} and its compiled form as the pattern whose hits on class, id and
     * itemprop add points in c(Element).
     *
     * @param str regular expression source
     * @return this extractor, for chaining
     */
    private ArticleTextExtractor b(String str) {
        f7682d = str;
        e = Pattern.compile(str);
        return this;
    }

    /**
     * Determines the canonical URL from {@code link[rel=canonical]}, then the {@code og:url}
     * meta tag, then the {@code twitter:url} meta tag.
     *
     * @param document parsed page
     * @return the first non-empty candidate, or the result of the last lookup
     */
    private static String b(Document document) {
        String url = b.a(document.select("head link[rel=canonical]").attr("href"));
        if (url.isEmpty()) {
            url = b.a(document.select("head meta[property=og:url]").attr(com.umeng.analytics.pro.b.W));
        }
        if (url.isEmpty()) {
            url = b.a(document.select("head meta[name=twitter:url]").attr(com.umeng.analytics.pro.b.W));
        }
        return url;
    }

    /**
     * Stores {@code i2} as the element's score.
     *
     * @param element element whose "gravityScore" attribute is overwritten
     * @param i2      score to store, written in decimal
     */
    private static void b(Element element, int i2) {
        String encoded = String.valueOf(i2);
        element.attr("gravityScore", encoded);
    }

    /**
     * Weighs an element by its class, id, inline style and itemprop attribute.
     *
     * <p>Pattern {@code e}: +35 for a class hit, +45 for an id hit, +100 for an itemprop hit.
     * Pattern {@code f7681c}: -20 each for class and id. Pattern {@code g}: -50 each for class
     * and id. A style attribute matching {@code h} costs another 50.
     *
     * @param element element to weigh
     * @return the summed weight, which may be negative
     */
    private int c(Element element) {
        String className = element.className();
        String id = element.id();
        int weight = 0;

        if (e.matcher(className).find()) {
            weight += 35;
        }
        if (e.matcher(id).find()) {
            weight += 45;
        }
        if (f7681c.matcher(className).find()) {
            weight -= 20;
        }
        if (f7681c.matcher(id).find()) {
            weight -= 20;
        }
        if (g.matcher(className).find()) {
            weight -= 50;
        }
        if (g.matcher(id).find()) {
            weight -= 50;
        }

        String style = element.attr("style");
        if (style != null && !style.isEmpty() && h.matcher(style).find()) {
            weight -= 50;
        }

        String itemProp = element.attr("itemprop");
        if (itemProp != null && !itemProp.isEmpty() && e.matcher(itemProp).find()) {
            weight += 100;
        }
        return weight;
    }

    /**
     * Stores {@code str} and its compiled form as the pattern that costs 50 points per class or
     * id hit in c(Element) and 30 points per matching child in a(Element).
     *
     * @param str regular expression source
     * @return this extractor, for chaining
     */
    private ArticleTextExtractor c(String str) {
        f = str;
        g = Pattern.compile(str);
        return this;
    }

    /**
     * Determines the page description from the {@code description}, {@code og:description} and
     * {@code twitter:description} meta tags, in that order.
     *
     * @param document parsed page
     * @return the first non-empty candidate, or the result of the last lookup
     */
    private static String c(Document document) {
        String description = b.b(document.select("head meta[name=description]").attr(com.umeng.analytics.pro.b.W));
        if (description.isEmpty()) {
            description = b.b(document.select("head meta[property=og:description]").attr(com.umeng.analytics.pro.b.W));
        }
        if (description.isEmpty()) {
            description = b.b(document.select("head meta[name=twitter:description]").attr(com.umeng.analytics.pro.b.W));
        }
        return description;
    }

    /**
     * Returns at most the first 50 characters of {@code str}.
     *
     * @param str text to shorten
     * @return {@code str} itself when it has fewer than 50 characters, otherwise its first 50
     */
    private static String d(String str) {
        if (str.length() >= 50) {
            return str.substring(0, 50);
        }
        return str;
    }

    /**
     * Looks for a publication date in the page and hands the first candidate found to
     * e(String).
     *
     * <p>Sources, in order: the {@code ptime}, {@code utime}, {@code pdate} and
     * {@code article:published} meta tags; {@code article:published_time}; {@code dateCreated};
     * {@code datePublished}; {@code OriginalPublicationDate}; {@code DisplayDate}; any meta tag
     * whose name contains "date"; and the text of the first {@code .date-header} element.
     *
     * <p>Meta values are read from the attribute named by {@code com.umeng.analytics.pro.b.W}
     * (presumably "content" - confirm against that class).
     *
     * @param document parsed page
     * @return the value produced by e(String) for the first source found, or null when the page
     *         offers none of them
     */
    private static Date d(Document document) {
        final String valueAttr = com.umeng.analytics.pro.b.W;

        Element ptime = document.select("meta[name=ptime]").first();
        String raw = ptime != null ? b.b(ptime.attr(valueAttr)) : "";
        if (raw.isEmpty()) {
            raw = b.b(document.select("meta[name=utime]").attr(valueAttr));
        }
        if (raw.isEmpty()) {
            raw = b.b(document.select("meta[name=pdate]").attr(valueAttr));
        }
        if (raw.isEmpty()) {
            raw = b.b(document.select("meta[property=article:published]").attr(valueAttr));
        }
        // Only a value that was actually found is parsed. The previous guard was inverted: it
        // parsed the empty string and skipped every fallback below, and ignored a found value.
        if (!raw.isEmpty()) {
            return e(raw);
        }

        Elements publishedTime = document.select("meta[property=article:published_time]");
        if (!publishedTime.isEmpty()) {
            Element meta = publishedTime.get(0);
            if (meta.hasAttr(valueAttr)) {
                String value = meta.attr(valueAttr);
                if (value.endsWith("Z")) {
                    value = value.substring(0, value.length() - 1) + "GMT-00:00";
                } else if (value.length() >= 6) {
                    // Drop the trailing six characters (the zone offset in the expected format).
                    // The page text is no longer used as a String.format pattern, so a '%' in
                    // the value cannot throw.
                    value = value.substring(0, value.length() - 6);
                }
                return e(value);
            }
        }

        Elements dateCreated = document.select("meta[property=dateCreated], span[property=dateCreated]");
        if (!dateCreated.isEmpty()) {
            Element first = dateCreated.get(0);
            return first.hasAttr(valueAttr) ? e(first.attr(valueAttr)) : e(first.text());
        }

        Elements datePublished = document.select("meta[itemprop=datePublished], span[itemprop=datePublished]");
        if (!datePublished.isEmpty()) {
            Element first = datePublished.get(0);
            if (first.hasAttr(valueAttr)) {
                return e(first.attr(valueAttr));
            }
            if (first.hasAttr("value")) {
                return e(first.attr("value"));
            }
            return e(first.text());
        }

        String[] metaSelectors = {
            "meta[name=OriginalPublicationDate]",
            "meta[name=DisplayDate]",
            "meta[name*=date]"
        };
        for (String selector : metaSelectors) {
            Elements hits = document.select(selector);
            if (!hits.isEmpty()) {
                Element first = hits.get(0);
                if (first.hasAttr(valueAttr)) {
                    return e(first.attr(valueAttr));
                }
            }
        }

        Elements dateHeader = document.select(".date-header");
        if (dateHeader.isEmpty()) {
            return null;
        }
        return e(dateHeader.get(0).text());
    }

    /*
        Placeholder left by the decompiler: the real body of this method could not be recovered,
        so every call throws UnsupportedOperationException. The six-argument a(...) passes the
        return value to JResult.setAuthorName, so extraction cannot complete until this method
        is restored from the original bytecode.
    */
    /* JADX WARN: Removed duplicated region for block: B:65:0x0112  */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    private java.lang.String e(org.jsoup.nodes.Document r5) {
        /*
            Method dump skipped, instructions count: 300
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: com.dianyou.browser.reading.ArticleTextExtractor.e(org.jsoup.nodes.Document):java.lang.String");
    }

    /**
     * Returns the epoch ({@code new Date(0L)}) for every input.
     *
     * @param str date text; not inspected by this implementation
     * @return a new {@link Date} at time 0
     */
    private static Date e(String str) {
        final long epochMillis = 0L;
        return new Date(epochMillis);
    }

    /**
     * Reads the {@code keywords} meta tag and splits it on commas.
     *
     * <p>A value wrapped in square brackets has the brackets removed first. Whitespace around
     * each comma is discarded by the split.
     *
     * @param document parsed page
     * @return the keywords, or an empty list when the tag is missing or empty
     */
    private static Collection<String> f(Document document) {
        String raw = b.b(document.select("head meta[name=keywords]").attr(com.umeng.analytics.pro.b.W));
        if (raw.startsWith("[") && raw.endsWith("]")) {
            raw = raw.substring(1, raw.length() - 1);
        }
        String[] parts = raw.split("\\s*,\\s*");
        if (parts.length > 1) {
            return Arrays.asList(parts);
        }
        if (parts.length > 0 && parts[0] != null && !parts[0].isEmpty()) {
            return Arrays.asList(parts);
        }
        return Collections.emptyList();
    }

    /**
     * Decides whether an image URL should be skipped by the image scorer.
     *
     * <p>The constant is written with its fully-qualified class name because this class also
     * declares an instance field named {@code g}; a bare {@code g.an} resolves to that field
     * rather than to the imported class, which is not valid in a static method.
     *
     * @param str image URL
     * @return true when {@code b.a(str, g.an)} is at least 2 (presumably a count of how often
     *         the constant occurs in the URL - confirm against class b)
     */
    private static boolean f(String str) {
        return b.a(str, com.umeng.commonsdk.proguard.g.an) >= 2;
    }

    /**
     * Cleans a page title by splitting it on '|' and dropping unwanted parts.
     *
     * <p>A part is dropped when its trimmed, lower-cased form is in the ignore set {@code j}, or
     * when the number of parts kept so far equals the part count minus one and the text kept so
     * far is longer than the part. Kept parts are re-joined with '|' and passed through
     * {@code b.b}.
     *
     * @param str raw title
     * @return the cleaned title
     */
    private static String g(String str) {
        String[] parts = str.split("\\|");
        StringBuilder cleaned = new StringBuilder(parts.length);
        int kept = 0;
        for (String part : parts) {
            if (j.contains(part.toLowerCase().trim())) {
                continue;
            }
            if (kept == parts.length - 1 && cleaned.length() > part.length()) {
                continue;
            }
            if (kept > 0) {
                cleaned.append('|');
            }
            cleaned.append(part);
            kept++;
        }
        return b.b(cleaned.toString());
    }

    /**
     * Determines a representative image URL from the {@code og:image} meta tag, the
     * {@code twitter:image} meta tag, {@code link[rel=image_src]} and the {@code thumbnail}
     * meta tag, in that order.
     *
     * @param document parsed page
     * @return the first non-empty candidate, or the result of the last lookup
     */
    private static String g(Document document) {
        String image = b.a(document.select("head meta[property=og:image]").attr(com.umeng.analytics.pro.b.W));
        if (image.isEmpty()) {
            image = b.a(document.select("head meta[name=twitter:image]").attr(com.umeng.analytics.pro.b.W));
        }
        if (image.isEmpty()) {
            image = b.a(document.select("link[rel=image_src]").attr("href"));
        }
        if (image.isEmpty()) {
            image = b.a(document.select("head meta[name=thumbnail]").attr(com.umeng.analytics.pro.b.W));
        }
        return image;
    }

    /**
     * Reads the feed URL from the first {@code link[rel=alternate]} element that also has
     * {@code type=application/rss+xml}.
     *
     * @param document parsed page
     * @return the {@code href} value passed through {@code b.a}
     */
    private static String h(Document document) {
        Elements alternates = document.select("link[rel=alternate]");
        Elements feeds = alternates.select("link[type=application/rss+xml]");
        return b.a(feeds.attr("href"));
    }

    /**
     * Reads the video URL from the {@code og:video} meta tag.
     *
     * @param document parsed page
     * @return the tag's value passed through {@code b.a}
     */
    private static String i(Document document) {
        Elements videoMeta = document.select("head meta[property=og:video]");
        return b.a(videoMeta.attr(com.umeng.analytics.pro.b.W));
    }

    /**
     * Determines the favicon URL from {@code head link[rel=icon]}, falling back to links whose
     * {@code rel} starts with "shortcut" or ends with "icon".
     *
     * @param document parsed page
     * @return the first non-empty candidate, or the result of the fallback lookup
     */
    private static String j(Document document) {
        String icon = b.a(document.select("head link[rel=icon]").attr("href"));
        if (!icon.isEmpty()) {
            return icon;
        }
        return b.a(document.select("head link[rel^=shortcut],link[rel$=icon]").attr("href"));
    }

    /**
     * Reads the page type from the {@code og:type} meta tag.
     *
     * @param document parsed page
     * @return the tag's value passed through {@code b.b}
     */
    private static String k(Document document) {
        Elements typeMeta = document.select("head meta[property=og:type]");
        return b.b(typeMeta.attr(com.umeng.analytics.pro.b.W));
    }

    /**
     * Determines the site name from the {@code og:site_name} meta tag, falling back to the
     * {@code twitter:site} meta tag.
     *
     * @param document parsed page
     * @return the first non-empty candidate, or the result of the fallback lookup
     */
    private static String l(Document document) {
        String siteName = b.b(document.select("head meta[property=og:site_name]").attr(com.umeng.analytics.pro.b.W));
        if (siteName.isEmpty()) {
            siteName = b.b(document.select("head meta[name=twitter:site]").attr(com.umeng.analytics.pro.b.W));
        }
        // A third lookup that repeated the og:site_name query was removed: it can only be
        // reached after that same query has already produced an empty value.
        return siteName;
    }

    /**
     * Determines the page language from the {@code language} meta tag, the {@code lang}
     * attribute of {@code html}, or the {@code og:locale} meta tag, in that order.
     *
     * @param document parsed page
     * @return the first two characters of the value found, or the whole value when it is
     *         shorter than that (possibly empty)
     */
    private static String m(Document document) {
        String language = b.b(document.select("head meta[property=language]").attr(com.umeng.analytics.pro.b.W));
        if (language.isEmpty()) {
            language = b.b(document.select("html").attr("lang"));
        }
        if (language.isEmpty()) {
            language = b.b(document.select("head meta[property=og:locale]").attr(com.umeng.analytics.pro.b.W));
        }
        return language.length() > 2 ? language.substring(0, 2) : language;
    }

    /**
     * Prepares {@code document} for scoring by delegating to o(Document); the returned document
     * is the same instance and is therefore not needed here.
     *
     * @param document parsed page, modified in place
     */
    private static void n(Document document) {
        o(document);
    }

    /**
     * Removes every {@code script}, {@code noscript} and {@code style} element from the
     * document, in that order.
     *
     * @param document parsed page, modified in place
     * @return the same {@code document} instance
     */
    private static Document o(Document document) {
        String[] strippedTags = {"script", "noscript", "style"};
        for (String tag : strippedTags) {
            for (Element match : document.getElementsByTag(tag)) {
                match.remove();
            }
        }
        return document;
    }

    /**
     * Collects the content candidates below {@code body}: every element whose tag name fully
     * matches {@code f7679a}, in document order.
     *
     * <p>Each candidate is given an initial "gravityScore" that starts at 100 and is halved
     * (integer division) after every candidate.
     *
     * @param document parsed page; candidate elements get their score attribute set
     * @return the candidates in encounter order
     */
    private static Collection<Element> p(Document document) {
        LinkedHashMap<Element, Object> candidates = new LinkedHashMap<Element, Object>(64);
        int score = 100;
        for (Element node : document.select("body").select("*")) {
            if (!f7679a.matcher(node.tagName()).matches()) {
                continue;
            }
            candidates.put(node, null);
            b(node, score);
            score /= 2;
        }
        return candidates.keySet();
    }

    /**
     * Extracts article data from an HTML string using this extractor's formatter, with image
     * collection enabled.
     *
     * @param jResult result object to fill
     * @param str     HTML source; must not be empty
     * @param i2      maximum text size passed on to the extraction
     * @return the filled result
     * @throws Exception if the extraction fails; an empty {@code str} raises
     *                   IllegalArgumentException
     */
    public JResult a(JResult jResult, String str, int i2) throws Exception {
        OutputFormatter formatter = this.l;
        return a(jResult, str, formatter, Boolean.TRUE, i2);
    }
}
