package com.huawei.hitouch.digestmodule.mhtrepair;

import android.text.TextUtils;
import com.huawei.hiai.pdk.unifiedaccess.HttpConfig;
import com.huawei.hitouch.hitouchcommon.common.loadappcapacity.vmall.KeyString;
import com.huawei.hitouch.hitouchcommon.common.util.FileUtil;
import com.huawei.hms.support.hianalytics.HiAnalyticsConstant;
import com.huawei.scanner.shopcommonmodule.constants.Constants;
import com.sun.mail.imap.IMAPStore;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;

/* loaded from: classes3.dex */
public class Readability {
    private final Document bld;
    private boolean ble = false;
    private String blf;
    private Element blg;
    private Element blh;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: com.huawei.hitouch.digestmodule.mhtrepair.Readability$1, reason: invalid class name */
    /* loaded from: classes3.dex */
    /**
     * Switch-map holder for {@link Patterns.RegEx}.
     *
     * <p>{@code bli} is indexed by {@code RegEx.ordinal()} and holds the case label
     * (1..6) that {@link Patterns#a(Patterns.RegEx)} switches on. Entries that are
     * never assigned stay 0 and therefore hit the {@code default} branch there.
     */
    public static /* synthetic */ class AnonymousClass1 {
        // ordinal -> switch case label; sized to the number of RegEx constants.
        static final /* synthetic */ int[] bli;

        static {
            int[] iArr = new int[Patterns.RegEx.values().length];
            bli = iArr;
            // Each assignment is guarded separately so that one missing enum
            // constant (NoSuchFieldError) does not prevent the others from being mapped.
            try {
                iArr[Patterns.RegEx.UNLIKELY_CANDIDATES.ordinal()] = 1;
            } catch (NoSuchFieldError unused) {
            }
            try {
                bli[Patterns.RegEx.OK_MAYBE_ITS_A_CANDIDATE.ordinal()] = 2;
            } catch (NoSuchFieldError unused2) {
            }
            try {
                bli[Patterns.RegEx.POSITIVE.ordinal()] = 3;
            } catch (NoSuchFieldError unused3) {
            }
            try {
                bli[Patterns.RegEx.NEGATIVE.ordinal()] = 4;
            } catch (NoSuchFieldError unused4) {
            }
            try {
                bli[Patterns.RegEx.DIV_TO_P_ELEMENTS.ordinal()] = 5;
            } catch (NoSuchFieldError unused5) {
            }
            try {
                bli[Patterns.RegEx.VIDEO.ordinal()] = 6;
            } catch (NoSuchFieldError unused6) {
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes3.dex */
    public static class Patterns {
        private static Pattern blj;
        private static Pattern blk;
        private static Pattern bll;
        private static Pattern blm;
        private static Pattern bln;
        private static Pattern blo;

        /* loaded from: classes3.dex */
        public enum RegEx {
            UNLIKELY_CANDIDATES,
            OK_MAYBE_ITS_A_CANDIDATE,
            POSITIVE,
            NEGATIVE,
            DIV_TO_P_ELEMENTS,
            VIDEO
        }

        public static Pattern a(RegEx regEx) {
            switch (AnonymousClass1.bli[regEx.ordinal()]) {
                case 1:
                    return Readability.a(blj, "combx|comment|disqus|foot|header|menu|meta|nav|rss|shoutbox|sidebar|sponsor");
                case 2:
                    if (blk == null) {
                        blk = Pattern.compile("and|article|body|column|main", 2);
                    }
                    return blk;
                case 3:
                    return Readability.a(bll, "article|body|content|entry|hentry|main|page|pagination|post|text|blog|story");
                case 4:
                    return Readability.a(blm, "combx|comment|com|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget");
                case 5:
                    return Readability.a(bln, "<(a|blockquote|dl|div|img|ol|p|pre|table|ul)");
                case 6:
                    return Readability.a(blo, "http:\\/\\/(www\\.)?(youtube|vimeo)\\.com");
                default:
                    return null;
            }
        }
    }

    /**
     * Creates an extractor bound to the given document.
     *
     * @param document the document that the other methods of this class read and modify in place;
     *                 {@code init(boolean)} returns immediately when it is {@code null}
     */
    public Readability(Document document) {
        this.bld = document;
    }

    /**
     * Picks a title string for the document.
     *
     * @return the shortened document title when it is non-blank; otherwise the text of the
     *         first element whose id/class mentions "head"/"title" or that is an h1-h6;
     *         otherwise the empty string
     */
    private String IB() {
        String documentTitle = this.bld.title();
        if (documentTitle.trim().length() > 0) {
            return cX(documentTitle);
        }
        Elements headingLike = this.bld.select("[id*=head],[id*=title],[class*=head],[class*=title],h1,h2,h3,h4,h5,h6");
        if (headingLike.size() > 0) {
            return headingLike.get(0).text();
        }
        return "";
    }

    /**
     * Tells whether {@code str} contains {@code str2} prefixed by one of the negating
     * markers {@code no_}, {@code no-}, {@code not_}, {@code not-} (all-lowercase or
     * all-uppercase marker; {@code str2} itself is matched case-sensitively).
     *
     * @param str  text to search (for example a class attribute)
     * @param str2 keyword that must directly follow the marker
     * @return {@code true} when any marker + keyword combination occurs in {@code str}
     */
    private static boolean O(String str, String str2) {
        for (String marker : new String[] {"no_", "no-", "not_", "not-"}) {
            String lowerVariant = marker + str2;
            if (str.contains(lowerVariant)) {
                return true;
            }
            String upperVariant = marker.toUpperCase(Locale.ENGLISH) + str2;
            if (str.contains(upperVariant)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the trimmed text of an element.
     *
     * @param element element whose text is read
     * @param z       when {@code true}, every run of whitespace is removed entirely
     * @return the trimmed (and optionally whitespace-free) text
     */
    private static String a(Element element, boolean z) {
        String trimmedText = element.text().trim();
        if (!z) {
            return trimmedText;
        }
        return trimmedText.replaceAll("\\s{1,}", "");
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns {@code pattern} when it is already available, otherwise compiles
     * {@code str} case-insensitively. The compiled result is not stored anywhere
     * by this method.
     *
     * @param pattern an existing pattern, or {@code null}
     * @param str     regular expression to compile when {@code pattern} is {@code null}
     * @return {@code pattern} itself, or a freshly compiled case-insensitive pattern
     */
    public static Pattern a(Pattern pattern, String str) {
        if (pattern != null) {
            return pattern;
        }
        return Pattern.compile(str, Pattern.CASE_INSENSITIVE);
    }

    /**
     * Multiplies the element's stored content score by {@code f}, truncating to an int,
     * and writes it back to the {@code readabilityContentScore} attribute.
     *
     * @param element element whose score is scaled
     * @param f       scale factor
     * @return the same element
     */
    private static Element a(Element element, float f) {
        int scaledScore = (int) (q(element) * f);
        element.attr("readabilityContentScore", Integer.toString(scaledScore));
        return element;
    }

    /**
     * Removes every element matched by the selector {@code str2}, except elements whose
     * class attribute carries a negated form of {@code str} (see {@code O}) and the
     * {@code html}/{@code body} elements themselves.
     *
     * @param document document to prune
     * @param str      keyword checked against the class attribute
     * @param str2     CSS selector of removal candidates
     */
    private static void a(Document document, String str, String str2) {
        for (Element candidate : document.select(str2)) {
            if (O(candidate.attr("class"), str)) {
                continue;
            }
            String tag = candidate.tagName().toLowerCase(Locale.ENGLISH);
            if ("html".equals(tag) || "body".equals(tag)) {
                continue;
            }
            candidate.remove();
        }
    }

    /**
     * Removes {@code element} when all three counters are zero and {@code z} is set.
     *
     * @param element element that may be removed
     * @param i       first counter (callers pass the img count)
     * @param i2      second counter (callers pass the embed count)
     * @param i3      third counter (callers pass the object count)
     * @param z       whether the element is otherwise considered removable
     */
    private void a(Element element, int i, int i2, int i3, boolean z) {
        boolean hasAnyMedia = i != 0 || i2 != 0 || i3 != 0;
        if (hasAnyMedia || !z) {
            return;
        }
        element.remove();
    }

    /**
     * Scores the parent and grandparent of each given element.
     *
     * <p>Elements with fewer than 25 non-whitespace characters and no {@code img}
     * descendant are skipped. For the others, parent and grandparent are initialised
     * (and appended to {@code arrayList}) the first time they are seen; the parent then
     * receives 1 + number of comma-separated segments + min(3, floor(length / 100))
     * points and the grandparent half of that (integer division).
     *
     * @param elements  elements to evaluate (callers pass the document's {@code p} elements)
     * @param arrayList receives every newly initialised parent/grandparent
     */
    private void a(Elements elements, ArrayList<Element> arrayList) {
        for (Element paragraph : elements) {
            Element parent = paragraph.parent();
            Element grandParent = parent.parent();
            String text = a(paragraph, true);
            if (text.length() < 25 && paragraph.getElementsByTag("img").size() == 0) {
                continue;
            }
            if (!parent.hasAttr("readabilityContentScore")) {
                h(parent);
                arrayList.add(parent);
            }
            if (!grandParent.hasAttr("readabilityContentScore")) {
                h(grandParent);
                arrayList.add(grandParent);
            }
            // Full-width commas (U+FF0C) count the same as ASCII commas.
            int commaSegments = text.replace('\uFF0C', ',').split(",").length;
            int lengthBonus = Math.min(3, (int) Math.floor(text.length() / 100.0d));
            int points = 1 + commaSegments + lengthBonus;
            b(parent, points);
            b(grandParent, points / 2);
        }
    }

    /**
     * Detects a short element that sits next to a single {@code img} sibling and whose
     * class name contains "load".
     *
     * @param element element to test
     * @param i       text length of the element; values above 6 never match
     * @return {@code true} when all of the conditions above hold
     */
    private static boolean a(Element element, int i) {
        if (i > 6) {
            return false;
        }
        Elements siblings = element.siblingElements();
        if (siblings == null || siblings.size() != 1) {
            return false;
        }
        Element onlySibling = siblings.get(0);
        String classNames = element.className();
        return TextUtils.equals(onlySibling.tagName(), "img")
                && !TextUtils.isEmpty(classNames)
                && classNames.contains("load");
    }

    /**
     * Decides whether an element should be removed, based on its text length, image
     * count, link density and embed count.
     *
     * @param element element under test
     * @param i       class/id weight of the element; selects the link-density limit
     *                (0.2 below 25, 0.5 otherwise)
     * @param i2      embed count supplied by the caller
     * @return {@code true} when any removal rule fires
     */
    private static boolean a(Element element, int i, int i2) {
        int imageCount = e(element, "img").size();
        float linkDensity = m(element);
        int textLength = a(element, true).length();

        boolean shortWithoutImages = textLength < 25 && imageCount == 0;
        boolean tooLinkHeavy = i < 25 ? linkDensity > 0.2f : linkDensity > 0.5f;
        boolean tooManyEmbeds = i2 > 1 || (i2 == 1 && textLength < 75);

        if (shortWithoutImages || tooLinkHeavy || tooManyEmbeds) {
            return true;
        }
        com.huawei.base.b.a.debug("Digest_Readability", " Not change toRemove");
        return false;
    }

    /**
     * Decides whether {@code element2} should be appended to the extracted content.
     *
     * <p>It qualifies when it is {@code element} itself or its score reaches {@code i}.
     * A {@code p} element additionally qualifies when it is longer than 80 characters
     * with link density below 0.25, or shorter than 80 characters with zero link density
     * and a period followed by a space or the end of the text.
     *
     * @param element  the chosen top candidate
     * @param i        minimum score for a sibling to be kept
     * @param element2 the sibling being examined
     * @return {@code true} when the sibling should be appended
     */
    private boolean a(Element element, int i, Element element2) {
        boolean append = element2 == element || q(element2) >= i;
        if (!"p".equalsIgnoreCase(element2.tagName())) {
            return append;
        }
        float linkDensity = m(element2);
        String text = a(element2, true);
        int textLength = text.length();
        if (textLength > 80 && linkDensity < 0.25f) {
            return true;
        }
        boolean looksLikeSentence = textLength < 80
                && Float.compare(linkDensity, 0.0f) == 0
                && text.matches(".*\\.( |$).*");
        if (looksLikeSentence) {
            return true;
        }
        com.huawei.base.b.a.debug("Digest_Readability", " append is not change");
        return append;
    }

    /**
     * Decides whether an element looks like non-content and should be removed.
     *
     * <p>Returns {@code true} straight away when (li count - 100) exceeds the p count for a
     * tag other than ul/ol, or when the input count exceeds floor(p count / 3). Otherwise
     * counts {@code embed} descendants whose absolute {@code src} does not match the VIDEO
     * pattern and defers to the length/link-density rules.
     *
     * @param element element under test
     * @param str     tag name currently being cleaned
     * @param i       class/id weight of the element
     * @return {@code true} when the element should be removed
     */
    private static boolean a(Element element, String str, int i) {
        int paragraphCount = e(element, "p").size();
        int adjustedListItems = e(element, "li").size() - 100;
        int inputCount = e(element, "input").size();

        boolean isListTag = "ul".equalsIgnoreCase(str) || "ol".equalsIgnoreCase(str);
        boolean listHeavy = adjustedListItems > paragraphCount && !isListTag;
        boolean inputHeavy = inputCount > Math.floor(paragraphCount / 3.0d);
        if (listHeavy || inputHeavy) {
            return true;
        }
        com.huawei.base.b.a.debug("Digest_Readability", " input and tag is not legal");

        int nonVideoEmbeds = 0;
        for (Element embed : e(element, "embed")) {
            boolean isVideo = Patterns.a(Patterns.RegEx.VIDEO).matcher(embed.absUrl("src")).find();
            if (!isVideo) {
                nonVideoEmbeds++;
            }
        }
        return a(element, i, nonVideoEmbeds);
    }

    /**
     * Walks every element of the document. Unless {@code z} is set, elements whose
     * class+id text matches UNLIKELY_CANDIDATES but not OK_MAYBE_ITS_A_CANDIDATE are
     * removed (never {@code body}). Every {@code div}/{@code section} is then offered
     * to {@code j} for possible conversion to {@code p}.
     *
     * @param z when {@code true}, the unlikely-candidate stripping is skipped
     */
    private void aX(boolean z) {
        for (Element node : this.bld.getAllElements()) {
            if (!z) {
                String classAndId = node.className() + node.id();
                boolean unlikely = Patterns.a(Patterns.RegEx.UNLIKELY_CANDIDATES).matcher(classAndId).find();
                boolean maybeCandidate = Patterns.a(Patterns.RegEx.OK_MAYBE_ITS_A_CANDIDATE).matcher(classAndId).find();
                if (unlikely && !maybeCandidate && !"body".equalsIgnoreCase(node.tagName())) {
                    node.remove();
                }
            }
            String tag = node.tagName();
            if ("div".equalsIgnoreCase(tag) || "section".equalsIgnoreCase(tag)) {
                j(node);
            }
        }
    }

    /**
     * Counts the pieces obtained by splitting the element's whitespace-free text.
     *
     * @param element element whose text is split
     * @param str     separator, interpreted as a regular expression by {@code String.split}
     * @return number of resulting pieces
     */
    private static int b(Element element, String str) {
        String compactText = a(element, true);
        String[] pieces = compactText.split(str);
        return pieces.length;
    }

    /**
     * Adds {@code i} to the element's stored content score.
     *
     * @param element element whose {@code readabilityContentScore} attribute is updated
     * @param i       amount to add (may be negative)
     * @return the same element
     */
    private static Element b(Element element, int i) {
        int updatedScore = q(element) + i;
        element.attr("readabilityContentScore", Integer.toString(updatedScore));
        return element;
    }

    /**
     * Removes video and audio elements from the document: tags {@code video}/{@code audio}
     * and elements whose class matches the corresponding selector. Elements whose class
     * attribute contains a negated keyword (e.g. {@code no_video}) are kept, as are
     * {@code html} and {@code body}.
     *
     * @param document document to prune in place
     */
    public static void b(Document document) {
        a(document, "video", "video,[class~=video],[class~=vedio]");
        a(document, HttpConfig.AUDIO_NAME, "audio,[class~=audio]");
    }

    /**
     * Replaces {@code element} with an {@code img} pointing at {@code str2}, provided the
     * file {@code str + str2} exists.
     *
     * @param str     path prefix used only for the existence check
     * @param str2    value written to the new image's {@code src} attribute
     * @param element element to replace
     */
    private void c(String str, String str2, Element element) {
        if (!FileUtil.checkFileExist(str + str2)) {
            return;
        }
        Element image = new Element(Tag.valueOf("img"), "");
        image.attr("src", str2);
        element.after((Node) image);
        element.remove();
    }

    /**
     * Runs the cleanup passes over extracted content, in a fixed order: style stripping,
     * {@code <br>} collapsing, removal of form/object/h1 (and of h2 when there is exactly
     * one), iframe removal, heading pruning, conditional cleaning of table/ul/div/section,
     * and finally link cleanup.
     *
     * @param element root of the extracted content
     */
    private static void c(Element element) {
        l(element);
        o(element);
        for (String tag : new String[] {"form", "object", "h1"}) {
            c(element, tag);
        }
        boolean singleH2 = e(element, "h2").size() == 1;
        if (singleH2) {
            c(element, "h2");
        }
        c(element, "iframe");
        p(element);
        for (String tag : new String[] {"table", "ul", "div", "section"}) {
            d(element, tag);
        }
        g(element);
    }

    /**
     * Removes all descendants with tag {@code str}. For object/embed/iframe, descendants
     * whose outer HTML matches the VIDEO pattern are kept.
     *
     * @param element root to clean
     * @param str     tag name to remove
     */
    public static void c(Element element, String str) {
        boolean mayHoldVideo = "object".equalsIgnoreCase(str)
                || "embed".equalsIgnoreCase(str)
                || "iframe".equalsIgnoreCase(str);
        for (Element target : e(element, str)) {
            boolean keepAsVideo = mayHoldVideo
                    && Patterns.a(Patterns.RegEx.VIDEO).matcher(target.outerHtml()).find();
            if (!keepAsVideo) {
                target.remove();
            }
        }
    }

    /**
     * Shortens a title by cutting it at the last occurrence of a separator.
     *
     * <p>Separators are tried in order; the first one whose preceding text, after
     * trimming, has at least 5 characters wins.
     *
     * @param str title text
     * @return the shortened title, or {@code str} unchanged when no separator qualifies
     */
    private String cX(String str) {
        String[] separators = {"-", "—", "-", HiAnalyticsConstant.REPORT_VAL_SEPARATOR};
        for (String separator : separators) {
            int cut = str.lastIndexOf(separator);
            if (cut <= 0) {
                continue;
            }
            String head = str.substring(0, cut).trim();
            if (head.length() >= 5) {
                return head;
            }
        }
        return str;
    }

    /**
     * Tells whether a tag name is one of: the {@code IMAPStore.ID_ADDRESS} value, ol, ul,
     * dl, dd, dt, li or form (case-insensitive). These tags receive a score penalty of 3
     * in {@code h}.
     *
     * @param str tag name; {@code null} yields {@code false}
     * @return {@code true} when the tag is in the list
     */
    private static boolean cY(String str) {
        String[] penalisedTags = {IMAPStore.ID_ADDRESS, "ol", "ul", "dl", "dd", "dt", "li", "form"};
        for (String tag : penalisedTags) {
            if (tag.equalsIgnoreCase(str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether a tag name is a heading (h1-h6) or {@code th}, case-insensitively.
     *
     * @param str tag name; {@code null} yields {@code false}
     * @return {@code true} for h1, h2, h3, h4, h5, h6 and th
     */
    private static boolean cZ(String str) {
        for (String tag : new String[] {"h1", "h2", "h3", "h4", "h5", "h6", "th"}) {
            if (tag.equalsIgnoreCase(str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Recursively applies the attribute clean-up of {@code e(Element)} to
     * {@code element} and to every descendant element.
     *
     * <p>The parent is cleaned exactly once. The earlier loop re-ran the clean-up on the
     * parent for each child; that call only removes attributes and is idempotent, so
     * the repeats had no effect beyond extra work.
     *
     * @param element root of the subtree to clean; {@code null} is ignored
     */
    public static void d(Element element) {
        if (element == null) {
            return;
        }
        e(element);
        Element child = element.children().first();
        while (child != null) {
            // The recursive call cleans the child itself before descending further.
            d(child);
            child = child.nextElementSibling();
        }
    }

    /**
     * Conditionally removes descendants with tag {@code str}.
     *
     * <p>A descendant with negative class/id weight is always removed. Otherwise it is
     * removed when its text has fewer than 10 comma-split pieces in total (ASCII plus
     * full-width comma), the removal heuristics agree, and {@code k} allows it.
     *
     * @param element root to clean
     * @param str     tag name to examine
     */
    private static void d(Element element, String str) {
        for (Element target : e(element, str)) {
            int weight = n(target);
            if (weight < 0) {
                target.remove();
                continue;
            }
            int commaPieces = b(target, ",") + b(target, "，");
            if (commaPieces < 10 && a(target, str, weight) && k(target)) {
                target.remove();
            }
        }
    }

    /**
     * Collects the elements with tag {@code str} found by {@code getElementsByTag} on
     * {@code element}, with {@code element} itself taken out of the returned list.
     *
     * @param element root of the search
     * @param str     tag name to look for
     * @return the matching elements, excluding {@code element}
     */
    private static Elements e(Element element, String str) {
        Elements elementsByTag = element.getElementsByTag(str);
        // NOTE(review): relies on Elements.remove(Object) only dropping the list entry,
        // not detaching the node from the DOM - confirm against the bundled jsoup version.
        elementsByTag.remove(element);
        return elementsByTag;
    }

    /**
     * Strips presentational attributes from a single element.
     *
     * <p>{@code a}: drops {@code target} and {@code href}. {@code img}/{@code figure}:
     * drops the style attribute only when it contains "max-width". Everything else is
     * handed to {@code f}.
     *
     * @param element element to clean
     */
    private static void e(Element element) {
        String tag = element.tagName();
        if ("a".equals(tag)) {
            element.removeAttr("target");
            element.removeAttr("href");
            return;
        }
        boolean isImageLike = "img".equals(tag) || "figure".equals(tag);
        if (!isImageLike) {
            f(element);
            return;
        }
        if (element.attr(Constants.STYLE).contains("max-width")) {
            element.removeAttr(Constants.STYLE);
        }
    }

    /**
     * Removes layout attributes from a generic element.
     *
     * <p>{@code width}/{@code height} are removed except on {@code svg}; {@code align} is
     * always removed. The style attribute is removed from a {@code div} only when it
     * contains "padding-bottom", and from any other element unless it contains
     * "background-image".
     *
     * @param element element to clean
     */
    private static void f(Element element) {
        String tag = element.tagName();
        if (!"svg".equals(tag)) {
            element.removeAttr("width");
            element.removeAttr("height");
        }
        element.removeAttr("align");

        String style = element.attr(Constants.STYLE);
        boolean dropStyle;
        if ("div".equals(tag)) {
            dropStyle = style.contains("padding-bottom");
        } else {
            dropStyle = !style.contains("background-image");
        }
        if (dropStyle) {
            element.removeAttr(Constants.STYLE);
        }
    }

    /**
     * Neutralises links: removes {@code target}, {@code href} and {@code class} from every
     * {@code a}, and removes the link entirely when its only sibling is an {@code img} and
     * its whitespace-free text is non-empty but shorter than 8 characters.
     *
     * @param element root whose links are processed
     */
    private static void g(Element element) {
        for (Element link : element.getElementsByTag("a")) {
            link.removeAttr("target");
            link.removeAttr("href");
            link.removeAttr("class");

            Elements siblings = link.siblingElements();
            if (siblings.size() != 1 || !"img".equals(siblings.get(0).tagName())) {
                continue;
            }
            String label = a(link, true);
            if (!TextUtils.isEmpty(label) && label.length() < 8) {
                link.remove();
            }
        }
    }

    /**
     * Initialises the content score of an element: starts at 0, adds a tag-based bonus
     * (div +5; pre/td/blockquote +3; list/form-like tags -3; headings and th -5;
     * article +10; section +8) and then the class/id weight from {@code n}.
     *
     * @param element element to initialise
     */
    private static void h(Element element) {
        element.attr("readabilityContentScore", Integer.toString(0));
        String tag = element.tagName();

        boolean recognised = true;
        int tagBonus = 0;
        if ("div".equalsIgnoreCase(tag)) {
            tagBonus = 5;
        } else if ("pre".equalsIgnoreCase(tag) || "td".equalsIgnoreCase(tag) || "blockquote".equalsIgnoreCase(tag)) {
            tagBonus = 3;
        } else if (cY(tag)) {
            tagBonus = -3;
        } else if (cZ(tag)) {
            tagBonus = -5;
        } else if ("article".equalsIgnoreCase(tag)) {
            tagBonus = 10;
        } else if ("section".equalsIgnoreCase(tag)) {
            tagBonus = 8;
        } else {
            recognised = false;
        }

        if (recognised) {
            b(element, tagBonus);
        } else {
            com.huawei.base.b.a.debug("Digest_Readability", "tagName is unLegal");
        }
        b(element, n(element));
    }

    /**
     * Builds the {@code readability-content} container from the top candidate and those
     * siblings that pass the append test.
     *
     * <p>The sibling score threshold is 20% of the candidate's score, but never below 10.
     * Accepted siblings are moved (not copied) into the new container.
     *
     * @param element the top candidate; its parent's children are examined
     * @return the newly created container {@code div}
     */
    private Element i(Element element) {
        Element container = this.bld.createElement("div");
        container.attr(KeyString.SCHEMA_PRAMS_ID, "readability-content");
        int threshold = Math.max(10, (int) (q(element) * 0.2f));
        for (Element sibling : element.parent().children()) {
            boolean keep = a(element, threshold, sibling);
            com.huawei.base.b.a.debug("Digest_Readability", "|Looking at sibling node: (" + sibling.className() + ":" + sibling.id() + ") with score " + q(sibling));
            if (keep) {
                container.appendChild(sibling);
            }
        }
        return container;
    }

    /**
     * Runs one extraction pass and stores the resulting title and content elements.
     *
     * <p>The body HTML is cached on first use. When a pass with {@code z == false}
     * yields no text, the body is restored from that cache and the pass is repeated once
     * with {@code z == true}; when that also yields nothing, the failure is logged and
     * the (empty) result is stored anyway.
     *
     * @param z {@code true} to skip stripping of unlikely candidates
     */
    private void init(boolean z) {
        if (this.bld == null) {
            return;
        }
        if (this.blf == null && this.bld.body() != null) {
            this.blf = this.bld.body().html();
        }
        ID();
        Element title = IC();
        Element content = aW(z);
        if (isEmpty(a(content, false))) {
            if (z) {
                com.huawei.base.b.a.debug("Digest_Readability", "|readability was unable to parse this page for content");
            } else {
                this.bld.body().html(this.blf);
                init(true);
                return;
            }
        }
        r(title);
        s(content);
    }

    /**
     * Null-safe emptiness check.
     *
     * @param str string to test
     * @return {@code true} when {@code str} is {@code null} or has length 0
     */
    private static boolean isEmpty(String str) {
        if (str == null) {
            return true;
        }
        return str.length() == 0;
    }

    /**
     * Renames an element to {@code p} when its inner HTML contains none of the tags
     * matched by DIV_TO_P_ELEMENTS and {@code k} allows it. A failed rename is logged
     * and otherwise ignored.
     *
     * @param element the div/section to examine
     */
    private void j(Element element) {
        boolean containsBlockMarkup = Patterns.a(Patterns.RegEx.DIV_TO_P_ELEMENTS).matcher(element.html()).find();
        if (!containsBlockMarkup) {
            try {
                if (k(element)) {
                    element.tagName("p");
                }
            } catch (IllegalArgumentException e) {
                com.huawei.base.b.a.debug("Digest_Readability", "|Could not alter div to p, reverting back to div." + e.toString());
            }
        }
    }

    /**
     * Tells whether an element may be restyled or removed: always {@code true} for
     * {@code null} and for tags other than div/section; for div/section only when the
     * style attribute does not contain "background-image".
     *
     * @param element element to test, may be {@code null}
     * @return {@code false} only for a div/section carrying a background image
     */
    private static boolean k(Element element) {
        if (element == null) {
            return true;
        }
        String tag = element.tagName();
        boolean isContainer = "div".equalsIgnoreCase(tag) || "section".equalsIgnoreCase(tag);
        if (!isContainer) {
            return true;
        }
        // A plain substring test is equivalent to the literal regex search used before.
        return !element.attr(Constants.STYLE).contains("background-image");
    }

    /**
     * Recursively removes the style attribute from an element and its descendants,
     * skipping elements whose class is exactly {@code readability-styled} and those
     * that {@code k} protects.
     *
     * @param element root of the subtree; {@code null} is ignored
     */
    private static void l(Element element) {
        if (element == null) {
            return;
        }
        if (!"readability-styled".equals(element.className()) && k(element)) {
            element.removeAttr(Constants.STYLE);
        }
        Element child = element.children().first();
        while (child != null) {
            if (!"readability-styled".equals(child.className()) && k(child)) {
                child.removeAttr(Constants.STYLE);
            }
            l(child);
            child = child.nextElementSibling();
        }
    }

    /**
     * Computes the link density of an element: the whitespace-free text length of all
     * descendant {@code a} elements divided by the whitespace-free text length of the
     * element itself.
     *
     * <p>The earlier loop requested a fresh iterator on every pass and read an undeclared
     * variable, so it could neither compile nor advance; it is now a plain iteration
     * over the links.
     *
     * @param element element to measure
     * @return the ratio; {@code NaN} when the element has no text at all (0/0), which
     *         callers rely on comparing as "not greater than" any limit
     */
    private static float m(Element element) {
        Elements links = e(element, "a");
        int textLength = a(element, true).length();
        float linkTextLength = 0.0f;
        for (Element link : links) {
            linkTextLength += a(link, true).length();
        }
        // Float division on purpose: empty text gives NaN rather than an exception.
        return linkTextLength / textLength;
    }

    /**
     * Computes the class/id weight of an element: -25 when its class+id text matches the
     * NEGATIVE pattern, +25 when it matches POSITIVE; both may apply and cancel out.
     *
     * @param element element to weigh
     * @return -25, 0 or 25; 0 when the element has neither class nor id
     */
    private static int n(Element element) {
        String classAndId = element.className() + element.id();
        if (isEmpty(classAndId)) {
            return 0;
        }
        int weight = 0;
        if (Patterns.a(Patterns.RegEx.NEGATIVE).matcher(classAndId).find()) {
            weight -= 25;
        }
        if (Patterns.a(Patterns.RegEx.POSITIVE).matcher(classAndId).find()) {
            weight += 25;
        }
        return weight;
    }

    /**
     * Collapses every run of {@code <br>} tags (with whitespace or {@code &nbsp;} between
     * them) in the element's inner HTML into a single {@code <br />}.
     *
     * @param element element whose inner HTML is rewritten
     */
    private static void o(Element element) {
        String collapsed = element.html().replaceAll("(<br\\s*\\/?>(\\s|&nbsp;?)*){1,}", "<br />");
        element.html(collapsed);
    }

    /**
     * Removes h1-h6 descendants that have a negative class/id weight or a link density
     * above 0.33.
     *
     * @param element root to clean
     */
    private static void p(Element element) {
        for (int level = 1; level <= 6; level++) {
            for (Element heading : e(element, "h" + level)) {
                if (n(heading) < 0 || m(heading) > 0.33f) {
                    heading.remove();
                }
            }
        }
    }

    /**
     * Reads the element's content score from its {@code readabilityContentScore}
     * attribute.
     *
     * @param element element to read
     * @return the parsed score, or 0 when the attribute is missing or not an integer
     */
    private static int q(Element element) {
        String rawScore = element.attr("readabilityContentScore");
        try {
            return Integer.parseInt(rawScore);
        } catch (NumberFormatException ignored) {
            // Missing or malformed attribute counts as "no score yet".
            return 0;
        }
    }

    /**
     * Creates an {@code h1} element holding the document title chosen by {@code IB()}.
     *
     * <p>The title is plain text, so it is set with {@code text(...)}: characters such as
     * {@code <} or {@code &} in a page title are kept literally rather than re-parsed as
     * markup.
     *
     * @return a new, detached {@code h1} element containing the title text
     */
    public Element IC() {
        Element heading = this.bld.createElement("h1");
        heading.text(IB());
        return heading;
    }

    /**
     * Prepares the document for extraction: removes all {@code script} elements,
     * stylesheet {@code link}s inside the head and all {@code style} elements, then
     * rewrites the body HTML so that runs of two or more {@code <br>} become paragraph
     * breaks and {@code font} tags become {@code span} tags.
     */
    public void ID() {
        for (Element script : this.bld.getElementsByTag("script")) {
            script.remove();
        }
        Element head = this.bld.head();
        if (head != null) {
            for (Element link : e(head, "link")) {
                if ("stylesheet".equalsIgnoreCase(link.attr("rel"))) {
                    link.remove();
                }
            }
        }
        for (Element style : this.bld.getElementsByTag(Constants.STYLE)) {
            style.remove();
        }
        Element body = this.bld.body();
        String html = body.html();
        html = html.replaceAll("(?i)(<br[^>]*>[ \n\r\t]*){2,}", "</p><p>");
        html = html.replaceAll("(?i)<(\\/?)font[^>]*>", "<$1span>");
        body.html(html);
    }

    /**
     * Returns the content element stored by {@code s(Element)}.
     *
     * @return the stored content element, or {@code null} when none has been stored yet
     */
    public Element IE() {
        return this.blh;
    }

    /**
     * Removes duplicates of the title from extracted content: drops any {@code head}
     * element, every image-free {@code p} whose text equals {@code str}, and every
     * h1-h6 descendant whose text equals {@code str}.
     *
     * @param str     title text to look for; must not be {@code null} when matches exist
     * @param element content root; {@code null} is ignored
     */
    public void a(String str, Element element) {
        if (element == null) {
            return;
        }
        element.select("head").remove();
        for (Element paragraph : element.getElementsByTag("p")) {
            Elements images = paragraph.getElementsByTag("img");
            if (str.equals(paragraph.text()) && images.size() == 0) {
                paragraph.remove();
            }
        }
        for (int level = 1; level <= 6; level++) {
            for (Element heading : e(element, "h" + level)) {
                if (str.equals(heading.text())) {
                    heading.remove();
                }
            }
        }
    }

    /**
     * Converts empty {@code div}s that only carry a CSS background image into
     * {@code img} elements.
     *
     * <p>A div qualifies when it has no children, no text, and a style attribute
     * containing a {@code background-image: url("….files/…")} reference. The extracted
     * relative path is handed to {@code c(String, String, Element)}, which performs the
     * replacement when the file exists.
     *
     * @param element root whose {@code div}s are examined
     * @param str     path prefix used to locate the referenced file
     */
    public void a(Element element, String str) {
        Pattern backgroundUrl = Pattern.compile("background-image: url\\((\\&quot;|\")(\\w|-)*.files/(\\w|-)*");
        for (Element div : element.getElementsByTag("div")) {
            if (div.children().size() != 0 || !TextUtils.isEmpty(div.text())) {
                continue;
            }
            String style = div.attr(Constants.STYLE);
            if (TextUtils.isEmpty(style)) {
                continue;
            }
            Matcher match = backgroundUrl.matcher(style);
            if (!match.find()) {
                continue;
            }
            String declaration = match.group();
            if (TextUtils.isEmpty(declaration)) {
                continue;
            }
            String relativePath = declaration.replaceAll("background-image: url\\((\\&quot;|\")", "");
            c(str, relativePath, div);
        }
    }

    /**
     * Stores the flag {@code z} and runs the extraction, starting with the pass that
     * strips unlikely candidates.
     *
     * @param z when {@code true}, {@code aW} wraps the whole body in a new div and uses
     *          it as the top candidate instead of the best-scoring one
     */
    public final void aV(boolean z) {
        this.ble = z;
        init(false);
    }

    /**
     * Selects the main content of the document.
     *
     * <p>After preprocessing, the parents/grandparents of the paragraphs are scored, each
     * score is scaled by (1 - link density), and the highest-scoring candidate wins. When
     * the force flag is set, no candidate exists, or the winner is {@code body}, the whole
     * body content is wrapped in a new div which becomes the candidate. The candidate and
     * its qualifying siblings are then gathered into a container and cleaned.
     *
     * @param z {@code true} to skip stripping of unlikely candidates
     * @return the cleaned content container
     */
    protected Element aW(boolean z) {
        aX(z);
        ArrayList<Element> candidates = new ArrayList<>(20);
        a(this.bld.getElementsByTag("p"), candidates);

        Element best = null;
        for (Element candidate : candidates) {
            a(candidate, 1.0f - m(candidate));
            com.huawei.base.b.a.debug("Digest_Readability", "|Candidate: (" + candidate.className() + ":" + candidate.id() + ") with score " + q(candidate));
            if (best == null || q(candidate) > q(best)) {
                best = candidate;
            }
        }

        boolean useWholeBody = this.ble || best == null || "body".equalsIgnoreCase(best.tagName());
        if (useWholeBody) {
            best = this.bld.createElement("div");
            best.html(this.bld.body().html());
            this.bld.body().html("");
            this.bld.body().appendChild(best);
            h(best);
        }

        Element article = i(best);
        b(article);
        return article;
    }
    /**
     * Cleans extracted content in place.
     *
     * <p>After the generic cleanup passes, paragraphs are visited from last to first.
     * Until the first paragraph with at least 5 non-whitespace characters is found,
     * paragraphs are treated as removable trailing noise. From then on, a non-empty
     * paragraph is removable only when it looks like a loading placeholder; empty
     * paragraphs stay removable. A removable paragraph is removed only when it contains
     * no img/embed/object.
     *
     * <p>Previously the "keep" branch fell through into the unconditional "removable"
     * path, so every paragraph without media was removed and {@code remove()} could be
     * invoked twice on the same node. The two paths are now mutually exclusive.
     *
     * @param element root of the extracted content
     */
    public void b(Element element) {
        c(element);
        Elements paragraphs = e(element, "p");
        boolean contentSeen = false;
        for (int index = paragraphs.size() - 1; index >= 0; index--) {
            Element paragraph = paragraphs.get(index);
            int imageCount = e(paragraph, "img").size();
            int embedCount = e(paragraph, "embed").size();
            int objectCount = e(paragraph, "object").size();
            String text = a(paragraph, true);

            boolean removable;
            if (!TextUtils.isEmpty(text) && (contentSeen || text.length() >= 5)) {
                // Real content: only a loading placeholder is still removable.
                removable = a(paragraph, text.length());
                contentSeen = true;
            } else {
                removable = true;
            }
            a(paragraph, imageCount, embedCount, objectCount, removable);
        }
        d(element);
        element.html(element.html().replaceAll("(?i)<br[^>]*>\\s*<p", "<p"));
    }

    /**
     * Runs the extraction without changing the stored force flag, starting with the
     * pass that strips unlikely candidates.
     */
    public final void init() {
        init(false);
    }

    /**
     * Stores the title element produced by the extraction.
     *
     * @param element title element to keep
     */
    public void r(Element element) {
        this.blg = element;
    }

    /**
     * Stores the content element produced by the extraction; it is returned by
     * {@code IE()}.
     *
     * @param element content element to keep
     */
    public void s(Element element) {
        this.blh = element;
    }
}
