package com.tingwen.widget.webcollector;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.CharEncoding;
import org.mozilla.universalchardet.UniversalDetector;

/* loaded from: classes.dex */
/**
 * Guesses the character encoding of a raw (typically HTML) byte buffer.
 *
 * <p>Detection is attempted in this order: a {@code <meta http-equiv="content-type">}
 * declaration, an HTML5 {@code <meta charset=...>} declaration, a leading byte-order
 * mark, and finally statistical detection through {@link UniversalDetector}. Every
 * candidate name is checked against the charsets available to the running JVM, so the
 * returned name can always be passed to {@code new String(bytes, name)}.
 */
public class CharsetDetector {
    /** Upper bound on the number of leading bytes considered by the byte-order-mark check. */
    private static final int CHUNK_SIZE = 2000;

    /** Name returned when no usable encoding can be determined. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Charset used to inflate the raw bytes into text for the regex scan. */
    private static final Charset ASCII = Charset.forName("US-ASCII");

    /** Matches a {@code <meta>} tag carrying {@code http-equiv=content-type}; group 1 is the tag's attributes. */
    private static final Pattern META_PATTERN = Pattern.compile(
            "<meta\\s+([^>]*http-equiv=(\"|')?content-type(\"|')?[^>]*)>", Pattern.CASE_INSENSITIVE);

    /** Extracts the charset name (group 1) from the attributes captured by {@link #META_PATTERN}. */
    private static final Pattern CHARSET_PATTERN = Pattern.compile(
            "charset=\\s*([a-z][_\\-0-9a-z]*)", Pattern.CASE_INSENSITIVE);

    /** Matches the HTML5 short form {@code <meta charset="...">}; group 1 is the charset name. */
    private static final Pattern CHARSET_PATTERN_HTML5 = Pattern.compile(
            "<meta\\s+charset\\s*=\\s*[\"']?([a-z][_\\-0-9a-z]*)[^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * Guesses the encoding of {@code bArr}, preferring an encoding declared in the
     * document (meta tag or byte-order mark) over statistical detection.
     *
     * @param bArr raw content; may be {@code null} or empty
     * @return a charset name supported by this JVM; {@code "UTF-8"} when the input is
     *         {@code null}/empty or nothing usable is detected
     */
    public static String guessEncoding(byte[] bArr) {
        if (bArr == null || bArr.length == 0) {
            return DEFAULT_CHARSET;
        }
        String declared = null;
        try {
            declared = guessEncodingByNutch(bArr);
        } catch (RuntimeException ignored) {
            // Best effort: a failure while scanning for a declaration must not
            // prevent the statistical fallback below from running.
        }
        return declared != null ? declared : guessEncodingByMozilla(bArr);
    }

    /**
     * Guesses the encoding of {@code bArr} using {@link UniversalDetector} only.
     *
     * @param bArr raw content; may be {@code null} or empty
     * @return the detected charset name when this JVM supports it, otherwise {@code "UTF-8"}
     */
    public static String guessEncodingByMozilla(byte[] bArr) {
        if (bArr == null || bArr.length == 0) {
            return DEFAULT_CHARSET;
        }
        UniversalDetector detector = new UniversalDetector(null);
        detector.handleData(bArr, 0, bArr.length);
        detector.dataEnd();
        String detected = detector.getDetectedCharset();
        detector.reset();
        // The detector's name is not guaranteed to be a charset this JVM knows;
        // returning it unchecked would make the caller's decode step fail.
        return isSupported(detected) ? detected : DEFAULT_CHARSET;
    }

    /**
     * Looks for an encoding declared by the document itself: first a content-type
     * meta tag, then an HTML5 meta charset tag, then a byte-order mark.
     *
     * @param bArr raw content, non-null
     * @return a supported charset name, or {@code null} when none is declared
     */
    private static String guessEncodingByNutch(byte[] bArr) {
        // Only ASCII markup matters here, so every byte is inflated to one char.
        // The meta scan covers the whole buffer; only the BOM check is bounded by CHUNK_SIZE.
        String html = new String(bArr, ASCII);
        String declared = null;

        Matcher metaMatcher = META_PATTERN.matcher(html);
        if (metaMatcher.find()) {
            Matcher charsetMatcher = CHARSET_PATTERN.matcher(metaMatcher.group(1));
            if (charsetMatcher.find()) {
                declared = supportedOrNull(charsetMatcher.group(1));
            }
        }
        if (declared == null) {
            Matcher html5Matcher = CHARSET_PATTERN_HTML5.matcher(html);
            if (html5Matcher.find()) {
                declared = supportedOrNull(html5Matcher.group(1));
            }
        }
        // NOTE(review): the BOM is consulted only after the meta tags, as before;
        // confirm whether a BOM should instead take precedence over a meta declaration.
        if (declared == null) {
            declared = detectByteOrderMark(bArr);
        }
        return declared;
    }

    /**
     * Maps a leading UTF-8 / UTF-16 byte-order mark to its charset name.
     *
     * @return the charset name, or {@code null} when no known mark is present
     */
    private static String detectByteOrderMark(byte[] bArr) {
        int available = Math.min(bArr.length, CHUNK_SIZE);
        if (available >= 3
                && bArr[0] == (byte) 0xEF && bArr[1] == (byte) 0xBB && bArr[2] == (byte) 0xBF) {
            return DEFAULT_CHARSET;
        }
        if (available >= 2) {
            if (bArr[0] == (byte) 0xFF && bArr[1] == (byte) 0xFE) {
                return CharEncoding.UTF_16LE;
            }
            if (bArr[0] == (byte) 0xFE && bArr[1] == (byte) 0xFF) {
                return CharEncoding.UTF_16BE;
            }
        }
        return null;
    }

    /** Returns {@code name} when this JVM supports it as a charset, otherwise {@code null}. */
    private static String supportedOrNull(String name) {
        return isSupported(name) ? name : null;
    }

    /** Tells whether {@code name} is a non-null, legal charset name that this JVM can decode. */
    private static boolean isSupported(String name) {
        if (name == null) {
            return false;
        }
        try {
            return Charset.isSupported(name);
        } catch (IllegalArgumentException e) {
            // Covers IllegalCharsetNameException: a syntactically illegal name is unusable.
            return false;
        }
    }
}
