package opennlp.tools.formats.leipzig;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.function.Function;
import java.util.function.ToIntFunction;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import opennlp.tools.langdetect.Language;
import opennlp.tools.langdetect.LanguageSample;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;

/* loaded from: classes3.dex */
/**
 * Reads language-detection samples from a directory of sentences files.
 *
 * <p>A file in the directory is used when it is a regular, non-hidden file whose name is at
 * least three characters long and whose first three characters match {@code [a-z]+}. Those
 * three characters are used as the language code of every sample produced from the file.
 * Files are processed in sorted order, one after the other.
 *
 * <p>Each sample is built by concatenating the text after the first tab character of up to
 * {@code sentencesPerSample} randomly selected lines. The random generator is created with a
 * fixed seed, so the selection is reproducible for an unchanged set of files.
 */
public class LeipzigLanguageSampleStream implements ObjectStream<LanguageSample> {
    /** Number of samples to draw from each file, keyed by three-letter language prefix. */
    private final Map<String, Integer> langSampleCounts;
    /** Shared, fixed-seed generator used for every line selection and shuffle. */
    private final Random random;
    /** Stream over the file currently being read, or {@code null} before the first file. */
    private ObjectStream<LanguageSample> sampleStream;
    /** All accepted sentences files, sorted. */
    private final File[] sentencesFiles;
    /** Iterator over {@link #sentencesFiles}; re-created by {@link #reset()}. */
    private Iterator<File> sentencesFilesIt;
    /** Maximum number of sentences concatenated into one sample. */
    private final int sentencesPerSample;

    /**
     * Produces the samples of a single sentences file.
     *
     * <p>The whole selection is loaded into memory by the constructor; {@link #read()} then
     * only consumes the in-memory list.
     */
    public class LeipzigSentencesStream implements ObjectStream<LanguageSample> {
        private final String lang;
        private final Iterator<String> lineIterator;

        /**
         * Randomly selects {@code sentencesPerSample * numberOfSamples} line indexes of the
         * file and loads the selected lines that contain a tab character.
         *
         * @param lang language code attached to every sample of this stream
         * @param sentencesFile file to read, decoded as UTF-8
         * @param sentencesPerSample number of sentences per sample
         * @param numberOfSamples number of samples to draw from the file
         * @throws InvalidFormatException if the file has fewer lines than required
         * @throws IOException if the file cannot be read
         */
        LeipzigSentencesStream(String lang, File sentencesFile, int sentencesPerSample, int numberOfSamples)
                throws IOException {
            this.lang = lang;

            // The file is scanned once just to count its lines. Files.lines holds an open
            // file handle until the stream is closed, hence the try-with-resources.
            final int totalLineCount;
            try (java.util.stream.Stream<String> lines = Files.lines(sentencesFile.toPath())) {
                totalLineCount = (int) lines.count();
            }

            // Multiply in long so an int overflow cannot slip past the size check below.
            final long requiredLines = (long) sentencesPerSample * numberOfSamples;
            if (totalLineCount < requiredLines) {
                throw new InvalidFormatException(String.format(
                        "%s does not contain enough lines (%d lines < %d required lines).",
                        sentencesFile.getPath(), totalLineCount, requiredLines));
            }

            List<Integer> indexes = IntStream.range(0, totalLineCount).boxed().collect(Collectors.toList());
            Collections.shuffle(indexes, LeipzigLanguageSampleStream.this.random);
            // requiredLines <= totalLineCount here, so the narrowing cast is lossless.
            HashSet<Integer> selectedLines = new HashSet<>(indexes.subList(0, (int) requiredLines));

            List<String> sentences = new ArrayList<>();
            try (PlainTextByLineStream lineStream = new PlainTextByLineStream(
                    new MarkableFileInputStreamFactory(sentencesFile), StandardCharsets.UTF_8)) {
                int lineIndex = 0;
                String line;
                while ((line = lineStream.read()) != null) {
                    // Selected lines without a tab are dropped, so fewer sentences than
                    // requested may end up in the list.
                    if (line.indexOf('\t') != -1 && selectedLines.contains(lineIndex)) {
                        sentences.add(line);
                    }
                    lineIndex++;
                }
            }

            Collections.shuffle(sentences, LeipzigLanguageSampleStream.this.random);
            this.lineIterator = sentences.iterator();
        }

        // Decompiler-emitted forwarder to the interface's default close() implementation.
        @Override // opennlp.tools.util.ObjectStream, java.lang.AutoCloseable
        public /* synthetic */ void close() {
            ObjectStream.CC.$default$close(this);
        }

        /**
         * Builds the next sample from up to {@code sentencesPerSample} loaded lines (the value
         * configured on the enclosing stream).
         *
         * @return the next sample, or {@code null} when no loaded lines are left
         */
        @Override // opennlp.tools.util.ObjectStream
        public LanguageSample read() throws IOException {
            StringBuilder sampleText = new StringBuilder();
            int taken = 0;
            while (taken < LeipzigLanguageSampleStream.this.sentencesPerSample && this.lineIterator.hasNext()) {
                String line = this.lineIterator.next();
                // Keep only the text after the first tab; each sentence is followed by a space.
                sampleText.append(line, line.indexOf('\t') + 1, line.length()).append(' ');
                taken++;
            }
            if (sampleText.length() > 0) {
                return new LanguageSample(new Language(this.lang), sampleText);
            }
            return null;
        }

        // Decompiler-emitted forwarder to the interface's default reset() implementation.
        @Override // opennlp.tools.util.ObjectStream
        public /* synthetic */ void reset() {
            ObjectStream.CC.$default$reset(this);
        }
    }

    /**
     * Collects the sentences files of a directory and prepares the stream for reading.
     *
     * <p>The per-file sample count is {@code samplesPerLanguage} divided (integer division) by
     * the number of files sharing the same three-letter prefix.
     *
     * @param sentencesDir directory containing the sentences files
     * @param sentencesPerSample number of sentences concatenated into one sample
     * @param samplesPerLanguage number of samples to produce per language prefix
     * @throws IOException if the directory cannot be listed
     */
    public LeipzigLanguageSampleStream(File sentencesDir, int sentencesPerSample, final int samplesPerLanguage)
            throws IOException {
        this.sentencesPerSample = sentencesPerSample;

        File[] accepted = sentencesDir.listFiles((File candidate) ->
                !candidate.isHidden()
                        && candidate.isFile()
                        && candidate.getName().length() >= 3
                        && candidate.getName().substring(0, 3).matches("[a-z]+"));
        // File.listFiles returns null (rather than throwing) when the path is not a
        // directory or an I/O error occurs; report that instead of failing with an NPE.
        if (accepted == null) {
            throw new IOException("Unable to list sentences files in " + sentencesDir.getPath()
                    + " (not a directory or an I/O error occurred).");
        }
        Arrays.sort(accepted);
        this.sentencesFiles = accepted;

        Map<String, Integer> filesPerLanguage = Arrays.stream(accepted)
                .map(sentencesFile -> sentencesFile.getName().substring(0, 3))
                .collect(Collectors.groupingBy(Function.identity(), Collectors.summingInt(code -> 1)));
        this.langSampleCounts = filesPerLanguage.entrySet().stream()
                .collect(Collectors.toMap(Map.Entry::getKey, entry -> samplesPerLanguage / entry.getValue()));

        // Fixed seed keeps the sampling reproducible.
        this.random = new Random(23L);
        reset();
    }

    // Decompiler-emitted forwarder to the interface's default close() implementation.
    @Override // opennlp.tools.util.ObjectStream, java.lang.AutoCloseable
    public /* synthetic */ void close() {
        ObjectStream.CC.$default$close(this);
    }

    /**
     * Returns the next sample, moving on to the next sentences file whenever the current one
     * has no samples left.
     *
     * @return the next sample, or {@code null} once every file is exhausted
     * @throws IOException if a sentences file cannot be read or is too short
     */
    @Override // opennlp.tools.util.ObjectStream
    public LanguageSample read() throws IOException {
        // Loop instead of recursing so a long run of files that yield no sample
        // cannot grow the call stack.
        while (true) {
            if (this.sampleStream != null) {
                LanguageSample sample = this.sampleStream.read();
                if (sample != null) {
                    return sample;
                }
            }
            if (!this.sentencesFilesIt.hasNext()) {
                return null;
            }
            File sentencesFile = this.sentencesFilesIt.next();
            String lang = sentencesFile.getName().substring(0, 3);
            this.sampleStream = new LeipzigSentencesStream(
                    lang, sentencesFile, this.sentencesPerSample, this.langSampleCounts.get(lang));
        }
    }

    /**
     * Restarts iteration at the first sentences file. The random generator is not re-seeded,
     * so a second pass selects different lines than the first.
     */
    @Override // opennlp.tools.util.ObjectStream
    public void reset() throws IOException {
        this.sentencesFilesIt = Arrays.asList(this.sentencesFiles).iterator();
        this.sampleStream = null;
    }
}
