package com.hankcs.hanlp.corpus.document;

import com.bee.scheduling.by0;
import com.bee.scheduling.jz0;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.CompoundWord;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import java.io.File;
import java.io.Serializable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes3.dex */
/**
 * A document represented as an ordered list of {@link Sentence}s.
 *
 * <p>Provides factory methods that build a document from a file or a string, and
 * several views that expose the document's words either as-is ({@link IWord}) or
 * reduced to plain {@link Word}s.
 */
public class Document implements Serializable {
    /** Sentences of this document, in order. */
    public List<Sentence> sentenceList;

    /**
     * Wraps the given sentence list; the list is stored by reference, not copied.
     *
     * @param list the sentences of the document
     */
    public Document(List<Sentence> list) {
        this.sentenceList = list;
    }

    /**
     * Builds a document from a file, treating every non-empty (after trimming) line
     * as one sentence.
     *
     * @param file the file to read; its absolute path is handed to the line iterator
     * @return the document, or {@code null} if any line cannot be turned into a sentence
     */
    public static Document create(File file) {
        by0.Cdo lines = new by0.Cdo(file.getAbsolutePath());
        List<Sentence> sentences = new LinkedList<Sentence>();
        while (lines.hasNext()) {
            String line = lines.next().trim();
            if (line.isEmpty()) {
                continue;
            }
            Sentence sentence = Sentence.create(line);
            if (sentence == null) {
                jz0.f4842do.warning("使用 " + line + " 创建句子失败");
                // NOTE(review): the line iterator is not explicitly released on this early
                // return; its API is not visible from this file - confirm whether it needs closing.
                return null;
            }
            sentences.add(sentence);
        }
        return new Document(sentences);
    }

    /**
     * Builds a document from a string by splitting it into sentence chunks.
     *
     * <p>Each chunk is the shortest non-empty run of characters that ends with
     * {@code "。/w"}, {@code "！/w "}, {@code "？/w "}, a newline, or the end of input.
     *
     * @param str the text to split and parse
     * @return the document, or {@code null} if any chunk cannot be turned into a sentence
     */
    public static Document create(String str) {
        // Compiled per call on purpose: hoisting this into a static field would add a
        // static initializer, which changes the default serialVersionUID of this class.
        Matcher chunks = Pattern.compile(".+?((。/w)|(！/w )|(？/w )|\\n|$)").matcher(str);
        List<Sentence> sentences = new LinkedList<Sentence>();
        while (chunks.find()) {
            String chunk = chunks.group();
            Sentence sentence = Sentence.create(chunk);
            if (sentence == null) {
                jz0.f4842do.warning("使用" + chunk + "构建句子失败");
                return null;
            }
            sentences.add(sentence);
        }
        return new Document(sentences);
    }

    /**
     * Returns the word list of every sentence, compound words left intact.
     *
     * @return a new list holding each sentence's own {@code wordList} (not copies)
     */
    public List<List<IWord>> getComplexSentenceList() {
        List<List<IWord>> result = new LinkedList<List<IWord>>();
        for (Sentence sentence : this.sentenceList) {
            result.add(sentence.wordList);
        }
        return result;
    }

    /**
     * Returns every sentence as a list of plain words, with each compound word
     * replaced by its inner words.
     *
     * @return one new word list per sentence
     */
    public List<List<Word>> getSimpleSentenceList() {
        List<List<Word>> result = new LinkedList<List<Word>>();
        for (Sentence sentence : this.sentenceList) {
            List<Word> words = new LinkedList<Word>();
            for (IWord word : sentence.wordList) {
                appendSimpleWords(word, true, words);
            }
            result.add(words);
        }
        return result;
    }

    /**
     * Returns every sentence as a list of plain words. A compound word whose label is
     * in {@code set} is replaced by its inner words; any other compound word is
     * converted to a single word via {@code toWord()}.
     *
     * @param set labels of the compound words to split; only consulted for compound words
     * @return one new word list per sentence
     */
    public List<List<Word>> getSimpleSentenceList(Set<String> set) {
        List<List<Word>> result = new LinkedList<List<Word>>();
        for (Sentence sentence : this.sentenceList) {
            List<Word> words = new LinkedList<Word>();
            for (IWord word : sentence.wordList) {
                // Short-circuit keeps the set lookup restricted to compound words.
                boolean split = word instanceof CompoundWord && set.contains(word.getLabel());
                appendSimpleWords(word, split, words);
            }
            result.add(words);
        }
        return result;
    }

    /**
     * Returns every sentence as a list of plain words.
     *
     * @param z {@code true} to replace each compound word by its inner words,
     *          {@code false} to convert each compound word to a single word via {@code toWord()}
     * @return one new word list per sentence
     */
    public List<List<Word>> getSimpleSentenceList(boolean z) {
        List<List<Word>> result = new LinkedList<List<Word>>();
        for (Sentence sentence : this.sentenceList) {
            List<Word> words = new LinkedList<Word>();
            for (IWord word : sentence.wordList) {
                appendSimpleWords(word, z, words);
            }
            result.add(words);
        }
        return result;
    }

    /**
     * Returns all words of the document as one flat list of plain words, with each
     * compound word replaced by its inner words.
     *
     * @return a new flat word list
     */
    public List<Word> getSimpleWordList() {
        List<Word> result = new LinkedList<Word>();
        for (IWord word : getWordList()) {
            appendSimpleWords(word, true, result);
        }
        return result;
    }

    /**
     * Returns all words of the document as one flat list, compound words left intact.
     *
     * @return a new list containing the words of every sentence, in order
     */
    public List<IWord> getWordList() {
        List<IWord> result = new LinkedList<IWord>();
        for (Sentence sentence : this.sentenceList) {
            result.addAll(sentence.wordList);
        }
        return result;
    }

    /**
     * Joins the string forms of all sentences with a single space.
     *
     * @return the joined text, or an empty string when there are no sentences
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (Sentence sentence : this.sentenceList) {
            sb.append(sentence);
            sb.append(' ');
        }
        // Drop the trailing separator, if anything was appended at all.
        if (sb.length() > 0) {
            sb.deleteCharAt(sb.length() - 1);
        }
        return sb.toString();
    }

    /**
     * Appends the plain-word form of {@code word} to {@code out}: a non-compound word is
     * added as-is; a compound word contributes its inner words when {@code split} is
     * true, otherwise its {@code toWord()} conversion.
     */
    private static void appendSimpleWords(IWord word, boolean split, List<Word> out) {
        if (!(word instanceof CompoundWord)) {
            out.add((Word) word);
            return;
        }
        CompoundWord compound = (CompoundWord) word;
        if (split) {
            out.addAll(compound.innerList);
        } else {
            out.add(compound.toWord());
        }
    }
}
