package edu.stanford.nlp.trees;

import edu.stanford.nlp.io.ExtensionFileFilter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.Sets;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.text.NumberFormat;
import java.util.AbstractCollection;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/trees/Treebank.class */
/**
 * Abstract base for a collection of {@link Tree}s that can be loaded from files,
 * visited with a {@link TreeVisitor}, and summarised.
 *
 * <p>Subclasses supply storage and traversal by implementing {@link #clear()},
 * {@link #loadPath(File, FileFilter)}, {@link #apply(TreeVisitor)} and the
 * {@code iterator()} method required by {@link AbstractCollection}.
 * {@link #remove(Object)} always throws, so this base class presents the treebank
 * as read-only with respect to element removal.
 */
public abstract class Treebank extends AbstractCollection<Tree> {
    /** Encoding recorded when a constructor is not given one explicitly. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Factory handed out by {@link #treeReaderFactory()}. */
    private final TreeReaderFactory trf;

    /** Character encoding name handed out by {@link #encoding()}. */
    private final String encoding;

    /** File suffix used by {@link #loadPath(File)} when no filter is supplied. */
    public static final String DEFAULT_TREE_FILE_SUFFIX = "mrg";

    /* loaded from: input_file:edu/stanford/nlp/trees/Treebank$CounterTreeProcessor.class */
    /** Visitor that simply counts how many trees it has been shown. */
    private static final class CounterTreeProcessor implements TreeVisitor {
        /** Number of {@link #visitTree(Tree)} calls so far. */
        private int visited;

        private CounterTreeProcessor() {
        }

        /**
         * Records one more visited tree; the tree itself is not inspected.
         *
         * @param tree the visited tree (unused)
         */
        @Override // edu.stanford.nlp.trees.TreeVisitor
        public void visitTree(Tree tree) {
            this.visited++;
        }

        /**
         * @return the number of trees visited so far
         */
        public int total() {
            return this.visited;
        }
    }

    /**
     * Creates a treebank using a {@link LabeledScoredTreeReaderFactory} and the
     * default encoding ({@code "UTF-8"}).
     */
    public Treebank() {
        this(new LabeledScoredTreeReaderFactory());
    }

    /**
     * Creates a treebank using the given reader factory and the default encoding
     * ({@code "UTF-8"}).
     *
     * @param treeReaderFactory factory later returned by {@link #treeReaderFactory()}
     */
    public Treebank(TreeReaderFactory treeReaderFactory) {
        this(treeReaderFactory, DEFAULT_ENCODING);
    }

    /**
     * Creates a treebank using the given reader factory and encoding.
     *
     * @param treeReaderFactory factory later returned by {@link #treeReaderFactory()}
     * @param str               encoding name later returned by {@link #encoding()};
     *                          stored as given, without validation
     */
    public Treebank(TreeReaderFactory treeReaderFactory, String str) {
        this.trf = treeReaderFactory;
        this.encoding = str;
    }

    /**
     * Creates a treebank using a {@link LabeledScoredTreeReaderFactory}.
     *
     * @param i not used by this base class (presumably an initial-capacity hint
     *          for subclasses; confirm against callers)
     */
    public Treebank(int i) {
        this(i, new LabeledScoredTreeReaderFactory());
    }

    /**
     * Creates a treebank using the given reader factory and the default encoding
     * ({@code "UTF-8"}).
     *
     * @param i                 not used by this base class (presumably an
     *                          initial-capacity hint for subclasses; confirm
     *                          against callers)
     * @param treeReaderFactory factory later returned by {@link #treeReaderFactory()}
     */
    public Treebank(int i, TreeReaderFactory treeReaderFactory) {
        this(treeReaderFactory, DEFAULT_ENCODING);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * @return the tree reader factory supplied at construction time
     */
    public TreeReaderFactory treeReaderFactory() {
        return this.trf;
    }

    /**
     * @return the encoding name supplied at construction time, or {@code "UTF-8"}
     *         if none was given
     */
    public String encoding() {
        return this.encoding;
    }

    /** Empties the treebank; the mechanism is left to subclasses. */
    @Override // java.util.AbstractCollection, java.util.Collection
    public abstract void clear();

    /**
     * Loads trees from the given path name using the default suffix filter.
     *
     * @param str path name, wrapped in a {@link File}
     */
    public void loadPath(String str) {
        loadPath(new File(str));
    }

    /**
     * Loads trees from the given path, filtering with an
     * {@link ExtensionFileFilter} built from {@link #DEFAULT_TREE_FILE_SUFFIX}
     * and {@code true}.
     *
     * @param file file or directory to load from
     */
    public void loadPath(File file) {
        loadPath(file, DEFAULT_TREE_FILE_SUFFIX, true);
    }

    /**
     * Loads trees from the given path name, filtering with an
     * {@link ExtensionFileFilter} built from the two remaining arguments.
     *
     * @param str  path name, wrapped in a {@link File}
     * @param str2 suffix passed to {@link ExtensionFileFilter}
     * @param z    flag passed to {@link ExtensionFileFilter} (presumably whether
     *             to recurse into subdirectories; confirm against that class)
     */
    public void loadPath(String str, String str2, boolean z) {
        loadPath(new File(str), new ExtensionFileFilter(str2, z));
    }

    /**
     * Loads trees from the given path, filtering with an
     * {@link ExtensionFileFilter} built from the two remaining arguments.
     *
     * @param file file or directory to load from
     * @param str  suffix passed to {@link ExtensionFileFilter}
     * @param z    flag passed to {@link ExtensionFileFilter} (presumably whether
     *             to recurse into subdirectories; confirm against that class)
     */
    public void loadPath(File file, String str, boolean z) {
        loadPath(file, new ExtensionFileFilter(str, z));
    }

    /**
     * Loads trees from the given path name using the supplied filter.
     *
     * @param str        path name, wrapped in a {@link File}
     * @param fileFilter filter handed to {@link #loadPath(File, FileFilter)}
     */
    public void loadPath(String str, FileFilter fileFilter) {
        loadPath(new File(str), fileFilter);
    }

    /**
     * Loads trees from the given path using the supplied filter. Every other
     * {@code loadPath} overload ends up here.
     *
     * @param file       file or directory to load from
     * @param fileFilter filter selecting which files to read
     */
    public abstract void loadPath(File file, FileFilter fileFilter);

    /**
     * Presents trees of this treebank to the visitor. {@link #toString()} and
     * {@link #size()} are built on this method.
     *
     * @param treeVisitor visitor to invoke
     */
    public abstract void apply(TreeVisitor treeVisitor);

    /**
     * Wraps this treebank and the given transformer in a
     * {@link TransformingTreebank}.
     *
     * @param treeTransformer transformer handed to the wrapper
     * @return a new {@link TransformingTreebank} over this treebank
     */
    public Treebank transform(TreeTransformer treeTransformer) {
        return new TransformingTreebank(this, treeTransformer);
    }

    /**
     * Concatenates {@code toString()} of every tree visited by
     * {@link #apply(TreeVisitor)}, each followed by a newline.
     *
     * @return the concatenated text; empty if no tree is visited
     */
    @Override // java.util.AbstractCollection
    public String toString() {
        final StringBuilder text = new StringBuilder();
        TreeVisitor appender = new TreeVisitor() { // from class: edu.stanford.nlp.trees.Treebank.1
            @Override // edu.stanford.nlp.trees.TreeVisitor
            public void visitTree(Tree tree) {
                text.append(tree.toString()).append("\n");
            }
        };
        apply(appender);
        return text.toString();
    }

    /**
     * Counts trees by running a counting visitor through
     * {@link #apply(TreeVisitor)}, so each call performs a full traversal.
     *
     * @return the number of trees visited
     */
    @Override // java.util.AbstractCollection, java.util.Collection, java.util.List
    public int size() {
        CounterTreeProcessor tally = new CounterTreeProcessor();
        apply(tally);
        return tally.total();
    }

    /**
     * Splits the treebank across three writers in a repeating cycle of ten trees:
     * positions 0-7 of each cycle are Penn-printed to {@code writer}, position 8
     * to {@code writer2} and position 9 to {@code writer3}.
     *
     * <p>The writers are wrapped in auto-flushing {@link PrintWriter}s and are not
     * closed here.
     *
     * @param writer  receives eight of every ten trees
     * @param writer2 receives the ninth of every ten trees
     * @param writer3 receives the tenth of every ten trees
     * @throws IOException declared for callers; nothing in this body throws it directly
     */
    public void decimate(Writer writer, Writer writer2, Writer writer3) throws IOException {
        PrintWriter mainOut = new PrintWriter(writer, true);
        PrintWriter ninthOut = new PrintWriter(writer2, true);
        PrintWriter tenthOut = new PrintWriter(writer3, true);
        int slot = 0;
        for (Tree tree : this) {
            PrintWriter target;
            if (slot == 8) {
                target = ninthOut;
            } else if (slot == 9) {
                target = tenthOut;
            } else {
                target = mainOut;
            }
            tree.pennPrint(target);
            slot = (slot + 1) % 10;
        }
    }

    /**
     * Same as {@link #textualSummary(TreebankLanguagePack)} with a {@code null}
     * language pack, i.e. without punctuation statistics.
     *
     * @return the multi-line summary text
     */
    public String textualSummary() {
        return textualSummary(null);
    }

    /**
     * Iterates over the whole treebank once and produces a multi-line,
     * human-readable report: tree and token counts, root categories, malformed
     * roots, sentence-length range and average, and the category, tag and word
     * type inventories.
     *
     * <p>An empty treebank reports a sentence range of 0 to 0 and an average of
     * 0.0 rather than failing with a division by zero.
     *
     * @param treebankLanguagePack used only to decide which tags are punctuation;
     *                             may be {@code null}, in which case no
     *                             punctuation counts are collected
     * @return the summary text
     * @throws IllegalStateException if a node is neither a leaf, a preterminal
     *                               nor phrasal
     */
    public String textualSummary(TreebankLanguagePack treebankLanguagePack) {
        int numTrees = 0;
        int numNonUnaryRoots = 0;
        int numBareLeafTrees = 0;
        int numRootToLeaf = 0;
        int numRootToTaggedWord = 0;
        int numWords = 0;
        int numTags = 0;
        int shortestSentence = Integer.MAX_VALUE;
        int longestSentence = 0;
        // Counter is kept raw: its declaration is not visible from this file.
        Counter nonUnaryRootSubtrees = new Counter();
        Counter rootCategories = new Counter();
        Counter startCategories = new Counter();
        Counter punctuationWords = new Counter();
        Counter tagCounts = new Counter();
        Counter phrasalCounts = new Counter();
        Set<String> wordTypes = new HashSet<String>();
        Tree rootToLeafExample = null;

        for (Tree root : this) {
            rootCategories.incrementCount(root.value());
            numTrees++;

            int sentenceLength = root.yield().length();
            if (sentenceLength < shortestSentence) {
                shortestSentence = sentenceLength;
            }
            if (sentenceLength > longestSentence) {
                longestSentence = sentenceLength;
            }

            // Classify the shape of the root rewrite.
            if (root.numChildren() > 1) {
                numNonUnaryRoots++;
                nonUnaryRootSubtrees.incrementCount(root.localTree());
            } else if (root.isLeaf()) {
                numBareLeafTrees++;
            } else {
                Tree onlyChild = root.firstChild();
                if (onlyChild.isLeaf()) {
                    numRootToLeaf++;
                    rootToLeafExample = root; // the last such tree is the one reported
                } else if (onlyChild.isPreTerminal()) {
                    numRootToTaggedWord++;
                }
                startCategories.incrementCount(onlyChild.value());
            }

            // Tally every node yielded by the tree's own iterator.
            Iterator<Tree> nodes = root.iterator();
            while (nodes.hasNext()) {
                Tree node = nodes.next();
                if (node.isLeaf()) {
                    numWords++;
                    wordTypes.add(node.value());
                } else if (node.isPreTerminal()) {
                    numTags++;
                    tagCounts.incrementCount(node.value());
                    if (treebankLanguagePack != null && treebankLanguagePack.isPunctuationTag(node.value())) {
                        punctuationWords.incrementCount(node.firstChild().value());
                    }
                } else {
                    if (!node.isPhrasal()) {
                        throw new IllegalStateException("Bad tree in treebank!: " + node);
                    }
                    phrasalCounts.incrementCount(node.value());
                }
            }
        }

        StringWriter buffer = new StringWriter(2000);
        PrintWriter out = new PrintWriter(buffer);
        NumberFormat countFormat = NumberFormat.getNumberInstance();
        countFormat.setMaximumFractionDigits(0);

        out.println("Treebank has " + numTrees + " trees and " + numWords + " words (tokens)");
        if (numTags != numWords) {
            out.println("  Warning! numTags differs and is " + numTags);
        }
        if (rootCategories.size() == 1) {
            out.println("  The root category is: " + ((String) rootCategories.keySet().toArray()[0]));
        } else {
            out.println("  Warning! " + rootCategories.size() + " different roots in treebank: " + rootCategories.toString(countFormat));
        }
        if (numNonUnaryRoots > 0) {
            out.println("  Warning! " + numNonUnaryRoots + " trees without unary initial rewrite.  Subtrees: " + nonUnaryRootSubtrees.toString(countFormat));
        }
        if (numBareLeafTrees > 0 || numRootToLeaf > 0 || numRootToTaggedWord > 0) {
            out.println("  Warning! Non-phrasal trees: " + numBareLeafTrees + " bare leaves; " + numRootToLeaf + " root rewrites as leaf; and " + numRootToTaggedWord + " root rewrites as tagged word");
            if (numRootToLeaf > 0) {
                out.println("  Example bad root rewrites as leaf: " + rootToLeafExample);
            }
        }

        // Average truncated to two decimals. Computed in long so that
        // numWords * 100 cannot overflow, and guarded so an empty treebank does
        // not divide by zero or report Integer.MAX_VALUE as the shortest length.
        double averageLength;
        if (numTrees == 0) {
            shortestSentence = 0;
            averageLength = 0.0d;
        } else {
            averageLength = ((numWords * 100L) / numTrees) / 100.0d;
        }
        out.println("  Sentences range from " + shortestSentence + " to " + longestSentence + " words, with an average length of " + averageLength + " words.");
        out.println("  " + phrasalCounts.size() + " phrasal category types, " + tagCounts.size() + " tag types, and " + wordTypes.size() + " word types");

        Set<String> knownEmptyElements = new HashSet<String>(Arrays.asList("*", "0", "*T*", "*RNR*", "*U*", "*?*", "*EXP*", "*ICH*", "*NOT*", "*PPA*", "*OP*", "*pro*", "*PRO*"));
        Set<String> emptiesPresent = Sets.intersection(wordTypes, knownEmptyElements);
        if (emptiesPresent.size() > 0) {
            out.println("  Caution! " + emptiesPresent.size() + " word types are known empty elements: " + emptiesPresent);
        }
        Set tagsThatAreCategories = Sets.intersection(phrasalCounts.keySet(), tagCounts.keySet());
        if (tagsThatAreCategories.size() > 0) {
            out.println("  Warning! " + tagsThatAreCategories.size() + " items are tags and categories: " + tagsThatAreCategories);
        }
        out.println("    Cats: " + phrasalCounts.toString(countFormat));
        out.println("    Tags: " + tagCounts.toString(countFormat));
        out.println("    " + startCategories.size() + " start categories: " + startCategories.toString(countFormat));
        if (!punctuationWords.isEmpty()) {
            out.println("    Puncts: " + punctuationWords.toString(countFormat));
        }
        return buffer.toString();
    }

    /**
     * Removal is not supported.
     *
     * @param obj ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override // java.util.AbstractCollection, java.util.Collection
    public boolean remove(Object obj) {
        throw new UnsupportedOperationException("Treebank is read-only");
    }
}
