package edu.stanford.nlp.process;

import edu.stanford.nlp.ling.FeatureLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.util.StringUtils;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/process/PTBTokenizer.class */
public class PTBTokenizer<T> extends AbstractTokenizer<T> {
    /**
     * When {@code false}, {@code getNext()} skips tokens whose word equals
     * {@code PTBLexer.cr}. Also passed to the lexer when it is created.
     */
    private boolean tokenizeCRs;
    /** Selects which {@code PTBLexer} constructor {@code setSource} uses. */
    private boolean invertible;
    /** Passed to the lexer by {@code setSource}, but only when {@code invertible} is false. */
    private boolean suppressEscaping;
    /** Lexer that supplies tokens; (re)created by {@code setSource}. */
    private PTBLexer lexer;
    /** Passed to the lexer by {@code setSource}, but only when {@code invertible} is false. */
    private LexedTokenFactory<T> tokenFactory;

    /* loaded from: input_file:edu/stanford/nlp/process/PTBTokenizer$PTBTokenizerFactory.class */
    /**
     * Factory that creates {@link PTBTokenizer} instances which all share one set of options.
     *
     * <p>The options are captured when the factory is constructed and handed unchanged to
     * every tokenizer returned by {@link #getTokenizer(Reader)}.
     *
     * @param <T> type of token produced by the tokenizers
     */
    public static class PTBTokenizerFactory<T> implements TokenizerFactory<T> {
        /** Passed to each created tokenizer as its {@code tokenizeCRs} option. */
        protected boolean tokenizeCRs;
        /** Passed to each created tokenizer as its {@code invertible} option. */
        protected boolean invertible;
        /** Passed to each created tokenizer as its {@code suppressEscaping} option. */
        protected boolean suppressEscaping;
        /** Token factory passed to each created tokenizer. */
        protected LexedTokenFactory<T> factory;

        /**
         * Creates a factory of {@link Word} tokenizers with {@code tokenizeCRs} off.
         *
         * @return a new factory
         */
        public static PTBTokenizerFactory<Word> newPTBTokenizerFactory() {
            return newPTBTokenizerFactory(false);
        }

        /**
         * Creates a factory of {@link Word} tokenizers.
         *
         * @param z value for the {@code tokenizeCRs} option
         * @return a new factory
         */
        public static PTBTokenizerFactory<Word> newPTBTokenizerFactory(boolean z) {
            return new PTBTokenizerFactory<>(z, new WordTokenFactory());
        }

        /**
         * Creates a factory with {@code invertible} and {@code suppressEscaping} both off.
         *
         * @param z value for the {@code tokenizeCRs} option
         * @param lexedTokenFactory token factory handed to every created tokenizer
         */
        public PTBTokenizerFactory(boolean z, LexedTokenFactory<T> lexedTokenFactory) {
            this(z, false, false, lexedTokenFactory);
        }

        /**
         * Creates a factory of {@link FeatureLabel} tokenizers with {@code suppressEscaping} off.
         *
         * @param z value for the {@code tokenizeCRs} option
         * @param z2 value for the {@code invertible} option
         * @return a new factory
         */
        public static PTBTokenizerFactory<FeatureLabel> newPTBTokenizerFactory(boolean z, boolean z2) {
            return new PTBTokenizerFactory<>(z, z2, new FeatureLabelTokenFactory());
        }

        /**
         * Creates a factory of {@link Word} tokenizers with every option given explicitly.
         *
         * @param z value for the {@code tokenizeCRs} option
         * @param z2 value for the {@code invertible} option
         * @param z3 value for the {@code suppressEscaping} option
         * @return a new factory
         */
        public static PTBTokenizerFactory<Word> newPTBTokenizerFactory(boolean z, boolean z2, boolean z3) {
            return new PTBTokenizerFactory<>(z, z2, z3, new WordTokenFactory());
        }

        /** Same as the four-argument constructor with {@code suppressEscaping} off. */
        private PTBTokenizerFactory(boolean z, boolean z2, LexedTokenFactory<T> lexedTokenFactory) {
            this(z, z2, false, lexedTokenFactory);
        }

        /**
         * Stores all options.
         *
         * @param z value for the {@code tokenizeCRs} option
         * @param z2 value for the {@code invertible} option
         * @param z3 value for the {@code suppressEscaping} option
         * @param lexedTokenFactory token factory handed to every created tokenizer
         */
        private PTBTokenizerFactory(boolean z, boolean z2, boolean z3, LexedTokenFactory<T> lexedTokenFactory) {
            this.tokenizeCRs = z;
            this.invertible = z2;
            this.suppressEscaping = z3;
            this.factory = lexedTokenFactory;
        }

        /**
         * Returns a tokenizer over {@code reader}, viewed as an iterator.
         *
         * @param reader source of the text to tokenize
         * @return the result of {@link #getTokenizer(Reader)}
         */
        @Override // edu.stanford.nlp.objectbank.IteratorFromReaderFactory
        public Iterator<T> getIterator(Reader reader) {
            return getTokenizer(reader);
        }

        /**
         * Creates a tokenizer over {@code reader} configured with this factory's options.
         *
         * @param reader source of the text to tokenize
         * @return a new {@link PTBTokenizer}
         */
        @Override // edu.stanford.nlp.objectbank.TokenizerFactory
        public Tokenizer<T> getTokenizer(Reader reader) {
            // Parameterized (not raw) so the result is a PTBTokenizer<T> without an unchecked conversion.
            return new PTBTokenizer<>(reader, this.tokenizeCRs, this.invertible, this.suppressEscaping, this.factory);
        }
    }

    /**
     * Creates a tokenizer producing {@link Word} tokens with {@code tokenizeCRs} off.
     *
     * @param reader source of the text to tokenize
     * @return a new tokenizer
     */
    public static PTBTokenizer<Word> newPTBTokenizer(Reader reader) {
        final boolean tokenizeCRs = false;
        return newPTBTokenizer(reader, tokenizeCRs);
    }

    /**
     * Creates a tokenizer producing {@link Word} tokens.
     *
     * @param reader source of the text to tokenize
     * @param z value for the {@code tokenizeCRs} option
     * @return a new tokenizer
     */
    public static PTBTokenizer<Word> newPTBTokenizer(Reader reader, boolean z) {
        WordTokenFactory wordFactory = new WordTokenFactory();
        return new PTBTokenizer<>(reader, z, wordFactory);
    }

    /**
     * Creates a tokenizer producing {@link FeatureLabel} tokens.
     *
     * @param reader source of the text to tokenize
     * @param z value for the {@code tokenizeCRs} option
     * @param z2 value for the {@code invertible} option
     * @return a new tokenizer
     */
    public static PTBTokenizer<FeatureLabel> newPTBTokenizer(Reader reader, boolean z, boolean z2) {
        FeatureLabelTokenFactory labelFactory = new FeatureLabelTokenFactory();
        return new PTBTokenizer<>(reader, z, z2, labelFactory);
    }

    /**
     * Creates a tokenizer with {@code invertible} and {@code suppressEscaping} both off.
     *
     * @param reader source of the text to tokenize
     * @param z value for the {@code tokenizeCRs} option
     * @param lexedTokenFactory factory handed to the lexer to build tokens
     */
    public PTBTokenizer(Reader reader, boolean z, LexedTokenFactory<T> lexedTokenFactory) {
        this(reader, z, false, false, lexedTokenFactory);
    }

    /** Same as the five-argument constructor with {@code suppressEscaping} off. */
    private PTBTokenizer(Reader reader, boolean z, boolean z2, LexedTokenFactory<T> lexedTokenFactory) {
        this(reader, z, z2, false, lexedTokenFactory);
    }

    /**
     * Stores every option and then builds the lexer over {@code reader}.
     *
     * @param reader source of the text to tokenize
     * @param z value for the {@code tokenizeCRs} option
     * @param z2 value for the {@code invertible} option
     * @param z3 value for the {@code suppressEscaping} option
     * @param lexedTokenFactory factory handed to the lexer to build tokens
     */
    private PTBTokenizer(Reader reader, boolean z, boolean z2, boolean z3, LexedTokenFactory<T> lexedTokenFactory) {
        this.tokenFactory = lexedTokenFactory;
        this.suppressEscaping = z3;
        this.invertible = z2;
        this.tokenizeCRs = z;
        // All options must be in place first: setSource reads them to build the lexer.
        setSource(reader);
    }

    /**
     * Fetches the next token from the lexer.
     *
     * <p>Unless {@code tokenizeCRs} is set, tokens whose word equals {@code PTBLexer.cr}
     * are skipped. Returns {@code null} when no lexer has been set, when the lexer has
     * nothing more to return, or when reading from the lexer fails.
     *
     * @return the next token, or {@code null} if none is available
     */
    @Override // edu.stanford.nlp.process.AbstractTokenizer
    @SuppressWarnings("unchecked") // PTBLexer is not parameterized by T, so its result must be cast
    protected T getNext() {
        if (this.lexer == null) {
            return null;
        }
        T token;
        try {
            token = (T) this.lexer.next();
            if (!this.tokenizeCRs) {
                // Explicit null check: end of input must not be detected by provoking an NPE.
                while (token != null && PTBLexer.cr.equals(((HasWord) token).word())) {
                    token = (T) this.lexer.next();
                }
            }
        } catch (Exception e) {
            // Deliberate best effort: a failing lexer ends the token stream instead of
            // propagating. Drop any CR token already read so a suppressed token is not returned.
            this.nextToken = null;
            token = null;
        }
        return token;
    }

    /**
     * Replaces the lexer with a new one reading from {@code reader}.
     *
     * <p>An invertible tokenizer builds its lexer from the {@code invertible} and
     * {@code tokenizeCRs} flags; otherwise the lexer is built from the token factory,
     * {@code tokenizeCRs} and {@code suppressEscaping}.
     *
     * @param reader source of the text to tokenize
     */
    public void setSource(Reader reader) {
        final PTBLexer freshLexer;
        if (!this.invertible) {
            freshLexer = new PTBLexer(reader, this.tokenFactory, this.tokenizeCRs, this.suppressEscaping);
        } else {
            freshLexer = new PTBLexer(reader, this.invertible, this.tokenizeCRs);
        }
        this.lexer = freshLexer;
    }

    /**
     * Runs {@code str} through a {@code PTB2TextLexer} and concatenates everything it returns.
     *
     * <p>If the lexer throws an {@link IOException}, the stack trace is printed and the
     * text gathered so far is returned; the lexer is not called again.
     *
     * @param str the text to convert; must not be {@code null}
     * @return the concatenation of the pieces returned by the lexer
     */
    public static String ptb2Text(String str) {
        StringBuilder sb = new StringBuilder(str.length());
        PTB2TextLexer lexer = new PTB2TextLexer(new StringReader(str));
        try {
            // The try wraps the whole loop so a failure ends the conversion
            // instead of retrying next() indefinitely.
            for (String piece = lexer.next(); piece != null; piece = lexer.next()) {
                sb.append(piece);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Converts a list of tokens to text via {@link #ptb2Text(String)}.
     *
     * <p>Each element that is a {@link Word} is replaced by its {@code word()} string and
     * all other elements are used as they are. The result is joined with
     * {@code StringUtils.join} and converted. The caller's list is not modified.
     *
     * @param list the tokens to convert; must not be {@code null}
     * @return the converted text
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // the raw List is part of the published signature
    public static String ptb2Text(List list) {
        // Work on a copy: the caller's list may be unmodifiable or still in use elsewhere.
        List words = new ArrayList(list.size());
        for (Object element : list) {
            words.add(element instanceof Word ? ((Word) element).word() : element);
        }
        return ptb2Text(StringUtils.join(words));
    }

    /**
     * Returns a factory of {@link Word} tokenizers with {@code tokenizeCRs} off.
     *
     * @return a new tokenizer factory
     */
    public static TokenizerFactory<Word> factory() {
        return PTBTokenizerFactory.newPTBTokenizerFactory();
    }

    /**
     * Returns a factory of {@link Word} tokenizers.
     *
     * @param z value for the {@code tokenizeCRs} option
     * @return a new tokenizer factory
     */
    public static TokenizerFactory<Word> factory(boolean z) {
        return PTBTokenizerFactory.newPTBTokenizerFactory(z);
    }

    /**
     * Returns a factory of tokenizers that build their tokens with {@code lexedTokenFactory}.
     *
     * @param z value for the {@code tokenizeCRs} option
     * @param lexedTokenFactory token factory handed to every created tokenizer
     * @param <T> type of token produced
     * @return a new tokenizer factory
     */
    public static <T> TokenizerFactory<T> factory(boolean z, LexedTokenFactory<T> lexedTokenFactory) {
        // Parameterized (not raw) so the result needs no unchecked conversion.
        return new PTBTokenizerFactory<>(z, lexedTokenFactory);
    }

    /**
     * Returns a factory of {@link FeatureLabel} tokenizers.
     *
     * @param z value for the {@code tokenizeCRs} option
     * @param z2 value for the {@code invertible} option
     * @return a new tokenizer factory
     */
    public static TokenizerFactory<FeatureLabel> factory(boolean z, boolean z2) {
        return PTBTokenizerFactory.newPTBTokenizerFactory(z, z2);
    }

    /**
     * Returns a factory of {@link Word} tokenizers with every option given explicitly.
     *
     * @param z value for the {@code tokenizeCRs} option
     * @param z2 value for the {@code invertible} option
     * @param z3 value for the {@code suppressEscaping} option
     * @return a new tokenizer factory
     */
    public static TokenizerFactory<Word> factory(boolean z, boolean z2, boolean z3) {
        return PTBTokenizerFactory.newPTBTokenizerFactory(z, z2, z3);
    }

    /**
     * Command-line entry point: tokenizes each input file and prints its tokens.
     *
     * <p>Leading arguments that start with {@code '-'} are options:
     * <ul>
     *   <li>{@code -nl}: create the tokenizer with {@code tokenizeCRs} on</li>
     *   <li>{@code -preserveLines}: implies {@code -nl}; print tokens separated by single
     *       spaces and start a new output line at every {@code PTBLexer.cr} token</li>
     *   <li>{@code -dump}: print each token's {@code toString()} instead of its word</li>
     *   <li>{@code -ioFileList}: treat the remaining arguments as list files whose lines
     *       hold an input filename and an output filename separated by whitespace</li>
     *   <li>{@code -charset NAME}: encoding used to read inputs and write output files
     *       (default {@code utf-8})</li>
     *   <li>{@code -parseInside REGEX}: print only tokens found between a token matching
     *       {@code <REGEX>} and a token matching {@code </REGEX>}</li>
     * </ul>
     * Without {@code -ioFileList}, the remaining arguments are input files and all output
     * goes to standard output.
     *
     * @param strArr command-line arguments
     * @throws IOException if a file cannot be read or written, or a list file is malformed
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length < 1) {
            printUsage();
            return;
        }
        int argIndex = 0;
        String charset = "utf-8";
        Pattern openTag = null;
        Pattern closeTag = null;
        boolean tokenizeCRs = false;
        boolean preserveLines = false;
        boolean useIoFileList = false;
        boolean dump = false;
        // Bounds check first: the arguments may consist of options only.
        while (argIndex < strArr.length && strArr[argIndex].startsWith("-")) {
            String option = strArr[argIndex];
            boolean hasValue = argIndex < strArr.length - 1;
            if ("-nl".equals(option)) {
                tokenizeCRs = true;
            } else if ("-preserveLines".equals(option)) {
                preserveLines = true;
                tokenizeCRs = true;
            } else if ("-dump".equals(option)) {
                dump = true;
            } else if ("-ioFileList".equals(option)) {
                useIoFileList = true;
            } else if ("-charset".equals(option) && hasValue) {
                argIndex++;
                charset = strArr[argIndex];
            } else if ("-parseInside".equals(option) && hasValue) {
                argIndex++;
                String element = strArr[argIndex];
                try {
                    openTag = Pattern.compile("<(?:" + element + ")>");
                    closeTag = Pattern.compile("</(?:" + element + ")>");
                } catch (java.util.regex.PatternSyntaxException e) {
                    // Fall back to printing everything, but tell the user why.
                    System.err.println("Ignoring -parseInside, invalid pattern: " + e.getMessage());
                    openTag = null;
                    closeTag = null;
                }
            } else {
                System.err.println("Unknown option: " + option);
            }
            argIndex++;
        }
        if (argIndex >= strArr.length) {
            System.err.println("No input files given.");
            printUsage();
            return;
        }

        List<String> inputFiles = new ArrayList<>();
        List<String> outputFiles = null;
        if (useIoFileList) {
            outputFiles = new ArrayList<>();
            for (int j = argIndex; j < strArr.length; j++) {
                readIoFileList(strArr[j], charset, inputFiles, outputFiles);
            }
        } else {
            for (int j = argIndex; j < strArr.length; j++) {
                inputFiles.add(strArr[j]);
            }
        }

        for (int j = 0; j < inputFiles.size(); j++) {
            String outputFile = outputFiles == null ? null : outputFiles.get(j);
            tokenizeFile(inputFiles.get(j), outputFile, charset, tokenizeCRs, preserveLines, dump, openTag, closeTag);
        }
    }

    /** Prints the command-line usage summary to standard error. */
    private static void printUsage() {
        System.err.println("usage: java edu.stanford.nlp.process.PTBTokenizer [options]* filename+");
        System.err.println("  options: -nl|-preserveLines|-dump|-ioFileList|-charset|-parseInside");
    }

    /** Appends the input/output filename pair found on each non-blank line of {@code listFile}. */
    private static void readIoFileList(String listFile, String charset, List<String> inputFiles, List<String> outputFiles) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(listFile), charset))) {
            String line;
            // readLine() returns null at end of file; that is the only exit from this loop.
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                String[] fields = trimmed.split("\\s+");
                if (fields.length < 2) {
                    throw new IOException("Expected an input and an output filename in " + listFile + " but found: " + line);
                }
                inputFiles.add(fields[0]);
                outputFiles.add(fields[1]);
            }
        }
    }

    /** Tokenizes {@code inputFile} and prints its tokens to {@code outputFile}, or to standard output when that is null. */
    private static void tokenizeFile(String inputFile, String outputFile, String charset, boolean tokenizeCRs,
            boolean preserveLines, boolean dump, Pattern openTag, Pattern closeTag) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile), charset))) {
            PrintWriter out = outputFile == null
                    ? new PrintWriter(System.out, true)
                    : new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), charset)), true);
            try {
                PTBTokenizer<FeatureLabel> tokenizer = newPTBTokenizer(reader, tokenizeCRs, true);
                // Without -parseInside every token is printed; otherwise wait for an opening tag.
                boolean printing = openTag == null;
                boolean atLineStart = true;
                while (tokenizer.hasNext()) {
                    FeatureLabel token = tokenizer.next();
                    String text = token.word();
                    if (openTag != null && openTag.matcher(text).matches()) {
                        printing = true;
                    } else if (closeTag != null && closeTag.matcher(text).matches()) {
                        printing = false;
                    } else if (printing) {
                        if (dump) {
                            text = token.toString();
                        }
                        if (!preserveLines) {
                            out.println(text);
                        } else if (PTBLexer.cr.equals(text)) {
                            atLineStart = true;
                            out.println("");
                        } else {
                            if (atLineStart) {
                                atLineStart = false;
                            } else {
                                out.print(" ");
                            }
                            out.print(text);
                        }
                    }
                }
            } finally {
                if (outputFile == null) {
                    // Auto-flush only fires on println; push out a trailing print()-ed line
                    // without closing System.out.
                    out.flush();
                } else {
                    out.close();
                }
            }
        }
    }
}
