package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.WordTag;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.trees.Tree;
import java.io.Serializable;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/parser/lexparser/GermanUnknownWordModel.class */
/**
 * Unknown-word model that scores a (word, tag) pair from a coarse signature of
 * the word rather than from the word itself.
 *
 * <p>A signature is built by {@link #getSignature(String)} from an optional
 * capitalisation marker ("C"/"c") and an optional window of trailing
 * characters. {@link #train(Collection)} collects, per tag, how often each
 * signature occurs and stores log relative frequencies in {@code tagHash};
 * {@link #score(TaggedWord)} looks those values up again.
 *
 * <p>NOTE: {@code useEnd}, {@code useFirstCap} and {@code endLength} are static
 * and are overwritten by every constructor call, so all instances in a JVM
 * share the configuration of the most recently constructed one. Being static,
 * they are also not part of the serialized state.
 */
public class GermanUnknownWordModel implements Serializable {
    private static final long serialVersionUID = 221;

    // The following three constants are not referenced anywhere in this class.
    private static final String encoding = "UTF-8";
    private static final boolean useFirst = false;
    private static final boolean useGT = false;

    /** Signature used for words whose own signature was never seen in training. */
    private static final String UNKNOWN = "UNK";

    /** Matches plain numbers such as "12", "12." or "12.5"; compiled once because score() is called per word. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("[0-9]+\\.?[0-9]*");

    private static boolean useEnd = true;
    private static boolean useFirstCap = true;
    private static int endLength = 2;

    // Instance field names are kept as-is: they are part of the serialized form.
    /** tag -> (signature -> log relative frequency of the signature under that tag). */
    private Map<String, Counter<String>> tagHash = new HashMap<>();
    /** Every signature observed during training. */
    private Set<String> seenEnd = new HashSet<>();
    /** tag -> log score computed by {@link #trainUnknownGT(Collection)}. */
    private Map<String, Double> unknownGT = new HashMap<>();

    /**
     * Configures the (static) signature settings from the given lexicon options.
     *
     * @param lexOptions supplies {@code unknownSuffixSize} and {@code useUnknownWordSignatures}
     */
    public GermanUnknownWordModel(Options.LexOptions lexOptions) {
        endLength = lexOptions.unknownSuffixSize;
        useEnd = lexOptions.unknownSuffixSize > 0 && lexOptions.useUnknownWordSignatures > 0;
        useFirstCap = lexOptions.useUnknownWordSignatures > 0;
    }

    /**
     * Scores an integer-encoded tagged word by converting it to a {@link TaggedWord}.
     *
     * @param intTaggedWord the word/tag pair to score
     * @return the same value as {@link #score(TaggedWord)}
     */
    public double score(IntTaggedWord intTaggedWord) {
        return score(intTaggedWord.toTaggedWord());
    }

    /**
     * Returns the stored log score for the given word/tag pair.
     *
     * <ul>
     *   <li>Words that look like numbers score {@code 0.0} with tag "CARD" and
     *       negative infinity with any other tag.</li>
     *   <li>Otherwise the word's signature (or "UNK" if that signature was never
     *       seen in training) is looked up under the tag; an unknown tag yields
     *       negative infinity.</li>
     *   <li>If neither signature feature is enabled a warning is printed to
     *       stderr and negative infinity is returned.</li>
     * </ul>
     *
     * @param taggedWord the word/tag pair to score
     * @return the log score, or {@code Double.NEGATIVE_INFINITY}
     */
    public double score(TaggedWord taggedWord) {
        String word = taggedWord.word();
        String tag = taggedWord.tag();

        if (NUMBER_PATTERN.matcher(word).matches()) {
            return tag.equals("CARD") ? 0.0d : Double.NEGATIVE_INFINITY;
        }

        if (!useEnd && !useFirstCap) {
            System.err.println("Warning: no unknown word model in place!\nGiving the combination " + word + " " + tag + " zero probability.");
            return Double.NEGATIVE_INFINITY;
        }

        String signature = getSignature(word);
        if (!this.seenEnd.contains(signature)) {
            signature = UNKNOWN;
        }
        Counter<String> signatureScores = this.tagHash.get(tag);
        if (signatureScores == null) {
            return Double.NEGATIVE_INFINITY;
        }
        // A signature seen in training, but never with this tag, falls back to the tag's "UNK" entry.
        return signatureScores.keySet().contains(signature)
                ? signatureScores.getCount(signature)
                : signatureScores.getCount(UNKNOWN);
    }

    /**
     * Looks up the per-tag score stored by {@link #trainUnknownGT(Collection)}.
     * Not called from within this class.
     *
     * @param str the tag
     * @return the stored value, or negative infinity if the tag is unknown
     */
    private double scoreGT(String str) {
        Double stored = this.unknownGT.get(str);
        return stored != null ? stored.doubleValue() : Double.NEGATIVE_INFINITY;
    }

    /**
     * Builds the signature of a word from the enabled features.
     *
     * <p>With {@code useFirstCap}, the signature starts with "C" if the first
     * character equals its upper-case form and "c" otherwise. With
     * {@code useEnd}, up to {@code endLength} characters are appended, taken
     * from the window ending just before the final character.
     *
     * @param str the word; an empty word yields an empty signature
     * @return the signature string
     */
    private String getSignature(String str) {
        if (str.isEmpty()) {
            // substring(0, 1) and substring(0, -1) below would throw on an empty word.
            return "";
        }
        StringBuilder signature = new StringBuilder();
        int lastIndex = str.length() - 1;
        if (useFirstCap) {
            String first = str.substring(0, 1);
            signature.append(first.equals(first.toUpperCase()) ? "C" : "c");
        }
        if (useEnd) {
            // NOTE(review): the window ends at lastIndex (exclusive), so the final
            // character is not included. Kept as-is, since changing it would alter
            // the signatures of already trained models.
            int start = Math.max(lastIndex - endLength, 0);
            signature.append(str.substring(start, lastIndex));
        }
        return signature.toString();
    }

    /**
     * Trains the model on the tagged yields of the given trees.
     *
     * <p>For every tag, counts the signatures of the words carrying it, adds
     * one pseudo-observation for the "UNK" signature, and stores
     * {@code log(count / total)} per signature in {@code tagHash}. Every
     * signature encountered is also recorded in {@code seenEnd}.
     *
     * @param collection the training trees
     */
    public void train(Collection<Tree> collection) {
        if (useEnd) {
            System.out.println("treating unknown word as the average of their equivalents by identity of last three letters.");
        }
        trainUnknownGT(collection);

        Map<String, Counter<String>> signatureCountsByTag = new HashMap<>();
        Counter<String> tagTotals = new Counter<>();
        for (Tree tree : collection) {
            for (TaggedWord taggedWord : tree.taggedYield()) {
                String signature = getSignature(taggedWord.word());
                String tag = taggedWord.tag();
                Counter<String> signatureCounts = signatureCountsByTag.get(tag);
                if (signatureCounts == null) {
                    signatureCounts = new Counter<>();
                    signatureCountsByTag.put(tag, signatureCounts);
                }
                signatureCounts.incrementCount(signature);
                tagTotals.incrementCount(tag);
                this.seenEnd.add(signature);
            }
        }

        for (Map.Entry<String, Counter<String>> entry : signatureCountsByTag.entrySet()) {
            String tag = entry.getKey();
            Counter<String> signatureCounts = entry.getValue();
            Counter<String> logScores = this.tagHash.get(tag);
            if (logScores == null) {
                logScores = new Counter<>();
                this.tagHash.put(tag, logScores);
            }
            // One pseudo-observation for "UNK": bump the tag total and give "UNK" a count of 1.
            tagTotals.incrementCount(tag);
            signatureCounts.setCount(UNKNOWN, 1.0d);
            double total = tagTotals.getCount(tag);
            for (String signature : signatureCounts.keySet()) {
                logScores.setCount(signature, Math.log(signatureCounts.getCount(signature) / total));
            }
        }
    }

    /**
     * Prints corpus statistics and fills {@code unknownGT} with one log score
     * per tag: {@code log(singletons / (tagCount * unseenPairs))}, where
     * {@code singletons} is the number of word/tag pairs seen exactly once
     * with that tag and {@code unseenPairs} is the number of word types never
     * seen with that tag.
     *
     * <p>If a tag was seen with every word type, {@code unseenPairs} is 0 and
     * the stored value is infinite or NaN.
     *
     * @param collection the training trees
     */
    private void trainUnknownGT(Collection<Tree> collection) {
        Counter<TaggedWord> taggedWordCounts = new Counter<>();
        Counter<WordTag> wordTagCounts = new Counter<>();
        Counter<String> tagCounts = new Counter<>();
        Counter<String> singletonsByTag = new Counter<>();
        Counter<String> unseenPairsByTag = new Counter<>();
        Set<String> wordTypes = new HashSet<>();
        int tokens = 0;

        for (Tree tree : collection) {
            for (TaggedWord taggedWord : tree.taggedYield()) {
                tokens++;
                WordTag wordTag = toWordTag(taggedWord);
                wordTagCounts.incrementCount(wordTag);
                taggedWordCounts.incrementCount(taggedWord);
                tagCounts.incrementCount(wordTag.tag());
                wordTypes.add(wordTag.word());
            }
        }

        System.out.println("Total tokens: " + tokens);
        System.out.println("Total WordTag types: " + wordTagCounts.keySet().size());
        System.out.println("Total TaggedWord types: " + taggedWordCounts.keySet().size());
        System.out.println("Total tag types: " + tagCounts.keySet().size());
        System.out.println("Total word types: " + wordTypes.size());

        Set<WordTag> seenPairs = wordTagCounts.keySet();
        for (WordTag wordTag : seenPairs) {
            if (wordTagCounts.getCount(wordTag) == 1.0d) {
                singletonsByTag.incrementCount(wordTag.tag());
            }
        }

        for (String tag : tagCounts.keySet()) {
            for (String word : wordTypes) {
                if (!seenPairs.contains(new WordTag(word, tag))) {
                    unseenPairsByTag.incrementCount(tag);
                }
            }
        }

        for (String tag : tagCounts.keySet()) {
            double denominator = tagCounts.getCount(tag) * unseenPairsByTag.getCount(tag);
            this.unknownGT.put(tag, Double.valueOf(Math.log(singletonsByTag.getCount(tag) / denominator)));
        }
    }

    /** Converts a {@link TaggedWord} into a {@link WordTag} carrying the same word and tag. */
    private static WordTag toWordTag(TaggedWord taggedWord) {
        return new WordTag(taggedWord.word(), taggedWord.tag());
    }
}
