/*
 * Decompiled with CFR 0.152.
 */
package opennlp.grok.preprocess.tokenize;

import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import opennlp.common.preprocess.Tokenizer;
import opennlp.common.util.ObjectIntPair;
import opennlp.common.util.PerlHelp;
import opennlp.common.xml.NLPDocument;
import opennlp.common.xml.XmlUtils;
import opennlp.grok.preprocess.tokenize.TokContextGenerator;
import opennlp.grok.preprocess.tokenize.TokEventCollector;
import opennlp.maxent.ContextGenerator;
import opennlp.maxent.EventCollector;
import opennlp.maxent.EventCollectorAsStream;
import opennlp.maxent.EventStream;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.maxent.MaxentModel;
import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
import org.jdom.Element;

/**
 * Tokenizer that first splits text on whitespace and then, for every chunk
 * that is not purely alphanumeric, asks a maxent model at each character
 * position whether the chunk should be split there.
 *
 * <p>A split is taken when the model's best outcome is {@code "T"} or the
 * character is {@code '?'}, subject to the extra rules documented on
 * {@link #tokenize(String)}.
 */
public class TokenizerME
implements Tokenizer {
    /** Model evaluated once per character position of each non-alphanumeric chunk. */
    private final MaxentModel model;
    /** Produces the model context for a (buffer, position) pair. */
    private final ContextGenerator cg = new TokContextGenerator();

    /**
     * Creates a tokenizer backed by the given model.
     *
     * @param mod model used to decide split points
     */
    public TokenizerME(MaxentModel mod) {
        this.model = mod;
    }

    /**
     * Re-tokenizes every word element of the document.
     *
     * <p>For each element returned by {@code doc.wordIterator()}, the element's
     * text is run through {@link #tokenize(String)}. When that yields more than
     * one token and the word's parent element has no {@code "type"} attribute,
     * one TOK element per token is created and handed, together with the
     * parent, to {@code XmlUtils.replace}.
     *
     * @param doc document whose word elements are examined
     */
    public void process(NLPDocument doc) {
        Iterator words = doc.wordIterator();
        while (words.hasNext()) {
            Element oldWord = (Element)words.next();
            String[] tokenized = this.tokenize(oldWord.getText());
            // Nothing to do when the word was not split any further.
            if (tokenized.length <= 1) {
                continue;
            }
            // The parent is only looked up after the length check, as before.
            Element parentToken = oldWord.getParent();
            if (parentToken.getAttributeValue("type") != null) {
                // Parents that carry a "type" attribute are left untouched.
                continue;
            }
            ArrayList<Element> replacements = new ArrayList<Element>(tokenized.length);
            for (int j = 0; j < tokenized.length; j++) {
                replacements.add(NLPDocument.createTOK(tokenized[j]));
            }
            XmlUtils.replace(parentToken, replacements);
        }
    }

    /**
     * Returns the set of prerequisites of this component.
     *
     * @return a new, empty, mutable set
     */
    public Set requires() {
        return new HashSet<Object>();
    }

    /**
     * Splits a string into tokens.
     *
     * <p>The input is split on whitespace. A non-empty chunk for which
     * {@code PerlHelp.isAlphanumeric} holds becomes a token unchanged. Any other
     * chunk is scanned character by character; at position {@code j} a split is
     * taken only when all of the following hold:
     * <ul>
     *   <li>the model's best outcome is {@code "T"}, or the character is {@code '?'};</li>
     *   <li>at least one character has accumulated since the previous split;</li>
     *   <li>the character is not a {@code '.'} for which
     *       {@code i != tokens.size() - 1}.</li>
     * </ul>
     * The split normally falls just before position {@code j}; see
     * {@link #splitPoint(StringBuffer, int, int)} for the apostrophe cases in
     * which it falls just after it. Whatever remains after the last split is
     * added as a final token.
     *
     * @param s text to tokenize
     * @return the tokens in order of appearance
     */
    public String[] tokenize(String s) {
        String[] toksByWhitespace = PerlHelp.splitByWhitespace(s);
        ArrayList<String> tokens = new ArrayList<String>();
        for (int i = 0; i < toksByWhitespace.length; i++) {
            String tok = toksByWhitespace[i];
            if (tok.length() > 0 && PerlHelp.isAlphanumeric(tok)) {
                tokens.add(tok);
                continue;
            }

            int index = 0; // start of the not-yet-emitted part of the chunk
            int end = tok.length();
            // Kept as StringBuffer: it reaches the context generator as a plain
            // Object, so that code may depend on the concrete type.
            StringBuffer sb = new StringBuffer(tok);
            for (int j = 0; j < end; j++) {
                // The model is consulted for every position, even if no split follows.
                double[] probs = this.model.eval(this.cg.getContext((Object)new ObjectIntPair((Object)sb, j)));
                String best = this.model.getBestOutcome(probs);
                char c = sb.charAt(j);

                boolean splitRequested = best.equals("T") || c == '?';
                boolean hasPendingText = index < j;
                // NOTE(review): this compares the whitespace-chunk index with the
                // number of tokens emitted so far; the intent is not evident from
                // this file, so the rule is preserved exactly as found.
                boolean periodHeldBack = c == '.' && i != tokens.size() - 1;
                if (!splitRequested || !hasPendingText || periodHeldBack) {
                    continue;
                }

                int nextIndex = splitPoint(sb, j, end);
                String tokToAdd = sb.substring(index, nextIndex);
                if (tokToAdd.length() > 0) {
                    tokens.add(tokToAdd);
                }
                index = nextIndex;
            }
            // Emit whatever follows the last split point.
            if (index < end) {
                tokens.add(sb.substring(index, end));
            }
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Chooses where to cut when a split has been accepted at position {@code j}.
     *
     * <p>The cut is moved to {@code j + 1} (so the character at {@code j} stays
     * with the preceding token) when the next character is an apostrophe and
     * either the current character is {@code x}, {@code s} or {@code z}, or the
     * apostrophe is followed by {@code s}, or it is followed by {@code re}.
     * Otherwise the cut is at {@code j}.
     *
     * <p>Every look-ahead is bounds-checked against {@code end}. Previously an
     * apostrophe pattern reaching the end of the chunk (for example
     * {@code ok?'}) read one character past the buffer and threw
     * {@code StringIndexOutOfBoundsException}.
     *
     * @param sb  chunk being scanned
     * @param j   position at which the split was accepted
     * @param end length of the chunk
     * @return {@code j + 1} for the apostrophe cases above, otherwise {@code j}
     */
    private static int splitPoint(StringBuffer sb, int j, int end) {
        boolean apostropheNext = j + 1 < end && sb.charAt(j + 1) == '\'';
        if (!apostropheNext) {
            return j;
        }
        char c = sb.charAt(j);
        boolean sibilantBefore = c == 'x' || c == 's' || c == 'z';
        boolean followedByS = j + 2 < end && sb.charAt(j + 2) == 's';
        boolean followedByRe = j + 3 < end && sb.charAt(j + 2) == 'r' && sb.charAt(j + 3) == 'e';
        return (sibilantBefore || followedByS || followedByRe) ? j + 1 : j;
    }

    /**
     * Trains a tokenizer model and writes it to disk.
     *
     * <p>Events are collected from the file named by {@code args[0]}, a GIS
     * model is trained from them, and the model is persisted to the file named
     * by {@code args[1]}. Any failure, including missing arguments, is reported
     * with a stack trace instead of being propagated.
     *
     * @param args {@code args[0]} is the training data path, {@code args[1]} the
     *             output model path
     */
    public static void train(String[] args) {
        try {
            FileReader datafr = new FileReader(new File(args[0]));
            try {
                File output = new File(args[1]);
                EventCollectorAsStream evc = new EventCollectorAsStream((EventCollector)new TokEventCollector(datafr));
                // 100 and 10 are passed straight to GIS.trainModel; presumably
                // iteration count and feature cutoff -- TODO confirm against maxent docs.
                GISModel tokMod = GIS.trainModel((EventStream)evc, 100, 10);
                new SuffixSensitiveGISModelWriter(tokMod, output).persist();
            }
            finally {
                // Release the training file whether or not training succeeded.
                datafr.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Command-line entry point; forwards its arguments to {@link #train(String[])}.
     *
     * @param args see {@link #train(String[])}
     */
    public static void main(String[] args) {
        TokenizerME.train(args);
    }
}

