package com.antbrains.nlp.wordseg;

import com.antbrains.crf.BESB1B2MTagConvertor;
import com.antbrains.crf.CrfModel;
import com.antbrains.crf.SgdCrf;
import com.antbrains.crf.TagConvertor;
import com.antbrains.nlp.datrie.DoubleArrayTrie;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/antbrains/nlp/wordseg/WordSeg.class */
/**
 * Singleton facade that exposes the segmenters owned by {@link ChineseSegmenter}
 * (and the CRF model it provides) as methods returning plain lists of strings.
 *
 * <p>Every public segmentation method returns a new, empty, mutable list when
 * given {@code null} or {@code ""}.
 */
public class WordSeg {
    protected static Logger logger = Logger.getLogger(WordSeg.class);
    // Must stay declared after logger: the constructor logs when model loading fails.
    private static final WordSeg instance = new WordSeg();
    /** CRF model obtained from the segmenter; {@code null} if loading failed at construction. */
    private final CrfModel model;
    TagConvertor tc = new BESB1B2MTagConvertor();
    private final ChineseSegmenter cs = ChineseSegmenter.getInstance();

    /** @return the underlying {@link ChineseSegmenter} this facade delegates to */
    public ChineseSegmenter getCs() {
        return this.cs;
    }

    /**
     * Fetches the CRF model from the segmenter. A failure is logged rather than
     * propagated so that class initialisation still succeeds; {@link #model} is
     * then left {@code null}.
     */
    private WordSeg() {
        CrfModel loaded = null;
        try {
            loaded = this.cs.getModel();
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
        this.model = loaded;
    }

    /** @return the shared {@code WordSeg} instance */
    public static WordSeg getInstance() {
        return instance;
    }

    /** @return {@code true} if {@code str} is {@code null} or has length zero */
    private static boolean isNullOrEmpty(String str) {
        return str == null || str.length() == 0;
    }

    /** @return {@code true} if {@code c} is a Chinese character, a digit or an English letter per {@link StringTools} */
    private static boolean isWordChar(char c) {
        return StringTools.isChinese(c) || StringTools.isDigit(c) || StringTools.isEnLetter(c);
    }

    /** Collects the original text of each token, preserving order. */
    private List<String> tokenToList(List<Token> list) {
        List<String> texts = new ArrayList<String>(list.size());
        for (Token token : list) {
            texts.add(token.getOrigText());
        }
        return texts;
    }

    /** Same as {@link #mmSeg(String, DoubleArrayTrie)} with a {@code null} trie. */
    public List<String> mmSeg(String str) {
        return mmSeg(str, null);
    }

    /**
     * Segments {@code str} with the segmenter returned by {@code ChineseSegmenter.getMmseg()}.
     *
     * @param str            text to segment; {@code null} or empty yields an empty list
     * @param doubleArrayTrie passed through unchanged to the segmenter; may be {@code null}
     * @return the original text of each produced token, in order
     */
    public List<String> mmSeg(String str, DoubleArrayTrie doubleArrayTrie) {
        if (isNullOrEmpty(str)) {
            return new ArrayList<String>(0);
        }
        return tokenToList(this.cs.getMmseg().seg(str, doubleArrayTrie));
    }

    /** Same as {@link #rmmSeg(String, DoubleArrayTrie)} with a {@code null} trie. */
    public List<String> rmmSeg(String str) {
        return rmmSeg(str, null);
    }

    /**
     * Segments {@code str} with the segmenter returned by {@code ChineseSegmenter.getRmmseg()}.
     *
     * @param str            text to segment; {@code null} or empty yields an empty list
     * @param doubleArrayTrie passed through unchanged to the segmenter; may be {@code null}
     * @return the original text of each produced token, in order
     */
    public List<String> rmmSeg(String str, DoubleArrayTrie doubleArrayTrie) {
        if (isNullOrEmpty(str)) {
            return new ArrayList<String>(0);
        }
        return tokenToList(this.cs.getRmmseg().seg(str, doubleArrayTrie));
    }

    /** Same as {@link #mmRmmSeg(String, DoubleArrayTrie)} with a {@code null} trie. */
    public List<String> mmRmmSeg(String str) {
        return mmRmmSeg(str, null);
    }

    /**
     * Segments {@code str} with {@code ChineseSegmenter.seg}.
     *
     * @param str            text to segment; {@code null} or empty yields an empty list
     * @param doubleArrayTrie passed through unchanged to the segmenter; may be {@code null}
     * @return the original text of each produced token, in order
     */
    public List<String> mmRmmSeg(String str, DoubleArrayTrie doubleArrayTrie) {
        if (isNullOrEmpty(str)) {
            return new ArrayList<String>(0);
        }
        return tokenToList(this.cs.seg(str, doubleArrayTrie));
    }

    /**
     * Segments {@code str} via {@code SgdCrf.segment} using this instance's model and tag convertor.
     *
     * @param str text to segment; {@code null} or empty yields an empty list
     * @return whatever {@code SgdCrf.segment} returns for non-empty input
     */
    public List<String> crfSeg(String str) {
        if (isNullOrEmpty(str)) {
            return new ArrayList<String>(0);
        }
        // NOTE(review): model is null if loading failed in the constructor; how
        // SgdCrf.segment reacts to a null model is not visible here -- confirm.
        return SgdCrf.segment(str, this.model, this.tc);
    }

    /**
     * Segments {@code str} with {@link #mmRmmSeg(String)}, isolates every
     * non-word character into its own token, then merges adjacent single-character
     * word tokens.
     *
     * @param str text to segment; {@code null} or empty yields an empty list
     */
    public List<String> segForIndex(String str) {
        return fineSeg(removeNonChars(mmRmmSeg(str)));
    }

    /** Applies {@link #splitByNonChars(String)} to every element and concatenates the results. */
    private List<String> removeNonChars(List<String> list) {
        List<String> pieces = new ArrayList<String>();
        for (String token : list) {
            pieces.addAll(splitByNonChars(token));
        }
        return pieces;
    }

    /**
     * Splits {@code str} so that each character that is not Chinese, a digit or an
     * English letter becomes its own element, and each maximal run of the other
     * characters becomes one element. Nothing is dropped: concatenating the result
     * reproduces {@code str}.
     *
     * <p>A high surrogate immediately followed by a low surrogate is kept together
     * as one element so the result never contains half of a supplementary character.
     *
     * @param str text to split; {@code null} yields an empty list
     * @return the pieces in order of appearance
     */
    public List<String> splitByNonChars(String str) {
        List<String> pieces = new ArrayList<String>(2);
        if (str == null) {
            return pieces;
        }
        final int length = str.length();
        int runStart = 0;
        int pos = 0;
        while (pos < length) {
            char ch = str.charAt(pos);
            if (isWordChar(ch)) {
                pos++;
                continue;
            }
            // Flush the run of word characters that ended just before this one.
            if (pos > runStart) {
                pieces.add(str.substring(runStart, pos));
            }
            int width = 1;
            if (Character.isHighSurrogate(ch)
                    && pos + 1 < length
                    && Character.isLowSurrogate(str.charAt(pos + 1))) {
                width = 2;
            }
            pieces.add(str.substring(pos, pos + width));
            pos += width;
            runStart = pos;
        }
        if (runStart < length) {
            pieces.add(str.substring(runStart));
        }
        return pieces;
    }

    /**
     * Concatenates each maximal run of consecutive elements for which
     * {@link #needMerge(String)} is true into a single element; all other
     * elements are copied through unchanged.
     */
    private List<String> fineSeg(List<String> list) {
        final int size = list.size();
        List<String> merged = new ArrayList<String>(size);
        int i = 0;
        while (i < size) {
            String token = list.get(i);
            if (!needMerge(token)) {
                merged.add(token);
                i++;
                continue;
            }
            StringBuilder run = new StringBuilder(token);
            int next = i + 1;
            while (next < size && needMerge(list.get(next))) {
                run.append(list.get(next));
                next++;
            }
            merged.add(run.toString());
            i = next;
        }
        return merged;
    }

    /**
     * @return {@code true} only for a one-character string whose character is
     *         Chinese, a digit or an English letter; empty and longer strings
     *         are never merged
     */
    private boolean needMerge(String str) {
        if (str.length() != 1) {
            return false;
        }
        return isWordChar(str.charAt(0));
    }
}
