package com.antbrains.nlp.wordseg;

import com.antbrains.nlp.datrie.DoubleArrayTrie;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/* loaded from: input_file:com/antbrains/nlp/wordseg/MMSeg.class */
/**
 * Dictionary-driven word segmenter.
 *
 * <p>Dictionary words are stored in a {@link DoubleArrayTrie} and, when hash mode is
 * enabled, words of at most {@value #MAX_LEN} characters are stored in a
 * {@link MaxMatchByHashset} instead. {@link #seg(String, DoubleArrayTrie)} scans the input
 * from left to right and, at every position, emits a token whose length is the match length
 * reported by the dictionary lookups (or a single character when nothing longer matches).
 *
 * <p>Dictionary updates take the write lock and lookups take the read lock of one shared
 * {@link ReadWriteLock}.
 */
public class MMSeg {
    /** Longest word stored in the hash-set dictionary; longer words always go to the trie. */
    private static final int MAX_LEN = 10;

    /** Word lists larger than this make {@link #MMSeg(List)} switch to hash mode. */
    private static final int MAX_TRI_WORDS = 1000000;

    /** Characters treated as Chinese numerals when {@link #processNumber} is enabled. */
    private static final String CN_NUMBERS = "零一二三四五六七八九十百千万亿";

    // Initialised here rather than in each constructor so that every construction path,
    // including MMSeg(String), gets non-null dictionaries.
    private final DoubleArrayTrie trie = new DoubleArrayTrie();
    private final MaxMatchByHashset mm = new MaxMatchByHashset(MAX_LEN);
    private final ReadWriteLock lock = new ReentrantReadWriteLock();

    /** When true, words of at most {@link #MAX_LEN} characters are stored in {@code mm}. */
    private boolean useHash;

    /**
     * When true, a run of two or more characters from {@link #CN_NUMBERS} is emitted as one
     * token whenever the run is longer than the dictionary match at the same position.
     */
    public boolean processNumber;

    /** Creates a segmenter with an empty dictionary (trie mode). */
    public MMSeg() {
    }

    /**
     * Creates a segmenter from a word list, enabling hash mode when the list holds more than
     * {@value #MAX_TRI_WORDS} entries.
     *
     * @param list dictionary words; must not be {@code null}
     */
    public MMSeg(List<String> list) {
        this(list, list.size() > MAX_TRI_WORDS);
    }

    /**
     * Creates a segmenter from a word list with an explicit storage mode.
     *
     * @param list dictionary words
     * @param z    {@code true} to store short words in the hash-set dictionary,
     *             {@code false} to store every word in the trie
     */
    public MMSeg(List<String> list, boolean z) {
        this.useHash = z;
        addWordList(list);
    }

    /**
     * Creates a segmenter whose dictionary is read from a file via
     * {@code FileTools.readFile2List(str, "UTF-8")}.
     *
     * <p>If reading fails, the stack trace is printed and the segmenter is left with an
     * empty dictionary.
     *
     * @param str path of the dictionary file
     */
    public MMSeg(String str) {
        try {
            addWordList(FileTools.readFile2List(str, "UTF-8"));
        } catch (IOException e) {
            // Best effort, as before: report the failure and continue with no words loaded.
            e.printStackTrace();
        }
    }

    /**
     * Returns the sum of the word count reported by the hash-set dictionary and the size
     * reported by the trie.
     *
     * @return combined size of both dictionaries
     */
    public int getSize() {
        // Read under the same lock as the lookups so a concurrent addWords() is not observed
        // half-way through.
        this.lock.readLock().lock();
        try {
            return this.mm.getTotalWords() + this.trie.size();
        } finally {
            this.lock.readLock().unlock();
        }
    }

    /**
     * Adds more words to the dictionary.
     *
     * @param list words to add; entries shorter than two characters after trimming are ignored
     */
    public void addWords(List<String> list) {
        addWordList(list);
    }

    /**
     * Inserts every usable word of {@code words} into the dictionaries while holding the
     * write lock.
     */
    private void addWordList(List<String> words) {
        this.lock.writeLock().lock();
        try {
            for (String raw : words) {
                String word = raw.trim();
                if (word.length() < 2) {
                    // Single characters are produced by seg() anyway, so they are not stored.
                    continue;
                }
                if (this.useHash && word.length() <= MAX_LEN) {
                    this.mm.add(word);
                } else {
                    // The value 0 is what the original code stores for every word.
                    this.trie.coverInsert(word, 0);
                }
            }
        } finally {
            this.lock.writeLock().unlock();
        }
    }

    /**
     * Looks up the match length at {@code offset}, preferring a positive trie result and
     * otherwise falling back to the hash-set dictionary. Runs under the read lock.
     */
    private int find(String text, int offset) {
        this.lock.readLock().lock();
        try {
            // The first element of the trie result is treated as the matched length.
            int trieLength = this.trie.find(text, offset)[0];
            return trieLength > 0 ? trieLength : this.mm.find(text, offset);
        } finally {
            this.lock.readLock().unlock();
        }
    }

    /** Looks up the match length in an optional extra trie; a {@code null} trie yields 0. */
    private int findInTrie(DoubleArrayTrie extraTrie, String text, int offset) {
        if (extraTrie == null) {
            return 0;
        }
        return extraTrie.find(text, offset)[0];
    }

    /** Returns whether {@code c} is one of the characters in {@link #CN_NUMBERS}. */
    private static boolean isCnNumber(char c) {
        return CN_NUMBERS.indexOf(c) >= 0;
    }

    /**
     * Splits {@code str} into tokens.
     *
     * <p>At each position the token length is the larger of the match lengths reported by
     * this segmenter's own dictionaries and by {@code doubleArrayTrie}. When
     * {@link #processNumber} is set and a run of at least two Chinese-numeral characters
     * starts at the position and is longer than that match, the run is emitted as a single
     * token instead. When no match longer than one character exists, a single-character
     * token is emitted.
     *
     * @param str             text to segment; must not be {@code null}
     * @param doubleArrayTrie optional additional dictionary consulted alongside the built-in
     *                        ones; may be {@code null}
     * @return tokens in input order, each constructed as {@code new Token(str, start, end)}
     */
    public List<Token> seg(String str, DoubleArrayTrie doubleArrayTrie) {
        List<Token> tokens = new ArrayList<Token>();
        int length = str.length();
        int pos = 0;
        while (pos < length) {
            int matched = Math.max(find(str, pos), findInTrie(doubleArrayTrie, str, pos));

            if (this.processNumber && isCnNumber(str.charAt(pos))) {
                int runEnd = pos + 1;
                while (runEnd < length && isCnNumber(str.charAt(runEnd))) {
                    runEnd++;
                }
                int runLength = runEnd - pos;
                if (runLength > 1 && runLength > matched) {
                    tokens.add(new Token(str, pos, runEnd));
                    pos = runEnd;
                    // Restart at the new position: 'matched' belongs to the old position and
                    // must not be reused to emit a second token.
                    continue;
                }
            }

            // Fall back to a single character when the dictionaries report nothing longer.
            int step = matched > 1 ? matched : 1;
            tokens.add(new Token(str, pos, pos + step));
            pos += step;
        }
        return tokens;
    }
}
