package com.antbrains.nlp.wordseg;

import com.antbrains.nlp.wordseg.Token;
import com.antbrains.nlp.wordseg.luceneanalyzer.OffsetAttribute;
import com.antbrains.nlp.wordseg.luceneanalyzer.StandardTokenizer;
import com.antbrains.nlp.wordseg.luceneanalyzer.TypeAttribute;
import com.antbrains.nlp.wordseg.luceneanalyzer.Version;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:com/antbrains/nlp/wordseg/LuceneSeg.class */
public class LuceneSeg {
    /** Tokenizer type tag that marks a token as part of an ideographic run. */
    private static final String TYPE_IDEOGRAPHIC = "<IDEOGRAPHIC>";
    /** Tokenizer type tag mapped to {@code Token.Type.ALPHA}. */
    private static final String TYPE_ALPHANUM = "<ALPHANUM>";
    /** Tokenizer type tag mapped to {@code Token.Type.NUMBER}. */
    private static final String TYPE_NUM = "<NUM>";

    /**
     * Splits {@code str} with a {@link StandardTokenizer} and groups the resulting tokens.
     *
     * <p>Grouping rules, in input order:
     * <ul>
     *   <li>Adjacent {@code <IDEOGRAPHIC>} tokens (no uncovered character between them) share
     *       one group and are typed {@code CWORD}.</li>
     *   <li>Every other token forms a group of its own; {@code <ALPHANUM>} becomes
     *       {@code ALPHA}, {@code <NUM>} becomes {@code NUMBER}, and any other tokenizer type
     *       keeps whatever type the {@code Token(String, int, int)} constructor assigns.</li>
     *   <li>Every character of {@code str} that the tokenizer did not cover (between tokens
     *       and after the last one) becomes a one-character {@code PUNCT} token in its own
     *       group.</li>
     * </ul>
     *
     * <p>An {@link IOException} raised while tokenizing is reported on standard error and the
     * tokens collected so far are returned, followed by {@code PUNCT} groups for the rest of
     * the input (best effort, no exception is propagated).
     *
     * @param str the text to segment; must not be {@code null}
     * @return the token groups in input order; never {@code null}
     */
    public static List<List<Token>> processByLuceneAnalyzer(String str) {
        List<List<Token>> groups = new ArrayList<List<Token>>();
        StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_29, new StringReader(str));
        try {
            OffsetAttribute offsetAttribute = (OffsetAttribute) tokenizer.addAttribute(OffsetAttribute.class);
            TypeAttribute typeAttribute = (TypeAttribute) tokenizer.addAttribute(TypeAttribute.class);
            List<Token> current = new ArrayList<Token>();
            // End offset of the last token seen; everything before it is already emitted.
            int consumed = 0;
            boolean inIdeographicRun = false;
            try {
                // The try wraps the whole loop so that incrementToken() itself is covered.
                while (tokenizer.incrementToken()) {
                    int startOffset = offsetAttribute.startOffset();
                    int endOffset = offsetAttribute.endOffset();
                    if (consumed < startOffset) {
                        // Characters skipped by the tokenizer end the pending group and
                        // break any ideographic run.
                        if (!current.isEmpty()) {
                            groups.add(current);
                            current = new ArrayList<Token>();
                        }
                        addPunctGroups(groups, str, consumed, startOffset);
                        inIdeographicRun = false;
                    }
                    consumed = endOffset;

                    String type = typeAttribute.type();
                    Token token = new Token(str, startOffset, endOffset);
                    boolean ideographic = TYPE_IDEOGRAPHIC.equals(type);
                    if (ideographic) {
                        token.setType(Token.Type.CWORD);
                    } else if (TYPE_ALPHANUM.equals(type)) {
                        token.setType(Token.Type.ALPHA);
                    } else if (TYPE_NUM.equals(type)) {
                        token.setType(Token.Type.NUMBER);
                    }
                    // Only an ideographic token that continues a run joins the pending group;
                    // anything else starts a fresh one.
                    if (!(ideographic && inIdeographicRun) && !current.isEmpty()) {
                        groups.add(current);
                        current = new ArrayList<Token>();
                    }
                    inIdeographicRun = ideographic;
                    current.add(token);
                }
            } catch (IOException e) {
                // Best effort: keep what was tokenized so far and fall through to the tail fill.
                e.printStackTrace();
            }
            if (!current.isEmpty()) {
                groups.add(current);
            }
            addPunctGroups(groups, str, consumed, str.length());
        } finally {
            try {
                tokenizer.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the tokenizer fails.
            }
        }
        return groups;
    }

    /** Appends one single-token PUNCT group per character of {@code str} in {@code [from, to)}. */
    private static void addPunctGroups(List<List<Token>> groups, String str, int from, int to) {
        for (int index = from; index < to; index++) {
            List<Token> single = new ArrayList<Token>();
            single.add(new Token(null, str, index, index + 1, Token.Type.PUNCT));
            groups.add(single);
        }
    }
}
