package org.dkpro.tc.features.ngram.util;

import de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.ngrams.util.NGramStringListIterable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.dkpro.tc.api.type.TextClassificationTarget;

/* loaded from: input_file:org/dkpro/tc/features/ngram/util/KeywordNGramUtils.class */
/**
 * Static helpers that turn the tokens of a CAS into a sequence of keyword markers and count
 * the n-grams built over that sequence.
 */
public class KeywordNGramUtils {
    /** Marker appended to the keyword sequence after each sentence (when requested). */
    public static final String SENTENCE_BOUNDARY = "SB";
    /** Marker added to the keyword sequence for a "," token (when requested). */
    public static final String COMMA = "CA";
    /** Separator placed between the elements of one n-gram when it is joined into a string. */
    public static final String GLUE = "_";
    /** Replacement for the spaces inside a keyword that spans several tokens. */
    public static final String MIDNGRAMGLUE = "_A";

    /**
     * Counts the n-grams over the keyword sequence found inside {@code annotation}.
     *
     * <p>For every sentence covered by {@code annotation}, each token is lower-cased and compared
     * against {@code set}, both on its own and as the last token of a space-separated run of
     * tokens reaching back towards the start of the sentence. Every match is appended to a
     * sequence; optional comma and sentence-boundary markers are appended as well. The sequence
     * is then handed to {@link NGramStringListIterable} and each resulting n-gram, joined with
     * {@link #GLUE}, is counted.
     *
     * @param jCas       the CAS holding the {@link Sentence} and {@link Token} annotations
     * @param annotation the annotation whose covered sentences are processed
     * @param i          second argument given to {@link NGramStringListIterable}
     *                   (presumably the minimum n-gram length; confirm against that class)
     * @param i2         third argument given to {@link NGramStringListIterable}
     *                   (presumably the maximum n-gram length; confirm against that class)
     * @param z          if {@code true}, a sentence marker is appended after each sentence
     * @param z2         if {@code true}, the sentence marker carries a {@code BEG}, {@code MID}
     *                   or {@code END} suffix derived from the sentence's relative position
     * @param z3         if {@code true}, "," tokens contribute a {@link #COMMA} marker
     * @param set        the keywords to look for; compared against lower-cased token text
     * @return the frequency distribution of the joined n-gram strings
     */
    public static FrequencyDistribution<String> getDocumentKeywordNgrams(JCas jCas, Annotation annotation, int i, int i2, boolean z, boolean z2, boolean z3, Set<String> set) {
        FrequencyDistribution<String> ngramCounts = new FrequencyDistribution<>();
        List<String> keywordSequence = new ArrayList<>();

        // Select the covered sentences once; the same list supplies both the total and the loop.
        List<Sentence> sentences = JCasUtil.selectCovered(jCas, Sentence.class, annotation);
        int sentenceCount = sentences.size();
        int sentenceIndex = 0;

        for (Sentence sentence : sentences) {
            List<Token> tokens = JCasUtil.selectCovered(Token.class, sentence);
            for (int end = 0; end < tokens.size(); end++) {
                // NOTE(review): toLowerCase() uses the default locale; kept as-is because the
                // casing applied to the entries of 'set' is not visible from here.
                String current = tokens.get(end).getCoveredText().toLowerCase();
                boolean foundComposite = false;
                String composite = "";

                // Grow the candidate leftwards: "t[end]", "t[end-1] t[end]", ... back to token 0.
                for (int start = end; start >= 0; start--) {
                    composite = tokens.get(start).getCoveredText().toLowerCase() + " " + composite;
                    // On the first pass the candidate is "token " and the trailing space is
                    // dropped. NOTE(review): replace() removes every space, not only the
                    // trailing one, which differs if a token's text itself contains spaces.
                    if (composite.endsWith(" ")) {
                        composite = composite.replace(" ", "");
                    }
                    if (set.contains(composite)) {
                        keywordSequence.add(composite.replace(" ", MIDNGRAMGLUE));
                        foundComposite = true;
                    }
                }

                // NOTE(review): the comma branch is an 'else' of the whole condition, so it is
                // also reached when a composite match was found for a "," token.
                if (!foundComposite && set.contains(current)) {
                    keywordSequence.add(current);
                } else if (z3 && current.equals(",")) {
                    keywordSequence.add(COMMA);
                }
            }

            if (z) {
                keywordSequence.add(sentenceMarker(sentenceIndex, sentenceCount, z2));
            }
            sentenceIndex++;
        }

        String[] sequence = keywordSequence.toArray(new String[keywordSequence.size()]);
        for (List<String> ngram : new NGramStringListIterable(sequence, i, i2)) {
            ngramCounts.inc(StringUtils.join(ngram, GLUE));
        }
        return ngramCounts;
    }

    /**
     * Builds the marker appended after a sentence: {@link #SENTENCE_BOUNDARY}, optionally
     * suffixed with {@code BEG} (relative position below 0.25), {@code END} (above 0.75) or
     * {@code MID} (anything else).
     */
    private static String sentenceMarker(int sentenceIndex, int sentenceCount, boolean withLocation) {
        if (!withLocation) {
            return SENTENCE_BOUNDARY;
        }
        double relativePosition = ((double) sentenceIndex) / ((double) sentenceCount);
        if (relativePosition < 0.25d) {
            return SENTENCE_BOUNDARY + "BEG";
        }
        if (relativePosition > 0.75d) {
            return SENTENCE_BOUNDARY + "END";
        }
        return SENTENCE_BOUNDARY + "MID";
    }

    /**
     * Sums the keyword n-gram counts of several views.
     *
     * <p>For each CAS in {@code list}, the {@link TextClassificationTarget} returned by
     * {@link JCasUtil#selectSingle} is passed to
     * {@link #getDocumentKeywordNgrams(JCas, Annotation, int, int, boolean, boolean, boolean, Set)}
     * and the resulting counts are added to one combined distribution.
     *
     * @param list the views to process
     * @param i    forwarded unchanged as {@code i}
     * @param i2   forwarded unchanged as {@code i2}
     * @param z    forwarded unchanged as {@code z}
     * @param z2   forwarded unchanged as {@code z2}
     * @param z3   forwarded unchanged as {@code z3}
     * @param set  forwarded unchanged as {@code set}
     * @return the combined frequency distribution over all views
     */
    public static FrequencyDistribution<String> getMultipleViewKeywordNgrams(List<JCas> list, int i, int i2, boolean z, boolean z2, boolean z3, Set<String> set) {
        FrequencyDistribution<String> combined = new FrequencyDistribution<>();
        for (JCas view : list) {
            TextClassificationTarget target = JCasUtil.selectSingle(view, TextClassificationTarget.class);
            FrequencyDistribution<String> viewCounts = getDocumentKeywordNgrams(view, target, i, i2, z, z2, z3, set);
            for (String ngram : viewCounts.getKeys()) {
                combined.addSample(ngram, viewCounts.getCount(ngram));
            }
        }
        return combined;
    }
}
