package org.dkpro.tc.features.ngram.meta;

import de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution;
import de.tudarmstadt.ukp.dkpro.core.ngrams.util.CharacterNGramStringIterable;
import java.util.Iterator;
import java.util.Locale;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.tc.api.type.TextClassificationTarget;
import org.dkpro.tc.features.ngram.base.NGramFeatureExtractorBase;

/* loaded from: input_file:org/dkpro/tc/features/ngram/meta/CharacterNGramMC.class */
public class CharacterNGramMC extends LuceneMC {
    public static final String LUCENE_CHAR_NGRAM_FIELD = "charngram";

    @ConfigurationParameter(name = NGramFeatureExtractorBase.PARAM_NGRAM_MIN_N, mandatory = true, defaultValue = {"1"})
    private int ngramMinN;

    @ConfigurationParameter(name = NGramFeatureExtractorBase.PARAM_NGRAM_MAX_N, mandatory = true, defaultValue = {"3"})
    private int ngramMaxN;

    @ConfigurationParameter(name = NGramFeatureExtractorBase.PARAM_NGRAM_LOWER_CASE, mandatory = false, defaultValue = {"true"})
    private String stringLowerCase;
    boolean lowerCase = true;

    @Override // org.dkpro.tc.features.ngram.meta.LuceneMC
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.lowerCase = Boolean.valueOf(this.stringLowerCase).booleanValue();
    }

    @Override // org.dkpro.tc.features.ngram.meta.LuceneMC
    protected FrequencyDistribution<String> getNgramsFD(JCas jCas) {
        return getAnnotationCharacterNgrams(new TextClassificationTarget(jCas, 0, jCas.getDocumentText().length()), this.lowerCase, this.ngramMinN, this.ngramMaxN, '^', '$');
    }

    @Override // org.dkpro.tc.features.ngram.meta.LuceneMC
    protected String getFieldName() {
        return LUCENE_CHAR_NGRAM_FIELD + this.featureExtractorName;
    }

    public static FrequencyDistribution<String> getAnnotationCharacterNgrams(Annotation annotation, boolean z, int i, int i2, char c, char c2) {
        FrequencyDistribution<String> frequencyDistribution = new FrequencyDistribution<>();
        Iterator it = new CharacterNGramStringIterable(c + annotation.getCoveredText() + c2, i, i2).iterator();
        while (it.hasNext()) {
            String str = (String) it.next();
            if (z) {
                str = str.toLowerCase();
            }
            frequencyDistribution.inc(str);
        }
        return frequencyDistribution;
    }
}
