package org.dkpro.tc.features.ngram.meta;

import de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.io.IOException;
import java.util.Set;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.tc.api.exception.TextClassificationException;
import org.dkpro.tc.api.features.util.FeatureUtil;
import org.dkpro.tc.api.type.TextClassificationTarget;
import org.dkpro.tc.features.ngram.base.LuceneFeatureExtractorBase;
import org.dkpro.tc.features.ngram.base.NGramFeatureExtractorBase;
import org.dkpro.tc.features.ngram.util.NGramUtils;

/* loaded from: input_file:org/dkpro/tc/features/ngram/meta/WordNGramMC.class */
/**
 * {@link LuceneMC} specialisation that produces n-grams over {@link Token} annotations.
 *
 * <p>The n-gram extraction itself is delegated to
 * {@link NGramUtils#getDocumentNgrams}; this class only holds the configuration
 * (n-gram size range, stopword handling, lower-casing) and passes it through.
 * How the resulting distribution is consumed is decided by the superclass, which
 * is not visible from this file.
 */
public class WordNGramMC extends LuceneMC {

    /** Lower bound of the n-gram size range; defaults to "1". */
    @ConfigurationParameter(
            name = NGramFeatureExtractorBase.PARAM_NGRAM_MIN_N,
            mandatory = true,
            defaultValue = "1")
    private int ngramMinN;

    /** Upper bound of the n-gram size range; defaults to "3". */
    @ConfigurationParameter(
            name = NGramFeatureExtractorBase.PARAM_NGRAM_MAX_N,
            mandatory = true,
            defaultValue = "3")
    private int ngramMaxN;

    /** Optional location of a stopword list; handed to {@link FeatureUtil#getStopwords}. */
    @ConfigurationParameter(
            name = NGramFeatureExtractorBase.PARAM_NGRAM_STOPWORDS_FILE,
            mandatory = false)
    private String ngramStopwordsFile;

    /** Forwarded unchanged to the n-gram extraction call; defaults to "false". */
    @ConfigurationParameter(
            name = NGramFeatureExtractorBase.PARAM_FILTER_PARTIAL_STOPWORD_MATCHES,
            mandatory = true,
            defaultValue = "false")
    private boolean filterPartialStopwordMatches;

    /**
     * Textual form of the lower-casing switch; defaults to "true". It is converted
     * into {@link #ngramLowerCase} during {@link #initialize(UimaContext)}.
     */
    @ConfigurationParameter(
            name = NGramFeatureExtractorBase.PARAM_NGRAM_LOWER_CASE,
            mandatory = false,
            defaultValue = "true")
    private String stringNgramLowerCase;

    /** Parsed lower-casing switch; overwritten from {@link #stringNgramLowerCase} on initialisation. */
    boolean ngramLowerCase = true;

    /** Stopword set loaded once in {@link #initialize(UimaContext)}. */
    private Set<String> stopwords;

    /**
     * Runs the superclass initialisation, parses the lower-casing switch and loads
     * the stopword set.
     *
     * @param uimaContext context passed on to the superclass
     * @throws ResourceInitializationException if reading the stopwords raises an
     *         {@link IOException} (kept as the cause), or if thrown by the superclass
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        // Anything other than a case-insensitive "true" (including null) yields false.
        ngramLowerCase = Boolean.parseBoolean(stringNgramLowerCase);

        try {
            stopwords = FeatureUtil.getStopwords(ngramStopwordsFile, ngramLowerCase);
        } catch (IOException ioFailure) {
            throw new ResourceInitializationException(ioFailure);
        }
    }

    /**
     * Builds the n-gram frequency distribution for the given CAS, using a target
     * that spans offsets 0 to the length of the document text.
     *
     * @param jCas the CAS whose document text and tokens are processed
     * @return the distribution returned by {@link NGramUtils#getDocumentNgrams}
     * @throws TextClassificationException if the delegate reports a failure
     */
    @Override
    protected FrequencyDistribution<String> getNgramsFD(JCas jCas) throws TextClassificationException {
        // Target covering the complete document text of this CAS.
        TextClassificationTarget wholeDocument =
                new TextClassificationTarget(jCas, 0, jCas.getDocumentText().length());

        return NGramUtils.getDocumentNgrams(
                jCas,
                wholeDocument,
                ngramLowerCase,
                filterPartialStopwordMatches,
                ngramMinN,
                ngramMaxN,
                stopwords,
                Token.class);
    }

    /**
     * Returns the field name for this collector: the shared n-gram field prefix
     * followed by the feature extractor name inherited from the superclass.
     *
     * @return {@code LUCENE_NGRAM_FIELD} concatenated with {@code featureExtractorName}
     */
    @Override
    protected String getFieldName() {
        return LuceneFeatureExtractorBase.LUCENE_NGRAM_FIELD + featureExtractorName;
    }
}
