package org.dkpro.tc.features.ngram.meta;

import de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution;
import java.io.IOException;
import java.util.Set;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.tc.api.features.util.FeatureUtil;
import org.dkpro.tc.api.type.TextClassificationTarget;
import org.dkpro.tc.features.ngram.KeywordNGram;
import org.dkpro.tc.features.ngram.base.NGramFeatureExtractorBase;
import org.dkpro.tc.features.ngram.util.KeywordNGramUtils;

/* loaded from: input_file:org/dkpro/tc/features/ngram/meta/KeywordNGramMC.class */
/**
 * {@link LuceneMC} specialization that supplies keyword n-grams for a document.
 *
 * <p>The keyword list is loaded once in {@link #initialize(UimaContext)} from the configured
 * keywords file; {@link #getNgramsFD(JCas)} then delegates the n-gram extraction to
 * {@link KeywordNGramUtils#getDocumentKeywordNgrams}, covering the whole document text.
 * How the base class consumes the resulting distribution and field name is not visible in
 * this file.
 */
public class KeywordNGramMC extends LuceneMC {
    /** Prefix of the field name produced by {@link #getFieldName()}. */
    public static final String KEYWORD_NGRAM_FIELD = "keywordngram";

    /** Lower n-gram size bound handed to the extraction utility (default {@code "1"}). */
    @ConfigurationParameter(name = NGramFeatureExtractorBase.PARAM_NGRAM_MIN_N, mandatory = true, defaultValue = "1")
    private int minN;

    /** Upper n-gram size bound handed to the extraction utility (default {@code "3"}). */
    @ConfigurationParameter(name = NGramFeatureExtractorBase.PARAM_NGRAM_MAX_N, mandatory = true, defaultValue = "3")
    private int maxN;

    /** Location of the keyword list; mandatory and without a default. */
    @ConfigurationParameter(name = KeywordNGram.PARAM_NGRAM_KEYWORDS_FILE, mandatory = true)
    private String keywordsFile;

    /** Forwarded unchanged to the extraction utility (default {@code "true"}). */
    @ConfigurationParameter(name = KeywordNGram.PARAM_KEYWORD_NGRAM_MARK_SENTENCE_BOUNDARY, mandatory = false, defaultValue = "true")
    private boolean markSentenceBoundary;

    /** Forwarded unchanged to the extraction utility (default {@code "false"}). */
    @ConfigurationParameter(name = KeywordNGram.PARAM_KEYWORD_NGRAM_MARK_SENTENCE_LOCATION, mandatory = false, defaultValue = "false")
    private boolean markSentenceLocation;

    /** Forwarded unchanged to the extraction utility (default {@code "false"}). */
    @ConfigurationParameter(name = KeywordNGram.PARAM_KEYWORD_NGRAM_INCLUDE_COMMAS, mandatory = false, defaultValue = "false")
    private boolean includeCommas;

    /** Keyword set read from {@link #keywordsFile} during initialization. */
    private Set<String> keywords;

    /**
     * Runs the base-class initialization and then loads the keyword set.
     *
     * @param uimaContext context passed through to {@code LuceneMC.initialize}
     * @throws ResourceInitializationException if reading the keywords file fails with an
     *         {@link IOException} (kept as the cause), or if the base class throws it
     */
    @Override // org.dkpro.tc.features.ngram.meta.LuceneMC
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        try {
            // NOTE(review): the 'true' flag presumably requests lower-casing of the entries;
            // confirm against FeatureUtil.getStopwords.
            keywords = FeatureUtil.getStopwords(keywordsFile, true);
        } catch (IOException readFailure) {
            throw new ResourceInitializationException(readFailure);
        }
    }

    /**
     * Extracts the keyword n-grams of the given CAS.
     *
     * @param jCas the CAS to process; a target spanning offsets {@code 0} to the length of its
     *        document text is created for it
     * @return the distribution returned by {@code KeywordNGramUtils.getDocumentKeywordNgrams}
     */
    @Override // org.dkpro.tc.features.ngram.meta.LuceneMC
    protected FrequencyDistribution<String> getNgramsFD(JCas jCas) {
        final int documentEnd = jCas.getDocumentText().length();
        final TextClassificationTarget wholeDocument =
                new TextClassificationTarget(jCas, 0, documentEnd);

        return KeywordNGramUtils.getDocumentKeywordNgrams(
                jCas,
                wholeDocument,
                minN,
                maxN,
                markSentenceBoundary,
                markSentenceLocation,
                includeCommas,
                keywords);
    }

    /**
     * Builds the field name for this collector.
     *
     * @return {@link #KEYWORD_NGRAM_FIELD} followed by the inherited {@code featureExtractorName}
     */
    @Override // org.dkpro.tc.features.ngram.meta.LuceneMC
    protected String getFieldName() {
        final String suffix = featureExtractorName;
        return KEYWORD_NGRAM_FIELD + suffix;
    }
}
