package org.apache.ctakes.dictionary.lookup2.ae;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.ctakes.core.fsm.token.NumberToken;
import org.apache.ctakes.core.resource.FileResource;
import org.apache.ctakes.core.util.JCasUtil;
import org.apache.ctakes.dictionary.lookup2.concept.ConceptFactory;
import org.apache.ctakes.dictionary.lookup2.dictionary.DictionaryDescriptorParser;
import org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary;
import org.apache.ctakes.dictionary.lookup2.textspan.TextSpan;
import org.apache.ctakes.dictionary.lookup2.util.DictionarySpec;
import org.apache.ctakes.dictionary.lookup2.util.FastLookupToken;
import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
import org.apache.ctakes.dictionary.lookup2.util.collection.HashSetMap;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;

/* loaded from: input_file:org/apache/ctakes/dictionary/lookup2/ae/AbstractJCasTermAnnotator.class */
public abstract class AbstractJCasTermAnnotator extends JCasAnnotator_ImplBase implements JCasTermAnnotator, WindowProcessor {
    private static final Logger LOGGER = Logger.getLogger("AbstractJCasTermAnnotator");
    public static final String PARAM_WINDOW_ANNOT_PRP = "windowAnnotations";
    public static final String PARAM_EXC_TAGS_PRP = "exclusionTags";
    public static final String PARAM_MIN_SPAN_PRP = "minimumSpan";
    private static final String DEFAULT_LOOKUP_WINDOW = "org.apache.ctakes.typesystem.type.textspan.Sentence";
    private static final String DEFAULT_EXCLUSION_TAGS = "VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,IN,LS,MD,PDT,POS,PP,PP$,PRP,PRP$,RP,TO,WDT,WP,WPS,WRB";
    private DictionarySpec _dictionarySpec;
    private int _lookupWindowType;
    private final Set<String> _exclusionPartsOfSpeech = new HashSet();
    protected int _minimumLookupSpan = 3;

    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        try {
            String str = (String) uimaContext.getConfigParameterValue(PARAM_WINDOW_ANNOT_PRP);
            if (str == null || str.isEmpty()) {
                str = DEFAULT_LOOKUP_WINDOW;
            }
            LOGGER.info("Using dictionary lookup window type: " + str);
            this._lookupWindowType = JCasUtil.getType(str);
            String str2 = (String) uimaContext.getConfigParameterValue(PARAM_EXC_TAGS_PRP);
            if (str2 == null) {
                str2 = DEFAULT_EXCLUSION_TAGS;
            }
            for (String str3 : str2.split(",")) {
                this._exclusionPartsOfSpeech.add(str3.toUpperCase());
            }
            ArrayList arrayList = new ArrayList(this._exclusionPartsOfSpeech);
            Collections.sort(arrayList);
            StringBuilder sb = new StringBuilder();
            Iterator it = arrayList.iterator();
            while (it.hasNext()) {
                sb.append((String) it.next()).append(" ");
            }
            LOGGER.info("Exclusion tagset loaded: " + sb.toString());
            Object configParameterValue = uimaContext.getConfigParameterValue(PARAM_MIN_SPAN_PRP);
            if (configParameterValue != null) {
                this._minimumLookupSpan = parseInt(configParameterValue, PARAM_MIN_SPAN_PRP, this._minimumLookupSpan);
            }
            LOGGER.info("Using minimum term text span: " + this._minimumLookupSpan);
            this._dictionarySpec = DictionaryDescriptorParser.parseDescriptor(((FileResource) uimaContext.getResourceObject(JCasTermAnnotator.DICTIONARY_DESCRIPTOR_KEY)).getFile(), uimaContext);
        } catch (ResourceAccessException | AnnotatorContextException e) {
            throw new ResourceInitializationException(e);
        }
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        LOGGER.info("Starting processing");
        AnnotationIndex annotationIndex = jCas.getJFSIndexRepository().getAnnotationIndex(this._lookupWindowType);
        if (annotationIndex == null) {
            return;
        }
        HashMap hashMap = new HashMap(getDictionaries().size());
        Iterator<RareWordDictionary> it = getDictionaries().iterator();
        while (it.hasNext()) {
            hashMap.put(it.next(), new HashSetMap());
        }
        try {
            for (Object obj : annotationIndex) {
                if (isWindowOk((Annotation) obj)) {
                    processWindow(jCas, (Annotation) obj, hashMap);
                }
            }
        } catch (ArrayIndexOutOfBoundsException e) {
            LOGGER.warn(e.getMessage());
        }
        HashSet hashSet = new HashSet();
        HashSetMap hashSetMap = new HashSetMap();
        for (Map.Entry<RareWordDictionary, CollectionMap<TextSpan, Long, ? extends Collection<Long>>> entry : hashMap.entrySet()) {
            hashSet.clear();
            RareWordDictionary key = entry.getKey();
            CollectionMap<TextSpan, Long, ? extends Collection<Long>> value = entry.getValue();
            Iterator<? extends Collection<Long>> it2 = value.getAllCollections().iterator();
            while (it2.hasNext()) {
                hashSet.addAll(it2.next());
            }
            Collection<ConceptFactory> pairedConceptFactories = this._dictionarySpec.getPairedConceptFactories(key.getName());
            hashSetMap.clear();
            Iterator<ConceptFactory> it3 = pairedConceptFactories.iterator();
            while (it3.hasNext()) {
                hashSetMap.placeMap(it3.next().createConcepts(hashSet));
            }
            this._dictionarySpec.getConsumer().consumeHits(jCas, key, value, hashSetMap);
        }
        LOGGER.info("Finished processing");
    }

    @Override // org.apache.ctakes.dictionary.lookup2.ae.JCasTermAnnotator
    public Collection<RareWordDictionary> getDictionaries() {
        return this._dictionarySpec.getDictionaries();
    }

    @Override // org.apache.ctakes.dictionary.lookup2.ae.WindowProcessor
    public boolean isWindowOk(Annotation annotation) {
        String coveredText = annotation.getCoveredText();
        return (coveredText.equals("section id") || coveredText.startsWith("[start section id") || coveredText.startsWith("[end section id") || coveredText.startsWith("[meta rev_")) ? false : true;
    }

    @Override // org.apache.ctakes.dictionary.lookup2.ae.WindowProcessor
    public void processWindow(JCas jCas, Annotation annotation, Map<RareWordDictionary, CollectionMap<TextSpan, Long, ? extends Collection<Long>>> map) {
        List<FastLookupToken> arrayList = new ArrayList<>();
        List<Integer> arrayList2 = new ArrayList<>();
        getAnnotationsInWindow(jCas, annotation, arrayList, arrayList2);
        findTerms(getDictionaries(), arrayList, arrayList2, map);
    }

    private void findTerms(Iterable<RareWordDictionary> iterable, List<FastLookupToken> list, List<Integer> list2, Map<RareWordDictionary, CollectionMap<TextSpan, Long, ? extends Collection<Long>>> map) {
        for (RareWordDictionary rareWordDictionary : iterable) {
            findTerms(rareWordDictionary, list, list2, map.get(rareWordDictionary));
        }
    }

    protected void getAnnotationsInWindow(JCas jCas, AnnotationFS annotationFS, List<FastLookupToken> list, Collection<Integer> collection) {
        String partOfSpeech;
        for (BaseToken baseToken : org.apache.uima.fit.util.JCasUtil.selectCovered(jCas, BaseToken.class, annotationFS)) {
            if (!(baseToken instanceof NewlineToken)) {
                if (!((baseToken instanceof PunctuationToken) || (baseToken instanceof NumberToken) || (baseToken instanceof ContractionToken) || (baseToken instanceof SymbolToken)) && ((partOfSpeech = baseToken.getPartOfSpeech()) == null || !this._exclusionPartsOfSpeech.contains(partOfSpeech))) {
                    collection.add(Integer.valueOf(list.size()));
                }
                list.add(new FastLookupToken(baseToken));
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    public static int parseInt(Object obj, String str, int i) {
        if (obj instanceof Integer) {
            return ((Integer) obj).intValue();
        }
        if (obj instanceof String) {
            try {
                return Integer.parseInt((String) obj);
            } catch (NumberFormatException e) {
                LOGGER.warn("Could not parse " + str + " " + obj + " as an integer");
            }
        } else {
            LOGGER.warn("Could not parse " + str + " " + obj + " as an integer");
        }
        return i;
    }
}
