package org.apache.ctakes.dictionary.lookup2.dictionary;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
import org.apache.ctakes.dictionary.lookup2.util.CuiCodeUtil;
import org.apache.ctakes.dictionary.lookup2.util.LookupUtil;
import org.apache.ctakes.dictionary.lookup2.util.collection.ArrayListMap;
import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
import org.apache.log4j.Logger;

/* loaded from: input_file:org/apache/ctakes/dictionary/lookup2/dictionary/RareWordTermMapCreator.class */
public final class RareWordTermMapCreator {
    private static final Logger LOGGER = Logger.getLogger("RareWordTermMapCreator");

    /**
     * Hyphenated prefixes consulted by {@code isPrefix}: when the text collected before a
     * hyphen plus the hyphen equals one of these, {@code getTokens} keeps the hyphen attached.
     * NOTE(review): "gasto-" looks like it may be a typo of "gastro-" - confirm before changing.
     */
    private static final String[] PREFIXES = {
        "e-", "a-", "u-", "x-", "agro-", "ante-", "anti-", "arch-", "be-", "bi-", "bio-", "co-",
        "counter-", "cross-", "cyber-", "de-", "eco-", "ex-", "extra-", "inter-", "intra-",
        "macro-", "mega-", "micro-", "mid-", "mini-", "multi-", "neo-", "non-", "over-", "pan-",
        "para-", "peri-", "post-", "pre-", "pro-", "pseudo-", "quasi-", "re-", "semi-", "sub-",
        "super-", "tri-", "ultra-", "un-", "uni-", "vice-",
        "electro-", "gasto-", "homo-", "hetero-", "ortho-", "phospho-"
    };

    /** Hyphenated suffixes consulted by {@code isSuffix}. */
    private static final String[] SUFFIXES = {
        "-esque", "-ette", "-fest", "-fold", "-gate", "-itis", "-less", "-most", "-o-torium",
        "-rama", "-wise"
    };

    /**
     * Tokens that {@code isRarableToken} always rejects, so they are never counted and never
     * chosen as a term's rare word. Some entries appear more than once; that is harmless.
     */
    private static final String[] BAD_POS_TERMS = {
        // number words
        "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
        // conjunctions
        "and", "or", "but", "for", "nor", "so", "yet",
        // determiners
        "this", "that", "these", "those", "the",
        "there",
        // modal verbs
        "can", "should", "will", "may", "might", "must", "could", "would",
        // quantifiers
        "some", "any", "all", "both", "half", "none", "twice",
        // prepositions
        "at", "before", "after", "behind", "beneath", "beside", "between", "into", "through",
        "across", "of", "concerning", "like", "except", "with", "without", "toward", "to", "past",
        "against", "during", "until", "throughout", "below", "besides", "beyond", "from",
        "inside", "near", "outside", "since", "upon",
        // pronouns
        "my", "our",
        "i", "you", "he", "she", "it",
        "mine", "yours", "his", "hers", "its", "ours", "theirs",
        // particles
        "about", "off", "up", "along", "away", "back", "by", "down", "forward", "in", "on", "out",
        "over", "around", "under",
        "to",
        // wh-words
        "what", "whatever", "which", "whichever",
        "who", "whom", "which", "that", "whoever", "whomever",
        "how", "where", "when", "however", "wherever", "whenever"
    };

    /* loaded from: input_file:org/apache/ctakes/dictionary/lookup2/dictionary/RareWordTermMapCreator$CuiTerm.class */
    /**
     * Immutable pairing of a CUI code with the tokenized text of one term.
     * Two instances are equal when both the tokenized term and the CUI code are equal;
     * the hash code is computed once at construction.
     */
    public static class CuiTerm {
        private final String __term;
        private final Long __cui;
        private final int __hashcode;

        /**
         * @param str  CUI text, converted to a code by {@code CuiCodeUtil.getCuiCode}
         * @param str2 raw term text, normalized by {@code getTokenizedTerm}
         */
        public CuiTerm(String str, String str2) {
            __term = RareWordTermMapCreator.getTokenizedTerm(str2);
            __cui = CuiCodeUtil.getCuiCode(str);
            // Hash the combined "<cui>_<term>" key once; both parts are final.
            String hashKey = __cui + "_" + __term;
            __hashcode = hashKey.hashCode();
        }

        /** @return the CUI code produced at construction */
        public Long getCui() {
            return __cui;
        }

        /** @return the tokenized term text */
        public String getTerm() {
            return __term;
        }

        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof CuiTerm)) {
                return false;
            }
            CuiTerm other = (CuiTerm) obj;
            return __term.equals(other.__term) && __cui.equals(other.__cui);
        }

        @Override
        public int hashCode() {
            return __hashcode;
        }
    }

    /** Private constructor: this class exposes only static members and is not instantiated here. */
    private RareWordTermMapCreator() {
    }

    /**
     * Builds a lookup map in which every term is filed under its rare word.
     * <p>
     * The iterable is traversed twice: first to count how often each token occurs across all
     * terms, then to pick each term's rare word and register the term. It must therefore
     * support repeated iteration.
     *
     * @param iterable the cui/term pairs to index
     * @return map from rare word to the terms that were filed under it
     */
    public static CollectionMap<String, RareWordTerm, List<RareWordTerm>> createRareWordTermMap(Iterable<CuiTerm> iterable) {
        ArrayListMap termsByRareWord = new ArrayListMap();
        Map<String, Integer> tokenCounts = createTokenCountMap(iterable);
        for (CuiTerm cuiTerm : iterable) {
            String termText = cuiTerm.getTerm();
            String rareWord = getRareWord(termText, tokenCounts);
            int rareWordIndex = getWordIndex(termText, rareWord);
            int tokenCount = getTokenCount(termText);
            if (rareWordIndex < 0) {
                // The chosen rare word could not be located in the term; skip it but leave a trace.
                LOGGER.warn("Bad Rare Word Index for " + rareWord + " in " + termText);
                continue;
            }
            termsByRareWord.placeValue(rareWord, new RareWordTerm(termText, cuiTerm.__cui, rareWord, rareWordIndex, tokenCount));
        }
        return termsByRareWord;
    }

    /**
     * Counts, over all terms, how many times each rarable token occurs.
     * Tokens rejected by {@code isRarableToken} are not counted.
     *
     * @param iterable the cui/term pairs whose terms are split on single spaces
     * @return map from token to its number of occurrences
     */
    private static Map<String, Integer> createTokenCountMap(Iterable<CuiTerm> iterable) {
        Map<String, Integer> counts = new HashMap<>();
        for (CuiTerm cuiTerm : iterable) {
            String[] tokens = LookupUtil.fastSplit(cuiTerm.getTerm(), ' ');
            for (String token : tokens) {
                if (!isRarableToken(token)) {
                    continue;
                }
                Integer seen = counts.get(token);
                counts.put(token, seen == null ? 1 : seen + 1);
            }
        }
        return counts;
    }

    /**
     * Chooses the rare word of a term: the rarable token with the lowest count in the map.
     * A single-token term returns that token unconditionally. When no token qualifies, the
     * first token is returned; on equal counts the earliest token wins.
     *
     * @param str tokenized term, tokens separated by single spaces
     * @param map occurrence count per token
     * @return the token selected as the term's rare word
     */
    private static String getRareWord(String str, Map<String, Integer> map) {
        String[] tokens = LookupUtil.fastSplit(str, ' ');
        if (tokens.length == 1) {
            return tokens[0];
        }
        String rarest = tokens[0];
        int lowestCount = Integer.MAX_VALUE;
        for (String token : tokens) {
            if (!isRarableToken(token)) {
                continue;
            }
            Integer count = map.get(token);
            if (count == null || count.intValue() >= lowestCount) {
                continue;
            }
            rarest = token;
            lowestCount = count.intValue();
        }
        return rarest;
    }

    /**
     * Decides whether a token may be counted and chosen as a rare word.
     * A token qualifies when it is longer than one character, contains at least one letter,
     * and is not listed in {@code BAD_POS_TERMS}.
     *
     * @param str the token to test
     * @return true if the token is eligible
     */
    private static boolean isRarableToken(String str) {
        if (str.length() <= 1) {
            return false;
        }
        boolean hasLetter = false;
        for (int i = 0; i < str.length() && !hasLetter; i++) {
            hasLetter = Character.isLetter(str.charAt(i));
        }
        if (!hasLetter) {
            return false;
        }
        for (String excluded : BAD_POS_TERMS) {
            if (str.equals(excluded)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Finds the position of a word among the space-separated tokens of a term.
     *
     * @param str  tokenized term, tokens separated by single spaces
     * @param str2 the word to locate
     * @return zero-based index of the first matching token, or -1 if none matches
     */
    private static int getWordIndex(String str, String str2) {
        String[] tokens = LookupUtil.fastSplit(str, ' ');
        for (int index = 0; index < tokens.length; index++) {
            if (tokens[index].equals(str2)) {
                return index;
            }
        }
        return -1;
    }

    /**
     * @param str tokenized term, tokens separated by single spaces
     * @return the number of tokens {@code LookupUtil.fastSplit} produces for the term
     */
    private static int getTokenCount(String str) {
        String[] tokens = LookupUtil.fastSplit(str, ' ');
        return tokens.length;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Normalizes raw term text into tokens joined by single spaces.
     * The text is split on runs of whitespace and each piece is broken up by
     * {@code getTokens}. An empty input is returned as is; input that yields no tokens
     * produces an empty string.
     *
     * @param str raw term text
     * @return the tokens separated by single spaces, with no trailing space
     */
    public static String getTokenizedTerm(String str) {
        if (str.isEmpty()) {
            return str;
        }
        StringBuilder joined = new StringBuilder();
        for (String word : str.split("\\s+")) {
            for (String token : getTokens(word)) {
                joined.append(token).append(" ");
            }
        }
        // Drop the trailing separator, if anything was written at all.
        if (joined.length() > 0) {
            joined.setLength(joined.length() - 1);
        }
        return joined.toString();
    }

    /**
     * Splits one whitespace-free word into tokens.
     * Letters and digits accumulate into the current token. Any other character ends the
     * current token and becomes a one-character token of its own. The exception is a hyphen
     * that {@code isPrefix} or {@code isSuffix} accepts, which stays attached to the
     * current token.
     *
     * @param str a word containing no whitespace
     * @return the tokens in order of appearance
     */
    private static List<String> getTokens(String str) {
        List<String> tokens = new ArrayList<>();
        StringBuilder pending = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (Character.isLetterOrDigit(c)) {
                pending.append(c);
                continue;
            }
            if (c == '-' && (isPrefix(pending.toString()) || isSuffix(str, i + 1))) {
                // Hyphen belongs to a known prefix/suffix: keep building the same token.
                pending.append('-');
                continue;
            }
            // Separator character: flush what was collected, then emit the character itself.
            if (pending.length() != 0) {
                tokens.add(pending.toString());
                pending.setLength(0);
            }
            tokens.add(String.valueOf(c));
        }
        if (pending.length() != 0) {
            tokens.add(pending.toString());
        }
        return tokens;
    }

    /**
     * Tests whether the given text followed by a hyphen is one of {@code PREFIXES}.
     *
     * @param str the text collected before a hyphen
     * @return true if {@code str + "-"} is a listed prefix
     */
    private static boolean isPrefix(String str) {
        String candidate = str + "-";
        for (int k = 0; k < PREFIXES.length; k++) {
            if (PREFIXES[k].equals(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests whether the letters/digits that directly follow a hyphen form one of
     * {@code SUFFIXES}.
     * <p>
     * The bounds guard used to read {@code str.length() >= i}. That is true for every valid
     * index, so the method always returned false and the suffix list was never consulted.
     * The guard now rejects only the case where nothing follows the hyphen.
     * <p>
     * Because {@code getNextCharTerm} stops at the first non-alphanumeric character, a
     * multi-hyphen entry such as "-o-torium" still cannot match here.
     *
     * @param str the whole word being tokenized
     * @param i   index of the first character after the hyphen
     * @return true if "-" plus the following alphanumeric run is a listed suffix
     */
    private static boolean isSuffix(String str, int i) {
        if (i < 0 || i >= str.length()) {
            return false;
        }
        String following = getNextCharTerm(str.substring(i));
        if (following.isEmpty()) {
            return false;
        }
        String candidate = "-" + following;
        for (String suffix : SUFFIXES) {
            if (suffix.equals(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the leading run of letters and digits of the given text.
     *
     * @param str text to scan from its first character
     * @return the characters before the first non-alphanumeric character; the whole text if
     *         there is none, or an empty string if the text starts with one
     */
    private static String getNextCharTerm(String str) {
        int end = 0;
        while (end < str.length() && Character.isLetterOrDigit(str.charAt(end))) {
            end++;
        }
        return str.substring(0, end);
    }
}
