package org.jpmml.sparkml.feature;

import java.util.Collections;
import java.util.List;
import org.apache.spark.ml.feature.RegexTokenizer;
import org.dmg.pmml.DataType;
import org.dmg.pmml.DerivedField;
import org.dmg.pmml.Expression;
import org.dmg.pmml.OpType;
import org.jpmml.converter.Feature;
import org.jpmml.converter.FeatureUtil;
import org.jpmml.converter.PMMLUtil;
import org.jpmml.sparkml.DocumentFeature;
import org.jpmml.sparkml.FeatureConverter;
import org.jpmml.sparkml.SparkMLEncoder;

/* loaded from: input_file:org/jpmml/sparkml/feature/RegexTokenizerConverter.class */
/**
 * Feature converter for Spark ML's {@link RegexTokenizer}.
 *
 * <p>The tokenizer is turned into a single {@link DocumentFeature} that carries the
 * tokenizer's pattern. Only tokenizers running in "gaps" (splitter) mode with a
 * minimum token length of exactly 1 are accepted; anything else is rejected with
 * an {@link IllegalArgumentException}.
 */
public class RegexTokenizerConverter extends FeatureConverter<RegexTokenizer> {

    /**
     * Creates a converter for the given tokenizer.
     *
     * @param regexTokenizer the tokenizer stage to convert
     */
    public RegexTokenizerConverter(RegexTokenizer regexTokenizer) {
        super(regexTokenizer);
    }

    /**
     * Encodes the tokenizer as a one-element list holding a {@link DocumentFeature}.
     *
     * <p>When the tokenizer lowercases its input, a categorical string derived field
     * applying the {@code lowercase} function to the input feature is created first
     * and the document feature is built on top of it; otherwise the input feature's
     * own field is used directly.
     *
     * @param sparkMLEncoder encoder used to look up the input feature and to create derived fields
     * @return a singleton list containing the document feature
     * @throws IllegalArgumentException if the tokenizer is in token matching mode, or
     *         its minimum token length is not 1
     */
    @Override // org.jpmml.sparkml.FeatureConverter
    public List<Feature> encodeFeatures(SparkMLEncoder sparkMLEncoder) {
        RegexTokenizer tokenizer = (RegexTokenizer) getTransformer();

        // Reject unsupported configurations before touching the encoder.
        requireSupportedConfiguration(tokenizer);

        Feature input = sparkMLEncoder.getOnlyFeature(tokenizer.getInputCol());

        // NOTE(review): the declared type comes from the original listing; confirm
        // that Feature#getField() is really assignable to DerivedField.
        DerivedField documentField = input.getField();

        if (tokenizer.getToLowercase()) {
            // Replace the raw field with a lowercased copy of the input.
            documentField = sparkMLEncoder.createDerivedField(
                FeatureUtil.createName("lowercase", input),
                OpType.CATEGORICAL,
                DataType.STRING,
                PMMLUtil.createApply("lowercase", new Expression[]{input.ref()})
            );
        }

        DocumentFeature document = new DocumentFeature(sparkMLEncoder, documentField, tokenizer.getPattern());
        return Collections.singletonList(document);
    }

    /** Verifies that the tokenizer uses splitter mode and a minimum token length of 1. */
    private static void requireSupportedConfiguration(RegexTokenizer tokenizer) {
        boolean splitterMode = tokenizer.getGaps();
        if (!splitterMode) {
            throw new IllegalArgumentException("Expected splitter mode, got token matching mode");
        }

        int minTokenLength = tokenizer.getMinTokenLength();
        if (minTokenLength != 1) {
            throw new IllegalArgumentException("Expected 1 as minimum token length, got " + minTokenLength + " as minimum token length");
        }
    }
}
