package io.basestar.spark;

import com.google.common.collect.ImmutableList;
import io.basestar.util.Nullsafe;
import java.util.List;
import java.util.stream.Stream;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;

/* loaded from: input_file:io/basestar/spark/MD5BucketTransform.class */
/**
 * Adds a bucket column to a dataset, derived from the MD5 hash of one or more id columns.
 *
 * <p>The values of the configured id columns are concatenated with an empty separator,
 * hashed with MD5, and the leading {@code len} characters of the resulting digest string
 * are written to the bucket column.
 *
 * <p>Instances are created through {@link #builder()}.
 */
public class MD5BucketTransform implements Transform<Dataset<Row>, Dataset<Row>> {
    /** Names of the columns whose concatenated values are hashed; immutable snapshot. */
    private final List<String> idColumns;
    /** Name of the column that receives the bucket value. */
    private final String bucketColumnName;
    /** Number of leading digest characters kept as the bucket value (1 to 4 inclusive). */
    private final int len;

    /**
     * Fluent builder for {@link MD5BucketTransform}.
     *
     * <p>Unset values are left {@code null} and resolved to defaults (or rejected) by the
     * transform's constructor when {@link #build()} is called.
     */
    public static class Builder {
        private List<String> idColumns;
        private String bucketColumnName;
        private Integer len;

        Builder() {
        }

        /**
         * Sets the columns whose values are hashed.
         *
         * @param list column names; {@code null} selects the default of a single {@code "id"} column
         * @return this builder
         */
        public Builder idColumns(List<String> list) {
            this.idColumns = list;
            return this;
        }

        /**
         * Sets the name of the output bucket column.
         *
         * @param str bucket column name
         * @return this builder
         */
        public Builder bucketColumnName(String str) {
            this.bucketColumnName = str;
            return this;
        }

        /**
         * Sets how many leading characters of the MD5 digest form the bucket value.
         *
         * @param num prefix length; {@code null} selects the default of 1
         * @return this builder
         */
        public Builder len(Integer num) {
            this.len = num;
            return this;
        }

        /**
         * Creates the transform from the values collected so far.
         *
         * @return a new transform
         * @throws IllegalStateException if the configured length is below 1 or above 4
         */
        public MD5BucketTransform build() {
            return new MD5BucketTransform(this.idColumns, this.bucketColumnName, this.len);
        }

        @Override
        public String toString() {
            return "MD5BucketTransform.Builder(idColumns=" + this.idColumns + ", bucketColumnName=" + this.bucketColumnName + ", len=" + this.len + ")";
        }
    }

    /**
     * Creates a transform, applying defaults for omitted values.
     *
     * @param idColumns columns to hash; {@code null} defaults to a single {@code "id"} column
     * @param bucketColumnName name of the output column; passed through {@code Nullsafe.require}
     *     (presumably rejecting {@code null} - confirm against Nullsafe)
     * @param len number of leading digest characters to keep; {@code null} defaults to 1
     * @throws IllegalStateException if the resolved length is below 1 or above 4
     * @throws NullPointerException if {@code idColumns} contains a {@code null} element
     */
    MD5BucketTransform(List<String> idColumns, String bucketColumnName, Integer len) {
        final List<String> requestedColumns = Nullsafe.option(idColumns, ImmutableList.of("id"));
        // Snapshot the list so that later changes to the caller's (or builder's) list
        // cannot alter which columns this transform hashes.
        this.idColumns = ImmutableList.copyOf(requestedColumns);
        this.bucketColumnName = Nullsafe.require(bucketColumnName);
        this.len = Nullsafe.option(len, 1);
        if (this.len < 1) {
            throw new IllegalStateException("MD5 substring length must be at least 1");
        }
        if (this.len > 4) {
            throw new IllegalStateException("MD5 substring longer than 4 will create over 1M buckets");
        }
    }

    /**
     * Returns the input dataset with the bucket column added.
     *
     * @param dataset input rows; must expose every configured id column
     * @return the dataset with the bucket column set to the digest prefix
     */
    @Override // io.basestar.spark.Transform
    public Dataset<Row> accept(Dataset<Row> dataset) {
        final Column[] idValues = this.idColumns.stream()
                .map(dataset::col)
                .toArray(Column[]::new);
        final Column digest = functions.md5(functions.concat_ws("", idValues));
        // NOTE(review): Spark substring positions are 1-based; a start of 0 appears to be
        // treated the same as 1, so this keeps the first `len` characters - confirm.
        return dataset.withColumn(this.bucketColumnName, digest.substr(0, this.len));
    }

    /**
     * Creates an empty builder.
     *
     * @return a new builder
     */
    public static Builder builder() {
        return new Builder();
    }
}
