package org.apache.lucene.analysis.shingle;

import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;

/* loaded from: input_file:WEB-INF/lib/lucene-analyzers-common-8.5.0.jar:org/apache/lucene/analysis/shingle/ShingleFilter.class */
public final class ShingleFilter extends TokenFilter {
    /** Filler term text the three-argument constructor installs into {@code fillerToken}. */
    public static final String DEFAULT_FILLER_TOKEN = "_";
    /** Same value the no-size constructors pass as the maximum shingle size. */
    public static final int DEFAULT_MAX_SHINGLE_SIZE = 2;
    /** Same value the convenience constructors pass as the minimum shingle size. */
    public static final int DEFAULT_MIN_SHINGLE_SIZE = 2;
    /** Same value the three-argument constructor assigns to {@code tokenType}. */
    public static final String DEFAULT_TOKEN_TYPE = "shingle";
    /** Same value the three-argument constructor assigns to {@code tokenSeparator}. */
    public static final String DEFAULT_TOKEN_SEPARATOR = " ";
    // Window of buffered input tokens; refilled by shiftInputWindow() up to maxShingleSize entries.
    private LinkedList<InputWindowToken> inputWindow;
    // Size (in tokens) of the gram incrementToken() tries to emit next.
    private CircularSequence gramSize;
    // Accumulates the text of the gram being built; cleared whenever the window is shifted.
    private StringBuilder gramBuilder;
    // Type written to typeAtt for emitted tokens whose gram size is greater than 1.
    private String tokenType;
    // Text appended between the terms of a gram.
    private String tokenSeparator;
    // Term text given to filler tokens created in getNextToken().
    private char[] fillerToken;
    // When true, a newly created CircularSequence starts at 1 instead of minShingleSize.
    private boolean outputUnigrams;
    // With noShingleOutput, lets shiftInputWindow() lower the gram-size minimum to 1 when the window is shorter than minShingleSize.
    private boolean outputUnigramsIfNoShingles;
    // Upper bound for both the gram size and the input window length.
    private int maxShingleSize;
    // Lower bound for multi-token gram sizes.
    private int minShingleSize;
    // Number of filler tokens getNextToken() still has to hand out before anything else.
    private int numFillerTokensToInsert;
    // Attribute snapshot used as the template for pending fillers and, when isNextInputStreamToken is set, as the held-back real token.
    private AttributeSource nextInputStreamToken;
    // True while nextInputStreamToken holds a real input token that has not been delivered yet.
    private boolean isNextInputStreamToken;
    // True once a token has been emitted for the current window; selects position increment 0 instead of 1.
    private boolean isOutputHere;
    // True until incrementToken() emits a token with gram size greater than 1.
    boolean noShingleOutput;
    // State captured right after input.end(); restored by end() when the input was exhausted here.
    private AttributeSource.State endState;
    // Attributes of this filter, obtained through addAttribute(...) in the constructor.
    private final CharTermAttribute termAtt;
    private final OffsetAttribute offsetAtt;
    private final PositionIncrementAttribute posIncrAtt;
    private final PositionLengthAttribute posLenAtt;
    private final TypeAttribute typeAtt;
    // Set once input.incrementToken() has returned false inside getNextToken().
    private boolean exhausted;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/lucene-analyzers-common-8.5.0.jar:org/apache/lucene/analysis/shingle/ShingleFilter$CircularSequence.class */
    /**
     * Cursor over the gram sizes to emit for one window position.
     *
     * <p>The cursor starts at {@code minValue}, is stepped by {@link #advance()} and wraps back to
     * {@code minValue} after reaching the enclosing filter's {@code maxShingleSize}. A value of 1
     * is followed directly by the enclosing filter's {@code minShingleSize}.
     */
    public class CircularSequence {
        private int value;
        private int previousValue;
        private int minValue;

        /** Picks the starting size (1 when unigrams are output, otherwise minShingleSize) and resets. */
        public CircularSequence() {
            if (ShingleFilter.this.outputUnigrams) {
                this.minValue = 1;
            } else {
                this.minValue = ShingleFilter.this.minShingleSize;
            }
            reset();
        }

        /** @return the current gram size */
        public int getValue() {
            return this.value;
        }

        /** Remembers the current size as the previous one and moves to the next size in the cycle. */
        public void advance() {
            final int current = this.value;
            this.previousValue = current;
            if (current == 1) {
                // Unigram is followed by the smallest multi-token gram.
                this.value = ShingleFilter.this.minShingleSize;
                return;
            }
            if (current == ShingleFilter.this.maxShingleSize) {
                // End of the cycle: start over (this also overwrites previousValue).
                reset();
                return;
            }
            this.value = current + 1;
        }

        /** Sets both the current and the previous size to {@code minValue}. */
        public void reset() {
            this.value = this.minValue;
            this.previousValue = this.minValue;
        }

        /** @return true when the current size equals {@code minValue} */
        public boolean atMinValue() {
            return this.minValue == this.value;
        }

        /** @return the size that was current before the last {@link #advance()} or {@link #reset()} */
        public int getPreviousValue() {
            return this.previousValue;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/lucene-analyzers-common-8.5.0.jar:org/apache/lucene/analysis/shingle/ShingleFilter$InputWindowToken.class */
    /**
     * One entry of the input window: an attribute source plus direct references to its term and
     * offset attributes, and a flag telling whether the entry is a filler.
     */
    public static class InputWindowToken {
        final AttributeSource attSource;
        final CharTermAttribute termAtt;
        final OffsetAttribute offsetAtt;
        /** Starts out false; set by the code that fills this token. */
        boolean isFiller;

        /**
         * Wraps the given attribute source and looks up its term and offset attributes.
         *
         * @param attributeSource source that already holds a CharTermAttribute and an OffsetAttribute
         */
        public InputWindowToken(AttributeSource attributeSource) {
            this.attSource = attributeSource;
            this.termAtt = attributeSource.getAttribute(CharTermAttribute.class);
            this.offsetAtt = attributeSource.getAttribute(OffsetAttribute.class);
        }
    }

    /**
     * Creates a filter over {@code tokenStream} with explicit minimum and maximum shingle sizes.
     * Unigram output is enabled, and the token type, separator and filler text start at their
     * {@code DEFAULT_*} values.
     *
     * @param tokenStream the stream to wrap
     * @param i minimum shingle size; must be at least 2 and not greater than {@code i2}
     * @param i2 maximum shingle size; must be at least 2
     * @throws IllegalArgumentException if either size is rejected by its setter
     */
    public ShingleFilter(TokenStream tokenStream, int i, int i2) {
        super(tokenStream);

        // Working buffers.
        this.inputWindow = new LinkedList<>();
        this.gramBuilder = new StringBuilder();

        // Default configuration.
        this.tokenType = DEFAULT_TOKEN_TYPE;
        this.tokenSeparator = DEFAULT_TOKEN_SEPARATOR;
        this.fillerToken = DEFAULT_FILLER_TOKEN.toCharArray();
        this.outputUnigrams = true;
        this.outputUnigramsIfNoShingles = false;

        // Initial streaming state.
        this.isNextInputStreamToken = false;
        this.isOutputHere = false;
        this.noShingleOutput = true;

        // Attributes this filter writes to.
        this.termAtt = addAttribute(CharTermAttribute.class);
        this.offsetAtt = addAttribute(OffsetAttribute.class);
        this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
        this.posLenAtt = addAttribute(PositionLengthAttribute.class);
        this.typeAtt = addAttribute(TypeAttribute.class);

        // Max goes first because the min setter validates against it and builds the gram-size sequence.
        setMaxShingleSize(i2);
        setMinShingleSize(i);
    }

    /**
     * Creates a filter with the default minimum shingle size and the given maximum.
     *
     * @param tokenStream the stream to wrap
     * @param i maximum shingle size; must be at least 2
     */
    public ShingleFilter(TokenStream tokenStream, int i) {
        this(tokenStream, DEFAULT_MIN_SHINGLE_SIZE, i);
    }

    /**
     * Creates a filter with the default minimum and maximum shingle sizes.
     *
     * @param tokenStream the stream to wrap
     */
    public ShingleFilter(TokenStream tokenStream) {
        this(tokenStream, DEFAULT_MIN_SHINGLE_SIZE, DEFAULT_MAX_SHINGLE_SIZE);
    }

    /**
     * Creates a filter with the default shingle sizes and a custom token type.
     *
     * @param tokenStream the stream to wrap
     * @param str token type to pass to {@link #setTokenType(String)}
     */
    public ShingleFilter(TokenStream tokenStream, String str) {
        this(tokenStream, DEFAULT_MIN_SHINGLE_SIZE, DEFAULT_MAX_SHINGLE_SIZE);
        setTokenType(str);
    }

    /**
     * Sets the type written to the type attribute of emitted tokens whose gram size is greater
     * than one.
     *
     * @param str the token type to use
     */
    public void setTokenType(String str) {
        this.tokenType = str;
    }

    /**
     * Turns unigram output on or off.
     *
     * @param z true to start the gram-size cycle at 1, false to start it at the minimum shingle size
     */
    public void setOutputUnigrams(boolean z) {
        this.outputUnigrams = z;
        // The sequence reads outputUnigrams in its constructor, so it has to be rebuilt.
        this.gramSize = new CircularSequence();
    }

    /**
     * Controls the unigram fallback: when enabled and no shingle has been emitted yet, a window
     * shorter than the minimum shingle size makes the filter drop its minimum gram size to 1.
     *
     * @param z true to enable the fallback
     */
    public void setOutputUnigramsIfNoShingles(boolean z) {
        this.outputUnigramsIfNoShingles = z;
    }

    /**
     * Sets the maximum shingle size.
     *
     * @param i the new maximum; must be at least 2
     * @throws IllegalArgumentException if {@code i} is smaller than 2
     */
    public void setMaxShingleSize(int i) {
        final boolean tooSmall = i < 2;
        if (tooSmall) {
            throw new IllegalArgumentException("Max shingle size must be >= 2");
        }
        this.maxShingleSize = i;
    }

    /**
     * Sets the minimum shingle size and rebuilds the gram-size sequence.
     *
     * @param i the new minimum; must be at least 2 and not greater than the current maximum
     * @throws IllegalArgumentException if {@code i} is smaller than 2 or greater than the maximum
     */
    public void setMinShingleSize(int i) {
        final String problem;
        if (i < 2) {
            problem = "Min shingle size must be >= 2";
        } else if (i > this.maxShingleSize) {
            problem = "Min shingle size must be <= max shingle size";
        } else {
            problem = null;
        }
        if (problem != null) {
            throw new IllegalArgumentException(problem);
        }
        this.minShingleSize = i;
        // The sequence may take minShingleSize as its starting value, so rebuild it.
        this.gramSize = new CircularSequence();
    }

    /**
     * Sets the text inserted between the terms of a gram.
     *
     * @param str the separator; {@code null} is stored as the empty string
     */
    public void setTokenSeparator(String str) {
        this.tokenSeparator = (str != null) ? str : "";
    }

    /**
     * Sets the term text used for filler tokens.
     *
     * @param str the filler text; {@code null} is stored as an empty character array
     */
    public void setFillerToken(String str) {
        this.fillerToken = (str != null) ? str.toCharArray() : new char[0];
    }

    /**
     * Emits the next gram (unigram or shingle) built from the input window.
     *
     * <p>On success the filter's attributes start as a copy of the first window token's
     * attributes. These are then overwritten: the term becomes the gram text, the end offset is
     * taken from the last token used, the position increment is 1 for the first gram emitted at a
     * window position and 0 for later ones, and the type is set to {@code tokenType} when the gram
     * has more than one token.
     *
     * @return true if a gram was written to this filter's attributes, false otherwise
     * @throws IOException if reading from the wrapped stream fails
     */
    @Override // org.apache.lucene.analysis.TokenStream
    public boolean incrementToken() throws IOException {
        // Number of window tokens whose text is already in gramBuilder.
        int builtGramSize;
        if (this.gramSize.atMinValue() || this.inputWindow.size() < this.gramSize.getValue()) {
            // Start over at the next window position with an empty builder.
            shiftInputWindow();
            this.gramBuilder.setLength(0);
            builtGramSize = 0;
        } else {
            // Keep extending the text built for the previous (smaller) gram.
            builtGramSize = this.gramSize.getPreviousValue();
        }

        if (this.inputWindow.size() < this.gramSize.getValue()) {
            return false;
        }

        boolean onlyFillersSoFar = true;
        InputWindowToken lastUsed = null;
        final Iterator<InputWindowToken> window = this.inputWindow.iterator();
        for (int gramNum = 1; window.hasNext() && builtGramSize < this.gramSize.getValue(); gramNum++) {
            lastUsed = window.next();
            if (builtGramSize < gramNum) {
                if (builtGramSize > 0) {
                    this.gramBuilder.append(this.tokenSeparator);
                }
                this.gramBuilder.append(lastUsed.termAtt.buffer(), 0, lastUsed.termAtt.length());
                builtGramSize++;
            }
            if (onlyFillersSoFar && lastUsed.isFiller) {
                // A gram consisting solely of fillers is never emitted: move on to the next size.
                if (gramNum == this.gramSize.getValue()) {
                    this.gramSize.advance();
                }
            } else {
                onlyFillersSoFar = false;
            }
        }

        if (onlyFillersSoFar || builtGramSize != this.gramSize.getValue()) {
            return false;
        }

        this.inputWindow.getFirst().attSource.copyTo(this);
        this.posIncrAtt.setPositionIncrement(this.isOutputHere ? 0 : 1);
        this.termAtt.setEmpty().append(this.gramBuilder);
        if (this.gramSize.getValue() > 1) {
            this.typeAtt.setType(this.tokenType);
            this.noShingleOutput = false;
        }
        // Start offset comes from the first window token (just copied), end offset from the last one used.
        this.offsetAtt.setOffset(this.offsetAtt.startOffset(), lastUsed.offsetAtt.endOffset());
        final int positionLength;
        if (this.outputUnigrams) {
            positionLength = builtGramSize;
        } else {
            positionLength = Math.max(1, (builtGramSize - this.minShingleSize) + 1);
        }
        this.posLenAtt.setPositionLength(positionLength);
        this.isOutputHere = true;
        this.gramSize.advance();
        return true;
    }

    /**
     * Supplies the next token for the input window, inserting filler tokens where the wrapped
     * stream reports a position increment greater than one and after the end of the input.
     *
     * <p>Sources are tried in this order: pending fillers, a real token held back behind fillers,
     * and finally the wrapped stream itself.
     *
     * @param target a window token to overwrite and return, or {@code null} to allocate a new one
     * @return the filled token ({@code target} itself when it was non-null), or {@code null} when
     *     nothing more can be delivered
     * @throws IOException if reading from the wrapped stream fails
     */
    private InputWindowToken getNextToken(InputWindowToken target) throws IOException {
        // 1. Fillers queued by an earlier call.
        if (this.numFillerTokensToInsert > 0) {
            InputWindowToken filler = target;
            if (filler == null) {
                filler = new InputWindowToken(this.nextInputStreamToken.cloneAttributes());
            } else {
                this.nextInputStreamToken.copyTo(filler.attSource);
            }
            // A filler gets zero width: both offsets collapse onto the start offset.
            final int start = filler.offsetAtt.startOffset();
            filler.offsetAtt.setOffset(start, start);
            filler.termAtt.copyBuffer(this.fillerToken, 0, this.fillerToken.length);
            filler.isFiller = true;
            this.numFillerTokensToInsert--;
            return filler;
        }

        // 2. The real token that was saved while its preceding fillers were handed out.
        if (this.isNextInputStreamToken) {
            InputWindowToken held = target;
            if (held == null) {
                held = new InputWindowToken(this.nextInputStreamToken.cloneAttributes());
            } else {
                this.nextInputStreamToken.copyTo(held.attSource);
            }
            this.isNextInputStreamToken = false;
            held.isFiller = false;
            return held;
        }

        // 3. Nothing buffered and the wrapped stream already ended.
        if (this.exhausted) {
            return null;
        }

        // 4. Pull a token from the wrapped stream.
        if (this.input.incrementToken()) {
            InputWindowToken fresh = target;
            if (fresh == null) {
                fresh = new InputWindowToken(cloneAttributes());
            } else {
                copyTo(fresh.attSource);
            }
            final int positionIncrement = this.posIncrAtt.getPositionIncrement();
            if (positionIncrement <= 1) {
                fresh.isFiller = false;
                return fresh;
            }
            // Gap before this token: queue fillers, capped at maxShingleSize - 1.
            this.numFillerTokensToInsert = Math.min(positionIncrement - 1, this.maxShingleSize - 1);
            // Save the real token so it is delivered once the fillers are used up.
            if (this.nextInputStreamToken == null) {
                this.nextInputStreamToken = cloneAttributes();
            } else {
                copyTo(this.nextInputStreamToken);
            }
            this.isNextInputStreamToken = true;
            // This call itself returns the first filler, placed at the real token's start offset.
            final int start = this.offsetAtt.startOffset();
            fresh.offsetAtt.setOffset(start, start);
            fresh.termAtt.copyBuffer(this.fillerToken, 0, this.fillerToken.length);
            fresh.isFiller = true;
            this.numFillerTokensToInsert--;
            return fresh;
        }

        // 5. The wrapped stream just ended: finish it here and keep the resulting state for end().
        this.exhausted = true;
        this.input.end();
        this.endState = captureState();
        this.numFillerTokensToInsert = Math.min(this.posIncrAtt.getPositionIncrement(), this.maxShingleSize - 1);
        if (this.numFillerTokensToInsert <= 0) {
            return null;
        }
        // Trailing fillers are cloned from a minimal template positioned at the final end offset.
        this.nextInputStreamToken = new AttributeSource(getAttributeFactory());
        this.nextInputStreamToken.addAttribute(CharTermAttribute.class);
        final OffsetAttribute templateOffset = this.nextInputStreamToken.addAttribute(OffsetAttribute.class);
        templateOffset.setOffset(this.offsetAtt.endOffset(), this.offsetAtt.endOffset());
        // One level of recursion: the call lands in the pending-filler case above.
        return getNextToken(target);
    }

    /**
     * Ends the stream. If this filter has not yet seen the end of its input, this delegates to
     * {@code super.end()}. Otherwise the input was already ended inside the filter, and the state
     * captured at that moment is restored instead.
     *
     * @throws IOException if {@code super.end()} fails
     */
    @Override // org.apache.lucene.analysis.TokenFilter, org.apache.lucene.analysis.TokenStream
    public void end() throws IOException {
        if (!this.exhausted) {
            super.end();
            return;
        }
        restoreState(this.endState);
    }

    /**
     * Advances the input window by one position: drops the oldest token, refills the window up to
     * {@code maxShingleSize} tokens, and resets the gram-size sequence for the new position.
     *
     * @throws IOException if reading from the wrapped stream fails
     */
    private void shiftInputWindow() throws IOException {
        // The dropped token object is reused for the first refill instead of allocating a new one.
        InputWindowToken recyclable = this.inputWindow.isEmpty() ? null : this.inputWindow.removeFirst();

        while (this.inputWindow.size() < this.maxShingleSize) {
            // getNextToken returns the instance it was given (when non-null) or null at end of input.
            final InputWindowToken filled = getNextToken(recyclable);
            if (filled == null) {
                break;
            }
            this.inputWindow.add(filled);
            recyclable = null;
        }

        // Unigram fallback: no shingle emitted so far and the window cannot hold a minimal shingle.
        final boolean fallBackToUnigrams = this.outputUnigramsIfNoShingles
                && this.noShingleOutput
                && this.gramSize.minValue > 1
                && this.inputWindow.size() < this.minShingleSize;
        if (fallBackToUnigrams) {
            this.gramSize.minValue = 1;
        }

        this.gramSize.reset();
        this.isOutputHere = false;
    }

    /**
     * Resets the wrapped stream and clears all per-stream state of this filter so it can be
     * consumed again.
     *
     * @throws IOException if {@code super.reset()} fails
     */
    @Override // org.apache.lucene.analysis.TokenFilter, org.apache.lucene.analysis.TokenStream
    public void reset() throws IOException {
        super.reset();
        this.gramSize.reset();
        this.inputWindow.clear();

        // Filler bookkeeping.
        this.nextInputStreamToken = null;
        this.isNextInputStreamToken = false;
        this.numFillerTokensToInsert = 0;

        // Output and end-of-stream bookkeeping.
        this.isOutputHere = false;
        this.noShingleOutput = true;
        this.exhausted = false;
        this.endState = null;

        if (this.outputUnigramsIfNoShingles && !this.outputUnigrams) {
            // Undo a unigram fallback that shiftInputWindow() may have applied on the previous stream.
            this.gramSize.minValue = this.minShingleSize;
        }
    }
}
