package com.atlassian.confluence.extra.officeconnector.index.excel;

import com.atlassian.bonnie.search.SearchableAttachment;
import com.atlassian.bonnie.search.extractor.BaseAttachmentContentExtractor;
import com.atlassian.bonnie.search.extractor.ExtractorException;
import com.atlassian.confluence.extra.office.OfficeFile;
import com.atlassian.confluence.extra.officeconnector.index.powerpoint.SecureXmlUtils;
import com.atlassian.confluence.extra.officeconnector.index.util.AbstractLengthLimitedStringBuilder;
import com.atlassian.confluence.extra.officeconnector.index.util.LimitReachedException;
import com.atlassian.confluence.extra.officeconnector.index.util.StaticLengthLimitedStringBuilder;
import com.atlassian.util.profiling.UtilTimerStack;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.xml.parsers.SAXParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:com/atlassian/confluence/extra/officeconnector/index/excel/ExcelXMLTextExtractor.class */
public class ExcelXMLTextExtractor extends BaseAttachmentContentExtractor {
    /** Class-wide logger; also used by the inner SAX handlers. */
    private static final Logger log = LoggerFactory.getLogger(ExcelXMLTextExtractor.class);
    /** MIME types reported for Excel / V2007 by {@code OfficeFile}; returned by {@link #getMatchingContentTypes()}. */
    private static final String[] CONTENT_TYPES = (String[]) OfficeFile.getMimeTypesFor(OfficeFile.Type.Excel, OfficeFile.Version.V2007).toArray(new String[0]);
    /** File extensions reported for Excel / V2007 by {@code OfficeFile}; returned by {@link #getMatchingFileExtensions()}. */
    private static final String[] EXTENSIONS = (String[]) OfficeFile.getExtensionsFor(OfficeFile.Type.Excel, OfficeFile.Version.V2007).toArray(new String[0]);
    /** Name of the system property read by {@link #getMaxLength()} to override the default maximum length. */
    public static final String MAX_LENGTH_PROPERTY = "officeconnector.excel.extractor.maxlength";
    /** Default maximum length (1048576); {@link #getMaxLength()} starts from this value and returns half of it. */
    public static final int DEFAULT_MAX_LENGTH = 1048576;

    /* loaded from: input_file:com/atlassian/confluence/extra/officeconnector/index/excel/ExcelXMLTextExtractor$CapturingType.class */
    /**
     * How the text buffered for a captured element is turned into output when the element ends.
     */
    private enum CapturingType {
        /** The buffered text is a numeric index into the shared-strings list (element had {@code t="s"}). */
        INDEXED,
        /** The buffered text is appended to the output as-is. */
        LITERAL
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/atlassian/confluence/extra/officeconnector/index/excel/ExcelXMLTextExtractor$PartHandler.class */
    /**
     * SAX handler that collects searchable text from one XML part of the workbook.
     * <p>
     * It appends the {@code name} attribute of every {@code sheet} element, and the text content of every
     * {@code c} and {@code si} element. When such an element carries {@code t="s"} its content is treated
     * as an index into the shared-strings list and the referenced string is appended instead.
     * <p>
     * Cells whose shared-string index is blank, non-numeric or out of range are skipped (and logged at
     * debug level) rather than aborting extraction of the whole document.
     */
    public class PartHandler extends DefaultHandler {
        private final AbstractLengthLimitedStringBuilder outputBuffer;
        private final List<String> strings;
        /** Qualified name of the element currently being captured, or {@code null} when not capturing. */
        private String capturingElement = null;
        /** How the buffered text of the current element must be interpreted; {@code null} when not capturing. */
        private CapturingType type = null;
        /** Character data accumulated for the element currently being captured. */
        private final StringBuilder buffer = new StringBuilder();

        /**
         * @param abstractLengthLimitedStringBuilder destination for the extracted text
         * @param list shared strings, looked up by index for elements marked {@code t="s"}
         */
        public PartHandler(AbstractLengthLimitedStringBuilder abstractLengthLimitedStringBuilder, List<String> list) {
            this.outputBuffer = abstractLengthLimitedStringBuilder;
            this.strings = list;
        }

        /**
         * Starts capturing on {@code c} / {@code si}, emits the sheet name on {@code sheet},
         * and ignores every other element.
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if ("c".equals(qName) || "si".equals(qName)) {
                this.type = "s".equals(attributes.getValue("t")) ? CapturingType.INDEXED : CapturingType.LITERAL;
                ExcelXMLTextExtractor.log.debug("capturing [ {} ]", qName);
                this.capturingElement = qName;
                return;
            }
            if ("sheet".equals(qName)) {
                // The attribute may be missing in malformed input; never hand null to the output builder.
                final String sheetName = attributes.getValue("name");
                if (sheetName != null) {
                    appendToOutput(sheetName);
                }
                return;
            }
            ExcelXMLTextExtractor.log.debug("skipping [ {} ]", qName);
        }

        /** Buffers character data while an element is being captured. */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void characters(char[] chars, int start, int length) throws SAXException {
            if (this.capturingElement != null) {
                this.buffer.append(chars, start, length);
            }
        }

        /**
         * Finishes the current capture when the matching end tag arrives and appends the resulting text.
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (this.capturingElement == null || !qName.equals(this.capturingElement)) {
                return;
            }
            // Reset the capture state first so it is consistent even if appending fails.
            final String captured = this.buffer.toString();
            final CapturingType capturedType = this.type;
            this.capturingElement = null;
            this.type = null;
            this.buffer.setLength(0);

            if (capturedType == CapturingType.INDEXED) {
                final String shared = lookupSharedString(captured);
                if (shared != null) {
                    appendToOutput(shared);
                }
            } else {
                appendToOutput(captured);
            }
        }

        /**
         * Resolves the text of an indexed cell against the shared-strings list.
         *
         * @param rawIndex buffered cell content; surrounding whitespace is ignored
         * @return the shared string, or {@code null} when the index is blank, non-numeric or out of range
         */
        private String lookupSharedString(String rawIndex) {
            final String trimmed = rawIndex.trim();
            final int index;
            try {
                index = Integer.parseInt(trimmed);
            } catch (NumberFormatException e) {
                ExcelXMLTextExtractor.log.debug("ignoring non-numeric shared string index [ {} ]", trimmed);
                return null;
            }
            if (index < 0 || index >= this.strings.size()) {
                ExcelXMLTextExtractor.log.debug("ignoring out-of-range shared string index [ {} ]", trimmed);
                return null;
            }
            return this.strings.get(index);
        }

        /** Appends {@code str} to the output, preceded by a single space separator. */
        private void appendToOutput(String str) {
            this.outputBuffer.append(' ').append(str);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/atlassian/confluence/extra/officeconnector/index/excel/ExcelXMLTextExtractor$SharedStringsHandler.class */
    public class SharedStringsHandler extends DefaultHandler {
        private final ArrayList<String> strings;
        private int uniqueCount;
        private boolean capturingString = false;
        private boolean capturingStringPart = false;
        private final StringBuilder buffer = new StringBuilder();

        public SharedStringsHandler(ArrayList<String> arrayList) {
            this.strings = arrayList;
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
            if (str3.equals("si")) {
                this.capturingString = true;
            }
            if (this.capturingString && str3.equals("t")) {
                this.capturingStringPart = true;
            } else if (str3.equals("t")) {
                ExcelXMLTextExtractor.log.error("expected <si> before <t>");
            }
            if (str3.equals("sst")) {
                this.uniqueCount = Integer.valueOf(attributes.getValue("uniqueCount")).intValue();
                this.strings.ensureCapacity(this.uniqueCount);
            }
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void characters(char[] cArr, int i, int i2) throws SAXException {
            if (this.capturingStringPart) {
                this.buffer.append(cArr, i, i2);
            }
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void endElement(String str, String str2, String str3) throws SAXException {
            if (str3.equals("t")) {
                this.capturingStringPart = false;
            }
            if (str3.equals("si")) {
                this.strings.add(this.buffer.toString());
                this.buffer.setLength(0);
                this.capturingString = false;
            }
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void endDocument() throws SAXException {
            if (this.strings.size() != this.uniqueCount) {
                ExcelXMLTextExtractor.log.error("expected [ {} ] entries but read [ {} ]", Integer.valueOf(this.uniqueCount), Integer.valueOf(this.strings.size()));
            } else {
                ExcelXMLTextExtractor.log.debug("read [ {} ] shared strings", Integer.valueOf(this.strings.size()));
            }
        }
    }

    /**
     * Extracts the searchable text of an Excel workbook in two passes over its zip archive.
     * <p>
     * Pass 1 reads {@code xl/sharedStrings.xml} into a list; pass 2 parses every other entry and resolves
     * shared-string references against that list. The archive is opened once per pass through
     * {@code searchableAttachment.getContentsAsStream()}; the {@code inputStream} argument is not read.
     * Both zip streams are closed and both profiling timers are popped on every exit path.
     *
     * @param inputStream not used by this implementation
     * @param searchableAttachment attachment providing the content streams and the file name used in log messages
     * @return the extracted text, or an empty string when reading the shared strings hit the memory limit
     * @throws ExtractorException if the SAX parser cannot be created, or the archive cannot be read or parsed
     */
    protected String extractText(InputStream inputStream, SearchableAttachment searchableAttachment) throws ExtractorException {
        final SAXParser parser;
        try {
            parser = SecureXmlUtils.createSecureSAXParserFactory().newSAXParser();
            parser.getXMLReader().setEntityResolver(SecureXmlUtils.emptyEntityResolver);
        } catch (Exception e) {
            throw new ExtractorException("could not create SAX parser", e);
        }

        try {
            final ArrayList<String> sharedStrings = new ArrayList<>();
            if (!readSharedStrings(parser, searchableAttachment, sharedStrings)) {
                return "";
            }
            return buildOutputText(parser, searchableAttachment, sharedStrings);
        } catch (ExtractorException e) {
            // Already describes the failure; do not re-wrap under an unrelated message.
            throw e;
        } catch (IOException e) {
            throw new ExtractorException("Error reading content of Excel document: " + e.getMessage(), e);
        } catch (RuntimeException e) {
            // Keep the contract that failures surface as ExtractorException.
            throw new ExtractorException("Error extracting text from Excel document: " + e.getMessage(), e);
        }
    }

    /**
     * Pass 1: loads {@code xl/sharedStrings.xml} from the attachment into {@code sharedStrings}.
     *
     * @return {@code false} when the memory limit was reached and extraction should be abandoned
     */
    private boolean readSharedStrings(SAXParser parser, SearchableAttachment searchableAttachment, ArrayList<String> sharedStrings) throws IOException, ExtractorException {
        final String timerName = "extractText() -- Pass 1 - Read shared strings";
        UtilTimerStack.push(timerName);
        try (ZipInputStream zip = new ZipInputStream(searchableAttachment.getContentsAsStream())) {
            ZipEntry entry;
            while ((entry = zip.getNextEntry()) != null) {
                parser.reset();
                if (!entry.getName().equals("xl/sharedStrings.xml")) {
                    continue;
                }
                try {
                    parseSharedStrings(parser, entry, zip, sharedStrings);
                } catch (LimitReachedException e) {
                    log.info("Not enough memory to read '{}'. The attachment won't be searchable.", searchableAttachment.getFileName());
                    return false;
                }
            }
            return true;
        } finally {
            UtilTimerStack.pop(timerName);
        }
    }

    /**
     * Pass 2: parses every entry except {@code xl/sharedStrings.xml} and returns the accumulated text.
     */
    private String buildOutputText(SAXParser parser, SearchableAttachment searchableAttachment, List<String> sharedStrings) throws IOException, ExtractorException {
        final String timerName = "extractText() -- Pass 2 - Build output string";
        final StaticLengthLimitedStringBuilder output = new StaticLengthLimitedStringBuilder(getMaxLength());
        UtilTimerStack.push(timerName);
        try (ZipInputStream zip = new ZipInputStream(searchableAttachment.getContentsAsStream())) {
            ZipEntry entry;
            while ((entry = zip.getNextEntry()) != null) {
                parser.reset();
                if (!entry.getName().equals("xl/sharedStrings.xml")) {
                    parseEntry(parser, entry, zip, output, sharedStrings);
                }
            }
        } finally {
            UtilTimerStack.pop(timerName);
        }
        if (output.isLimitReached()) {
            log.info("Not enough memory to extract all text from '{}'. The attachment will be partially searchable.", searchableAttachment.getFileName());
        }
        return output.toString();
    }

    /**
     * Parses the current zip entry as the shared-strings part and appends its strings to {@code arrayList}.
     *
     * @param sAXParser parser used for this entry
     * @param zipEntry entry being parsed; its name is used in log and error messages
     * @param zipInputStream archive stream positioned at {@code zipEntry}
     * @param arrayList list that receives the shared strings
     * @throws ExtractorException if the entry cannot be parsed
     * @throws LimitReachedException rethrown unchanged when raised during parsing
     */
    private void parseSharedStrings(SAXParser sAXParser, ZipEntry zipEntry, final ZipInputStream zipInputStream, ArrayList<String> arrayList) throws ExtractorException {
        log.debug("parsing shared strings [ {} ]", zipEntry.getName());
        // Closing this wrapper only closes the current entry, so the archive stream stays usable afterwards.
        final InputStream entryStream = new BufferedInputStream(zipInputStream) {
            @Override // java.io.BufferedInputStream, java.io.FilterInputStream, java.io.InputStream, java.io.Closeable, java.lang.AutoCloseable
            public void close() throws IOException {
                zipInputStream.closeEntry();
            }
        };
        try {
            sAXParser.parse(entryStream, new SharedStringsHandler(arrayList));
        } catch (LimitReachedException e) {
            throw e;
        } catch (Exception e2) {
            throw new ExtractorException("Could not parse shared strings [ " + zipEntry.getName() + " ]", e2);
        }
    }

    /**
     * Parses the current zip entry with a {@code PartHandler} if its name ends with {@code .xml};
     * other entries are skipped.
     *
     * @param sAXParser parser used for this entry
     * @param zipEntry entry being parsed; its name is used in log and error messages
     * @param zipInputStream archive stream positioned at {@code zipEntry}
     * @param abstractLengthLimitedStringBuilder destination for the extracted text
     * @param list shared strings used to resolve indexed cells
     * @throws ExtractorException if the entry cannot be parsed
     */
    private void parseEntry(SAXParser sAXParser, ZipEntry zipEntry, final ZipInputStream zipInputStream, AbstractLengthLimitedStringBuilder abstractLengthLimitedStringBuilder, List<String> list) throws ExtractorException {
        final String entryName = zipEntry.getName();
        if (!entryName.endsWith(".xml")) {
            log.debug("skipping [ {} ]", entryName);
            return;
        }
        log.debug("parsing [ {} ]", entryName);
        // Closing this wrapper only closes the current entry, so the archive stream stays usable afterwards.
        final InputStream entryStream = new BufferedInputStream(zipInputStream) {
            @Override // java.io.BufferedInputStream, java.io.FilterInputStream, java.io.InputStream, java.io.Closeable, java.lang.AutoCloseable
            public void close() throws IOException {
                zipInputStream.closeEntry();
            }
        };
        try {
            sAXParser.parse(entryStream, new PartHandler(abstractLengthLimitedStringBuilder, list));
        } catch (Exception e) {
            throw new ExtractorException("Could not parse [ " + entryName + " ]", e);
        }
    }

    /**
     * Returns the MIME types this extractor matches.
     *
     * @return a fresh copy of the content-type array, so callers cannot modify the shared static array
     */
    protected String[] getMatchingContentTypes() {
        return CONTENT_TYPES.clone();
    }

    /**
     * Returns the file extensions this extractor matches.
     *
     * @return a fresh copy of the extension array, so callers cannot modify the shared static array
     */
    protected String[] getMatchingFileExtensions() {
        return EXTENSIONS.clone();
    }

    /**
     * Computes the length limit handed to the output builder.
     * <p>
     * The base value is the integer held in the {@code officeconnector.excel.extractor.maxlength} system
     * property, or 1048576 when the property is unset or not a valid integer (the latter is logged as an
     * error). The result is that base value divided by two.
     *
     * @return half of the configured or default maximum length
     */
    int getMaxLength() {
        final int fallback = 1048576;
        final String configured = System.getProperty(MAX_LENGTH_PROPERTY);
        if (configured == null) {
            return fallback / 2;
        }
        try {
            return Integer.parseInt(configured) / 2;
        } catch (NumberFormatException e) {
            log.error("Failed to parse property officeconnector.excel.extractor.maxlength", e);
            return fallback / 2;
        }
    }
}
