package com.atlassian.confluence.extra.officeconnector.index.excel;

import com.atlassian.bonnie.search.SearchableAttachment;
import com.atlassian.bonnie.search.extractor.BaseAttachmentContentExtractor;
import com.atlassian.bonnie.search.extractor.ExtractorException;
import com.atlassian.confluence.extra.office.OfficeFile;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.lang.time.StopWatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:com/atlassian/confluence/extra/officeconnector/index/excel/ExcelXMLTextExtractor.class */
/**
 * Extracts searchable text from Excel 2007+ (Office Open XML) workbooks.
 *
 * <p>The attachment is read as a ZIP stream. Every {@code .xml} entry is run through a SAX
 * parser: {@code xl/sharedStrings.xml} fills the shared-strings table, every other XML entry
 * contributes {@link TextEntry} items (cell values, sheet names). Cells that reference a shared
 * string are resolved lazily, after the whole archive has been read, so the result does not
 * depend on where {@code xl/sharedStrings.xml} sits inside the archive.
 */
public class ExcelXMLTextExtractor extends BaseAttachmentContentExtractor {
    private static final Logger log = LoggerFactory.getLogger(ExcelXMLTextExtractor.class);
    private static final String[] CONTENT_TYPES = (String[]) OfficeFile.getMimeTypesFor(OfficeFile.Type.Excel, OfficeFile.Version.V2007).toArray(new String[0]);
    private static final String[] EXTENSIONS = (String[]) OfficeFile.getExtensionsFor(OfficeFile.Type.Excel, OfficeFile.Version.V2007).toArray(new String[0]);

    /** How the character data captured for an element has to be interpreted. */
    private enum CapturingType {
        /** The captured text is a numeric index into the shared-strings table. */
        INDEXED,
        /** The captured text is the value itself. */
        LITERAL
    }

    /** A text entry whose value is looked up in the shared-strings list when it is requested. */
    private class IndexedTextEntry implements TextEntry {
        private final int index;
        private final List<String> strings;

        /**
         * @param i    position of the string inside {@code list}
         * @param list shared-strings list; it may still be filled after this entry is created
         */
        public IndexedTextEntry(int i, List<String> list) {
            this.index = i;
            this.strings = list;
        }

        /**
         * @return the shared string at the stored index
         * @throws IndexOutOfBoundsException if the list has no element at that index
         */
        @Override
        public String getText() {
            return this.strings.get(this.index);
        }
    }

    /** A text entry that simply carries the string it was created with. */
    private class LiteralTextEntry implements TextEntry {
        private final String string;

        private LiteralTextEntry(String str) {
            this.string = str;
        }

        @Override
        public String getText() {
            return this.string;
        }
    }

    /**
     * SAX handler used for every XML part except the shared-strings part.
     *
     * <p>Captures the character data found inside {@code c} and {@code si} elements and the
     * {@code name} attribute of {@code sheet} elements. Element names are compared against the
     * qualified name reported by the parser.
     */
    public class PartHandler extends DefaultHandler {
        private final List<TextEntry> entries;
        private final List<String> strings;
        private String capturingElement = null;
        private CapturingType type = null;
        private final StringBuilder buffer = new StringBuilder();

        /**
         * @param list  receives the entries produced while parsing
         * @param list2 shared-strings list handed to every {@link IndexedTextEntry}
         */
        public PartHandler(List<TextEntry> list, List<String> list2) {
            this.entries = list;
            this.strings = list2;
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (qName.equals("c") || qName.equals("si")) {
                // t="s" marks the element content as an index into the shared-strings table.
                this.type = "s".equals(attributes.getValue("t")) ? CapturingType.INDEXED : CapturingType.LITERAL;
                ExcelXMLTextExtractor.log.debug("capturing [ {} ]", qName);
                this.capturingElement = qName;
            } else if (qName.equals("sheet")) {
                String sheetName = attributes.getValue("name");
                // A missing attribute would otherwise end up as the text "null" in the result.
                if (sheetName != null) {
                    this.entries.add(new LiteralTextEntry(sheetName));
                }
            } else {
                ExcelXMLTextExtractor.log.debug("skipping [ {} ]", qName);
            }
        }

        @Override
        public void characters(char[] chars, int start, int length) throws SAXException {
            if (this.capturingElement != null) {
                this.buffer.append(chars, start, length);
            }
        }

        /**
         * Turns the text captured for the current element into a {@link TextEntry}.
         *
         * @throws SAXException if an element flagged as a shared-string reference contains text
         *                      that is not an integer
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (this.capturingElement == null || !qName.equals(this.capturingElement)) {
                return;
            }
            String captured = this.buffer.toString();
            CapturingType capturedType = this.type;
            // Reset the state first so the handler stays consistent even if we throw below.
            this.capturingElement = null;
            this.type = null;
            this.buffer.setLength(0);

            if (capturedType == CapturingType.INDEXED) {
                // Surrounding whitespace (e.g. pretty-printed XML) must not break the index.
                String indexText = captured.trim();
                if (indexText.length() == 0) {
                    // A reference without a value has nothing to contribute.
                    ExcelXMLTextExtractor.log.debug("ignoring empty shared string reference in [ {} ]", qName);
                    return;
                }
                try {
                    this.entries.add(new IndexedTextEntry(Integer.parseInt(indexText), this.strings));
                } catch (NumberFormatException e) {
                    throw new SAXException("invalid shared string index [ " + indexText + " ]", e);
                }
            } else {
                this.entries.add(new LiteralTextEntry(captured));
            }
        }
    }

    /**
     * SAX handler for the shared-strings part.
     *
     * <p>Adds exactly one list element per {@code si} element, made of the text of all
     * {@code t} elements nested inside it (a rich-text item holds several runs, each with its
     * own {@code t}). Keeping one element per {@code si} is what makes the list positions line
     * up with the indexes stored in {@link IndexedTextEntry}.
     */
    public class SharedStringsHandler extends DefaultHandler {
        private final List<String> strings;
        /** Count declared on the {@code sst} element, or -1 when absent or unparseable. */
        private int uniqueCount = -1;
        private boolean insideItem = false;
        private boolean capturingString = false;
        private final StringBuilder buffer = new StringBuilder();

        /**
         * @param list receives the shared strings in document order
         */
        public SharedStringsHandler(List<String> list) {
            this.strings = list;
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (qName.equals("t")) {
                this.capturingString = true;
            } else if (qName.equals("si")) {
                this.insideItem = true;
                this.buffer.setLength(0);
            } else if (qName.equals("sst")) {
                this.uniqueCount = parseCount(attributes.getValue("uniqueCount"));
            }
        }

        @Override
        public void characters(char[] chars, int start, int length) throws SAXException {
            if (this.capturingString) {
                this.buffer.append(chars, start, length);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (qName.equals("t")) {
                this.capturingString = false;
                if (!this.insideItem) {
                    // Text outside any item: keep it as its own element, as before.
                    flush();
                }
            } else if (qName.equals("si")) {
                flush();
                this.insideItem = false;
            }
        }

        @Override
        public void endDocument() throws SAXException {
            int read = this.strings.size();
            // Only compare against the declared count when the document actually declared one.
            if (this.uniqueCount >= 0 && read != this.uniqueCount) {
                ExcelXMLTextExtractor.log.error("expected [ " + this.uniqueCount + " ] entries but read [ " + read + " ]");
            } else {
                ExcelXMLTextExtractor.log.debug("read [ {} ] shared strings", Integer.valueOf(read));
            }
        }

        /** Moves the buffered text into the list and clears the buffer. */
        private void flush() {
            this.strings.add(this.buffer.toString());
            this.buffer.setLength(0);
        }

        /** Parses the declared string count; returns -1 when it is missing or not a number. */
        private int parseCount(String declared) {
            if (declared == null) {
                return -1;
            }
            try {
                return Integer.parseInt(declared.trim());
            } catch (NumberFormatException e) {
                ExcelXMLTextExtractor.log.warn("ignoring invalid uniqueCount [ {} ]", declared);
                return -1;
            }
        }
    }

    /** A piece of extracted text whose value may only become available after parsing. */
    public interface TextEntry {
        /** @return the text of this entry */
        String getText();
    }

    /**
     * Developer entry point: extracts one workbook and prints memory use and elapsed time.
     *
     * @param strArr optional; {@code strArr[0]} is the path of the workbook to read. Without
     *               arguments the historical hard-coded sample path is used.
     */
    public static void main(String[] strArr) throws Exception {
        String path = (strArr != null && strArr.length > 0) ? strArr[0] : "/Users/mjensen/Desktop/large+4.xlsx";
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        InputStream in = new FileInputStream(path);
        try {
            new ExcelXMLTextExtractor().extractText(in, null);
        } finally {
            in.close();
        }
        stopWatch.stop();
        System.out.println("Used memory: " + new DecimalFormat("#,###").format(Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()));
        System.out.println(stopWatch.toString());
    }

    /**
     * Reads the workbook from {@code inputStream} and returns all extracted text, each entry
     * preceded by a single space.
     *
     * <p>The stream is wrapped in a {@link ZipInputStream} that is not closed here; closing
     * {@code inputStream} is left to whoever supplied it.
     *
     * @param inputStream          the raw bytes of the workbook (a ZIP archive)
     * @param searchableAttachment not used by this implementation
     * @return the concatenated text of the workbook
     * @throws ExtractorException if the parser cannot be created, the archive cannot be read,
     *                            or one of its XML parts cannot be parsed
     */
    protected String extractText(InputStream inputStream, SearchableAttachment searchableAttachment) throws ExtractorException {
        SAXParser parser = createParser();
        List<TextEntry> entries = new ArrayList<TextEntry>();
        List<String> sharedStrings = new ArrayList<String>();
        try {
            ZipInputStream zip = new ZipInputStream(inputStream);
            for (ZipEntry zipEntry = zip.getNextEntry(); zipEntry != null; zipEntry = zip.getNextEntry()) {
                parser.reset();
                if (zipEntry.getName().equals("xl/sharedStrings.xml")) {
                    parseSharedStrings(parser, zipEntry, zip, sharedStrings);
                } else {
                    parseEntry(parser, zipEntry, zip, entries, sharedStrings);
                }
            }
            // Resolve entries only now: indexed entries need the complete shared-strings list.
            StringBuilder text = new StringBuilder();
            for (TextEntry textEntry : entries) {
                text.append(" ").append(textEntry.getText());
            }
            return text.toString();
        } catch (ExtractorException e) {
            // Already describes what went wrong; do not re-wrap it under a misleading message.
            throw e;
        } catch (IOException e) {
            throw new ExtractorException("Error reading content of Excel document: " + e.getMessage(), e);
        } catch (RuntimeException e) {
            // e.g. a cell pointing past the end of the shared-strings list
            throw new ExtractorException("Error reading content of Excel document: " + e.getMessage(), e);
        }
    }

    /**
     * Creates the SAX parser used for all parts of the workbook. Attachments are untrusted
     * input, so external entity resolution and external DTD loading are switched off where the
     * parser implementation supports it.
     */
    private static SAXParser createParser() throws ExtractorException {
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            setFeatureIfSupported(factory, javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
            setFeatureIfSupported(factory, "http://xml.org/sax/features/external-general-entities", false);
            setFeatureIfSupported(factory, "http://xml.org/sax/features/external-parameter-entities", false);
            setFeatureIfSupported(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            return factory.newSAXParser();
        } catch (Exception e) {
            throw new ExtractorException("could not create SAX parser", e);
        }
    }

    /** Applies a parser feature, logging instead of failing when the parser rejects it. */
    private static void setFeatureIfSupported(SAXParserFactory factory, String feature, boolean value) {
        try {
            factory.setFeature(feature, value);
        } catch (Exception e) {
            log.warn("XML parser feature [ {} ] could not be set: {}", feature, e.toString());
        }
    }

    /**
     * Wraps the ZIP stream for one entry. The parser closes the stream it is given; here that
     * only closes the current entry so the remaining entries of the archive stay readable.
     */
    private static InputStream entryStream(final ZipInputStream zipInputStream) {
        return new BufferedInputStream(zipInputStream) {
            @Override
            public void close() throws IOException {
                zipInputStream.closeEntry();
            }
        };
    }

    /**
     * Parses the shared-strings part and appends its strings to {@code list}.
     *
     * @throws ExtractorException if the part cannot be read or parsed
     */
    private void parseSharedStrings(SAXParser sAXParser, ZipEntry zipEntry, final ZipInputStream zipInputStream, List<String> list) throws ExtractorException {
        log.debug("parsing shared strings [ {} ]", zipEntry.getName());
        try {
            sAXParser.parse(entryStream(zipInputStream), new SharedStringsHandler(list));
        } catch (Exception e) {
            throw new ExtractorException("Could not parse shared strings [ " + zipEntry.getName() + " ]: " + e.getMessage(), e);
        }
    }

    /**
     * Parses one archive entry with a {@link PartHandler}; entries whose name does not end in
     * {@code .xml} are skipped.
     *
     * @param list  receives the text entries found in the part
     * @param list2 shared-strings list used to resolve indexed cells
     * @throws ExtractorException if the part cannot be read or parsed
     */
    private void parseEntry(SAXParser sAXParser, ZipEntry zipEntry, final ZipInputStream zipInputStream, List<TextEntry> list, List<String> list2) throws ExtractorException {
        String entryName = zipEntry.getName();
        if (!entryName.endsWith(".xml")) {
            log.debug("skipping [ {} ]", entryName);
            return;
        }
        log.debug("parsing [ {} ]", entryName);
        try {
            sAXParser.parse(entryStream(zipInputStream), new PartHandler(list, list2));
        } catch (Exception e) {
            throw new ExtractorException("Could not parse [ " + entryName + " ]: " + e.getMessage(), e);
        }
    }

    /** @return the MIME types registered for Excel 2007 files */
    protected String[] getMatchingContentTypes() {
        return CONTENT_TYPES;
    }

    /** @return the file extensions registered for Excel 2007 files */
    protected String[] getMatchingFileExtensions() {
        return EXTENSIONS;
    }
}
