package org.opencastproject.textextractor.tesseract;

import com.entwinemedia.fn.Pred;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Dictionary;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.opencastproject.textextractor.api.TextExtractor;
import org.opencastproject.textextractor.api.TextExtractorException;
import org.opencastproject.textextractor.api.TextFrame;
import org.opencastproject.util.ProcessRunner;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.cm.ManagedService;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/opencastproject/textextractor/tesseract/TesseractTextExtractor.class */
/**
 * Text extractor that runs the external {@code tesseract} binary on an image file and parses the
 * text file it produces into a {@link TextFrame}.
 *
 * <p>The binary path and additional command line options can be supplied through the constructor
 * and setters, through bundle context properties read in {@link #activate(ComponentContext)}, or
 * through configuration pushed to {@link #updated(Dictionary)}.
 */
public class TesseractTextExtractor implements TextExtractor, ManagedService {
    /** Name of the binary used when no explicit path is configured. */
    public static final String TESSERACT_BINARY_DEFAULT = "tesseract";

    /** Configuration key holding the path to the tesseract binary. */
    public static final String TESSERACT_BINARY_CONFIG_KEY = "org.opencastproject.textanalyzer.tesseract.path";

    /** Configuration key holding additional command line options for tesseract. */
    public static final String TESSERACT_OPTS_CONFIG_KEY = "org.opencastproject.textanalyzer.tesseract.options";

    private static final Logger logger = LoggerFactory.getLogger(TesseractTextExtractor.class);

    /** Forwards every line it receives to the debug log. */
    private static final Pred<String> fnLogDebug = new Pred<String>() {
        public Boolean apply(String line) {
            logger.debug(line);
            return true;
        }
    };

    /**
     * Forwards lines to the warn log, except lines that (after trimming) start with "Page" or with
     * the "Tesseract Open Source OCR Engine" banner.
     */
    private static final Pred<String> fnLogWarn = new Pred<String>() {
        public Boolean apply(String line) {
            String trimmed = line.trim();
            boolean expectedNoise = trimmed.startsWith("Page")
                    || trimmed.startsWith("Tesseract Open Source OCR Engine");
            if (!expectedNoise) {
                logger.warn(line);
            }
            return true;
        }
    };

    /** Path to (or name of) the tesseract binary. */
    protected String binary;

    /** Additional options appended to the tesseract command line. */
    protected String addOptions = "";

    /** Creates an extractor that uses {@link #TESSERACT_BINARY_DEFAULT} as binary. */
    public TesseractTextExtractor() {
        this(TESSERACT_BINARY_DEFAULT);
    }

    /**
     * Creates an extractor that uses the given binary and no additional options.
     *
     * @param str path to (or name of) the tesseract binary
     */
    public TesseractTextExtractor(String str) {
        this.binary = str;
    }

    /**
     * Returns the configured tesseract binary.
     *
     * @return the binary path or name
     */
    public String getBinary() {
        return this.binary;
    }

    /**
     * Sets the options that are appended to the tesseract command line.
     *
     * @param str the additional options
     */
    public void setAdditionalOptions(String str) {
        this.addOptions = str;
    }

    /**
     * Returns the options that are appended to the tesseract command line.
     *
     * @return the additional options
     */
    public String getAdditionalOptions() {
        return this.addOptions;
    }

    /**
     * Sets the tesseract binary.
     *
     * @param str path to (or name of) the tesseract binary
     */
    public void setBinary(String str) {
        this.binary = str;
    }

    /**
     * Runs tesseract on the given image and returns the parsed result.
     *
     * <p>Tesseract is asked to write to an output base located next to the image and named after
     * the image's base name; the result is read from that base plus {@code .txt}. Once tesseract
     * has exited successfully, the output file is removed again, whether or not reading and
     * parsing it succeeds.
     *
     * @param file the image to analyze
     * @return the text frame parsed from the tesseract output
     * @throws TextExtractorException if tesseract exits with a non-zero code or an I/O error occurs
     * @throws IllegalStateException if no binary has been set
     */
    public TextFrame extract(File file) throws TextExtractorException {
        if (this.binary == null) {
            throw new IllegalStateException("Binary is not set");
        }
        File outputBase = new File(file.getParentFile(), FilenameUtils.getBaseName(file.getName()));
        String analysisOptions = getAnalysisOptions(file, outputBase);
        logger.info("Running Tesseract: {} {}", this.binary, analysisOptions);

        // Both stay null until they are actually in use so the finally block only cleans up
        // what this invocation really opened or produced.
        File outputFile = null;
        FileInputStream in = null;
        try {
            int exitCode = ProcessRunner.run(ProcessRunner.mk(this.binary, analysisOptions), fnLogDebug, fnLogWarn);
            if (exitCode != 0) {
                throw new TextExtractorException("Text analyzer " + this.binary + " exited with code " + exitCode);
            }
            outputFile = new File(outputBase.getAbsolutePath() + ".txt");
            in = new FileInputStream(outputFile);
            TextFrame frame = TesseractTextFrame.parse(in);
            // Explicit close so that a failure to close is reported instead of silently dropped.
            in.close();
            return frame;
        } catch (IOException e) {
            throw new TextExtractorException("Error running text extractor " + this.binary, e);
        } finally {
            // Both helpers accept null, so this is safe on every path.
            IOUtils.closeQuietly(in);
            FileUtils.deleteQuietly(outputFile);
        }
    }

    /**
     * Builds the option string passed to tesseract: the absolute path of the image, the absolute
     * path of the output base and the additional options, separated by single spaces.
     *
     * <p>NOTE(review): the paths are joined into one string; how {@code ProcessRunner.mk} splits
     * it (and therefore whether paths containing spaces work) is not visible here - confirm.
     *
     * @param file the image to analyze
     * @param file2 the output base (without the {@code .txt} extension)
     * @return the option string
     */
    protected String getAnalysisOptions(File file, File file2) {
        // A null value would otherwise be concatenated as the literal text "null".
        String extraOptions = this.addOptions == null ? "" : this.addOptions;
        return file.getAbsolutePath() + " " + file2.getAbsolutePath() + " " + extraOptions;
    }

    /**
     * Applies configuration values for the binary path and the additional options. Keys that are
     * absent leave the current value untouched.
     *
     * @param dictionary the configuration properties, may be {@code null}
     * @throws ConfigurationException declared by the service contract; not thrown by this implementation
     */
    public void updated(Dictionary dictionary) throws ConfigurationException {
        if (dictionary == null) {
            // No configuration available: keep the current settings.
            return;
        }
        String path = (String) dictionary.get(TESSERACT_BINARY_CONFIG_KEY);
        if (path != null) {
            logger.info("Setting Tesseract path to {}", path);
            this.binary = path;
        }
        String options = (String) dictionary.get(TESSERACT_OPTS_CONFIG_KEY);
        if (options != null) {
            logger.info("Setting additional options for Tesseract to '{}'", options);
            this.addOptions = options;
        }
    }

    /**
     * Reads the binary path and the additional options from the bundle context properties. A
     * missing path leaves the current binary untouched; missing options reset the additional
     * options to the empty string.
     *
     * @param componentContext the component context giving access to the bundle context
     */
    public void activate(ComponentContext componentContext) {
        String path = componentContext.getBundleContext().getProperty(TESSERACT_BINARY_CONFIG_KEY);
        if (path == null) {
            logger.debug("DEFAULT org.opencastproject.textanalyzer.tesseract.path: tesseract");
        } else {
            setBinary(path);
            logger.info("Setting Tesseract path to binary from config: {}", path);
        }
        String options = componentContext.getBundleContext().getProperty(TESSERACT_OPTS_CONFIG_KEY);
        if (options != null) {
            logger.info("Setting additional options for Tesseract to '{}'", options);
            this.addOptions = options;
        } else {
            logger.info("No additional options for Tesseract");
            this.addOptions = "";
        }
    }
}
