package org.opencastproject.textextractor.tesseract;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Dictionary;
import java.util.List;
import java.util.Objects;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencastproject.textextractor.api.TextExtractor;
import org.opencastproject.textextractor.api.TextExtractorException;
import org.osgi.service.cm.ManagedService;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/opencastproject/textextractor/tesseract/TesseractTextExtractor.class */
/**
 * {@link TextExtractor} implementation that runs the Tesseract command line tool on an image file
 * and returns the recognized text line by line.
 *
 * <p>The binary and any additional command line options can be supplied through the constructor and
 * setter, through bundle context properties in {@link #activate(ComponentContext)}, or through a
 * configuration update in {@link #updated(Dictionary)}.
 */
public class TesseractTextExtractor implements TextExtractor, ManagedService {
    /** Binary name used when no path is configured. */
    public static final String TESSERACT_BINARY_DEFAULT = "tesseract";
    /** Configuration key holding the path to the Tesseract binary. */
    public static final String TESSERACT_BINARY_CONFIG_KEY = "org.opencastproject.textanalyzer.tesseract.path";
    /** Configuration key holding additional, whitespace separated command line options. */
    public static final String TESSERACT_OPTS_CONFIG_KEY = "org.opencastproject.textanalyzer.tesseract.options";

    private static final Logger logger = LoggerFactory.getLogger(TesseractTextExtractor.class);

    /** Process output lines starting with one of these prefixes are logged at debug instead of info level. */
    private static final List<String> stderrFilter = Arrays.asList("Page", "Tesseract Open Source OCR Engine", "Warning: Invalid resolution 0 dpi. Using 70 instead.", "Estimating resolution as ");

    private String binary;
    private String addOptions;

    /** Creates an extractor that uses {@link #TESSERACT_BINARY_DEFAULT} as the binary. */
    public TesseractTextExtractor() {
        this(TESSERACT_BINARY_DEFAULT);
    }

    /**
     * Creates an extractor that uses the given binary and no additional options.
     *
     * @param binary name of or path to the Tesseract executable
     */
    public TesseractTextExtractor(String binary) {
        this.addOptions = "";
        this.binary = binary;
    }

    /**
     * Sets the additional command line options appended to every Tesseract invocation.
     *
     * @param addOptions whitespace separated options; {@code null} is treated as "no options" when the
     *                   command is built
     */
    public void setAdditionalOptions(String addOptions) {
        this.addOptions = addOptions;
    }

    /**
     * Returns the additional command line options exactly as they were last set.
     *
     * @return the configured options string
     */
    public String getAdditionalOptions() {
        return this.addOptions;
    }

    /**
     * Runs Tesseract on the given image and returns the recognized text.
     *
     * <p>Tesseract is asked to write to a file next to the image that carries the image's base name;
     * the result is then read from that path with {@code .txt} appended. The result file is removed
     * again once the process has been started, whether or not extraction succeeds.
     *
     * @param image the image file to analyze
     * @return the trimmed, non-empty lines of the Tesseract result file, in file order
     * @throws TextExtractorException if the process cannot be started, exits with a non-zero status,
     *                                the calling thread is interrupted, or the result cannot be read
     * @throws IllegalStateException  if no binary is configured
     */
    public List<String> extract(File image) throws TextExtractorException {
        if (this.binary == null) {
            throw new IllegalStateException("Binary is not set");
        }
        File outputBase = new File(image.getParentFile(), FilenameUtils.getBaseName(image.getName()));
        List<String> command = getTesseractCommand(image, outputBase);
        logger.info("Running Tesseract: {}", command);

        Process process;
        try {
            process = new ProcessBuilder(command).redirectErrorStream(true).start();
        } catch (IOException e) {
            // Nothing was started, so there is no result file of ours to clean up.
            throw new TextExtractorException("Error running text extractor " + this.binary, e);
        }

        File outputFile = new File(outputBase.getAbsolutePath() + ".txt");
        try {
            logProcessOutput(process);
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                throw new TextExtractorException("Tesseract exited abnormally with status " + exitCode);
            }
            return readNonEmptyLines(outputFile);
        } catch (IOException e) {
            throw new TextExtractorException("Error running text extractor " + this.binary, e);
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            throw new TextExtractorException("Error running text extractor " + this.binary, e);
        } finally {
            // No effect if the process has already exited; stops it if we bail out early.
            process.destroy();
            FileUtils.deleteQuietly(outputFile);
        }
    }

    /**
     * Consumes the combined stdout/stderr of the process until end of stream and forwards each line to
     * the logger. Lines matching {@link #stderrFilter} go to debug, everything else to info.
     */
    private static void logProcessOutput(Process process) throws IOException {
        try (BufferedReader processOutput = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = processOutput.readLine()) != null) {
                String trimmed = line.trim();
                if (stderrFilter.stream().anyMatch(trimmed::startsWith)) {
                    logger.debug(line);
                } else {
                    logger.info(line);
                }
            }
        }
    }

    /** Reads the given UTF-8 text file and returns its lines trimmed, dropping lines that are empty after trimming. */
    private static List<String> readNonEmptyLines(File textFile) throws IOException {
        List<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(textFile), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (!trimmed.isEmpty()) {
                    lines.add(trimmed);
                }
            }
        }
        return lines;
    }

    /**
     * Builds the command line: binary, absolute image path, absolute output base path, followed by the
     * additional options split on whitespace.
     *
     * @param image      the input image
     * @param outputBase the output path handed to Tesseract
     * @return the command as an argument list
     */
    private List<String> getTesseractCommand(File image, File outputBase) {
        List<String> command = new ArrayList<>();
        command.add(this.binary);
        command.add(image.getAbsolutePath());
        command.add(outputBase.getAbsolutePath());
        // StringUtils.split returns null for null input; treat that as "no extra options".
        String[] extraOptions = StringUtils.split(this.addOptions);
        if (extraOptions != null) {
            command.addAll(Arrays.asList(extraOptions));
        }
        return command;
    }

    /**
     * Applies a configuration update. Only keys that are present change the current settings.
     *
     * @param properties the new configuration; {@code null} leaves the current settings untouched
     */
    public void updated(Dictionary properties) {
        if (properties == null) {
            return;
        }
        String path = (String) properties.get(TESSERACT_BINARY_CONFIG_KEY);
        if (path != null) {
            logger.info("Setting Tesseract path to {}", path);
            this.binary = path;
        }
        String options = (String) properties.get(TESSERACT_OPTS_CONFIG_KEY);
        if (options != null) {
            logger.info("Setting additional options for Tesseract to '{}'", options);
            this.addOptions = options;
        }
    }

    /**
     * Reads the binary path and additional options from the bundle context properties. A missing path
     * keeps the current binary; missing options reset the additional options to the empty string.
     *
     * @param componentContext the component context providing the bundle context
     */
    public void activate(ComponentContext componentContext) {
        String path = componentContext.getBundleContext().getProperty(TESSERACT_BINARY_CONFIG_KEY);
        if (path == null) {
            logger.debug("DEFAULT {}: {}", TESSERACT_BINARY_CONFIG_KEY, TESSERACT_BINARY_DEFAULT);
        } else {
            this.binary = path;
            logger.info("Setting Tesseract path to binary from config: {}", path);
        }
        String options = componentContext.getBundleContext().getProperty(TESSERACT_OPTS_CONFIG_KEY);
        if (options == null) {
            logger.info("No additional options for Tesseract");
            this.addOptions = "";
        } else {
            logger.info("Setting additional options for Tesseract to '{}'", options);
            this.addOptions = options;
        }
    }
}
