package com.atlassian.confluence.plugins.sharelinks.metaextractor;

import com.atlassian.confluence.plugins.sharelinks.DOMMetadataExtractor;
import com.atlassian.confluence.plugins.sharelinks.LinkMetaData;
import com.atlassian.confluence.plugins.sharelinks.LinkMetaDataExtractor;
import com.atlassian.confluence.util.http.HttpRequest;
import com.atlassian.confluence.util.http.HttpResponse;
import com.atlassian.confluence.util.http.HttpRetrievalService;
import com.atlassian.plugins.whitelist.NotAuthorizedException;
import com.atlassian.plugins.whitelist.OutboundWhitelist;
import com.google.common.collect.ImmutableList;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.HeaderElement;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/* loaded from: input_file:com/atlassian/confluence/plugins/sharelinks/metaextractor/MasterLinkMetaDataExtractor.class */
/**
 * Builds a {@link LinkMetaData} for a shared link.
 *
 * <p>The URL is normalised to carry an http(s) scheme, checked against the outbound whitelist,
 * and fetched through the {@link HttpRetrievalService}. Only the part of the response up to the
 * closing {@code </head>} tag (or end of input, bounded by {@link #MAX_HEAD_SIZE}) is read; that
 * fragment is parsed with jsoup and handed to each configured {@link DOMMetadataExtractor} in
 * order.
 */
public class MasterLinkMetaDataExtractor implements LinkMetaDataExtractor {
    private static final Logger log = Logger.getLogger(MasterLinkMetaDataExtractor.class);

    /** Horizon passed to the scanner when looking for the end of the head section. */
    private static final int MAX_HEAD_SIZE = 131072;
    private static final int DESCRIPTION_MAX_LENGTH = 180;
    private static final int DOMAIN_MAX_LENGTH = 50;
    private static final int EXCERPT_URL_MAX_LENGTH = 30;

    /** Horizontal ellipsis appended to truncated text. */
    private static final char ELLIPSIS = (char) 8230;
    private static final String HTTP_PREFIX = "http://";
    private static final String HTTPS_PREFIX = "https://";
    private static final String SCHEME_SEPARATOR = "//";

    /**
     * Value returned by {@link #getHeadHtmlData} when the page cannot be fetched or read.
     * NOTE(review): this is a non-empty string, so the {@code isEmpty()} guard in
     * {@link #parseMetaData} does not skip the extractors for failed fetches - confirm that this
     * is intended before changing it.
     */
    private static final String FETCH_FAILED_PLACEHOLDER = "{}";

    /** Matches everything up to and including the first {@code </head>}, or else all input. */
    private static final Pattern UNTIL_END_HEAD_OR_EOF_PATTERN =
            Pattern.compile(".*?</head>|.*", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    private final OutboundWhitelist outboundWhitelist;
    private final List<DOMMetadataExtractor> metadataExtractors;
    private final HttpRetrievalService httpRetrievalService;

    /**
     * @param httpRetrievalService service used to fetch the linked page; also passed to the
     *                             {@code SimpleDOMMetadataExtractor}
     * @param outboundWhitelist    whitelist consulted before any outbound request is made
     */
    public MasterLinkMetaDataExtractor(HttpRetrievalService httpRetrievalService, OutboundWhitelist outboundWhitelist) {
        this.httpRetrievalService = httpRetrievalService;
        this.outboundWhitelist = outboundWhitelist;
        this.metadataExtractors = ImmutableList.of(
                new OpenGraphDOMMetadataExtractor(),
                new TwitterDOMMetadataExtractor(),
                new SimpleDOMMetadataExtractor(httpRetrievalService));
    }

    /**
     * Collects metadata for the given link.
     *
     * @param str the link; {@code http://} is prepended when it does not already start with an
     *            http or https scheme (compared case-insensitively)
     * @param z   when {@code true}, the description is shortened to at most
     *            {@link #DESCRIPTION_MAX_LENGTH} characters plus an ellipsis
     * @return the populated metadata
     * @throws URISyntaxException     if the normalised link is not a valid URI
     * @throws NotAuthorizedException if the outbound whitelist does not allow the link
     * @throws NullPointerException   if {@code str} is {@code null}
     */
    @Override
    public LinkMetaData parseMetaData(String str, boolean z) throws URISyntaxException, NotAuthorizedException {
        if (!hasHttpScheme(str)) {
            str = HTTP_PREFIX + str;
        }
        LinkMetaData linkMetaData = new LinkMetaData(str);
        linkMetaData.setExcerptedURL(getExcerptedUrl(str));

        String host = new URI(str).getHost();
        linkMetaData.setDomain(getPreviewText(StringUtils.isBlank(host) ? str : host, DOMAIN_MAX_LENGTH));

        String headHtmlData = getHeadHtmlData(str, linkMetaData);
        if (!headHtmlData.isEmpty()) {
            Document head = Jsoup.parse(headHtmlData);
            for (DOMMetadataExtractor extractor : this.metadataExtractors) {
                extractor.updateMetadata(linkMetaData, head);
            }
        }
        if (z) {
            linkMetaData.setDescription(getPreviewText(linkMetaData.getDescription(), DESCRIPTION_MAX_LENGTH));
        }
        return linkMetaData;
    }

    /** Returns whether {@code url} starts with {@code http://} or {@code https://}, ignoring case. */
    private static boolean hasHttpScheme(String url) {
        return url.regionMatches(true, 0, HTTP_PREFIX, 0, HTTP_PREFIX.length())
                || url.regionMatches(true, 0, HTTPS_PREFIX, 0, HTTPS_PREFIX.length());
    }

    /**
     * Fetches the page and returns its markup up to the end of the head section.
     *
     * <p>Side effects on {@code linkMetaData}: the response host is always set once a response
     * has been obtained, and the charset is set when the body is read.
     *
     * @return the head fragment, an empty string when the scanner finds nothing, or
     *         {@link #FETCH_FAILED_PLACEHOLDER} when the response is not usable or reading fails
     * @throws NotAuthorizedException if the whitelist rejects the URL
     */
    private String getHeadHtmlData(String str, LinkMetaData linkMetaData) throws NotAuthorizedException {
        if (!this.outboundWhitelist.isAllowed(URI.create(str))) {
            log.error("Not authorized to access this url. Please contact admin to add this url to whitelist.");
            throw new NotAuthorizedException(str);
        }
        try {
            HttpRequest request = this.httpRetrievalService.getDefaultRequestFor(str);
            request.setHeader("accept-charset", "utf-8");
            HttpResponse httpResponse = this.httpRetrievalService.get(request);
            linkMetaData.setResponseHost(getResponseURI(httpResponse, str));
            if (!isValidResponse(httpResponse)) {
                return FETCH_FAILED_PLACEHOLDER;
            }
            try (InputStream raw = httpResponse.getResponse()) {
                InputStream body = raw;
                String charset = httpResponse.getCharset();
                if (charset == null) {
                    // The head has to be read twice (once to sniff the charset, once to decode
                    // it), so the stream must support mark/reset; buffer it when it does not.
                    if (!body.markSupported()) {
                        body = new BufferedInputStream(body);
                    }
                    body.mark(Integer.MAX_VALUE);
                    charset = sniffDeclaredCharset(body);
                    body.reset();
                }
                linkMetaData.setCharset(resolveCharsetName(charset));
                // The scanner is intentionally not closed here: closing it would close the
                // underlying stream, which try-with-resources already takes care of.
                String head = new Scanner(body, linkMetaData.getCharset())
                        .findWithinHorizon(UNTIL_END_HEAD_OR_EOF_PATTERN, MAX_HEAD_SIZE);
                // findWithinHorizon may return null; normalise so the caller can rely on isEmpty().
                return head == null ? "" : head;
            }
        } catch (IOException e) {
            log.error("Error with io exception: ", e);
            return FETCH_FAILED_PLACEHOLDER;
        } catch (Exception e2) {
            log.error("Error in parse data: ", e2);
            return FETCH_FAILED_PLACEHOLDER;
        }
    }

    /**
     * Reads the head section as UTF-8 and looks for a charset declared in the markup: first in a
     * {@code <meta http-equiv="Content-Type" content="...; charset=...">} element, then in a
     * {@code <meta charset="...">} element.
     *
     * <p>The stream is consumed but not closed; the caller is expected to reset it.
     *
     * @return the declared charset name, or {@code null} when none is found
     */
    private static String sniffDeclaredCharset(InputStream body) {
        String head = new Scanner(body, StandardCharsets.UTF_8.name())
                .findWithinHorizon(UNTIL_END_HEAD_OR_EOF_PATTERN, MAX_HEAD_SIZE);
        if (head == null) {
            return null;
        }
        Document sniffed = Jsoup.parse(head);
        String contentType = sniffed.select("meta[http-equiv=Content-Type][content]").attr("content");
        for (HeaderElement element : HeaderElement.parseElements(contentType)) {
            NameValuePair charsetParam = element.getParameterByName("charset");
            if (charsetParam != null) {
                if (StringUtils.isNotBlank(charsetParam.getValue())) {
                    return charsetParam.getValue();
                }
                break;
            }
        }
        String metaCharset = sniffed.select("meta[charset]").attr("charset");
        return StringUtils.isBlank(metaCharset) ? null : metaCharset;
    }

    /**
     * Returns a charset name that is safe to hand to {@link Scanner}: the trimmed candidate when
     * the JVM supports it, UTF-8 otherwise (including for {@code null}, blank or illegal names).
     */
    private static String resolveCharsetName(String candidate) {
        if (StringUtils.isNotBlank(candidate)) {
            String trimmed = candidate.trim();
            try {
                if (Charset.isSupported(trimmed)) {
                    return trimmed;
                }
            } catch (IllegalArgumentException ignored) {
                // Syntactically illegal charset name declared by the remote page; use the default.
            }
            log.debug("Unsupported charset '" + candidate + "', falling back to UTF-8");
        }
        return StandardCharsets.UTF_8.name();
    }

    /**
     * Returns the URI reported by the response, or the requested URL parsed as a URI when the
     * response reports no URI or one without a host.
     *
     * @throws URISyntaxException if the fallback URL cannot be parsed
     */
    private URI getResponseURI(HttpResponse httpResponse, String str) throws URISyntaxException {
        URI responseURI = httpResponse.getResponseURI();
        if (responseURI == null || responseURI.getHost() == null) {
            return new URI(str);
        }
        return responseURI;
    }

    /**
     * Shortens {@code str} to at most {@code i} characters followed by an ellipsis.
     *
     * <p>Text that is {@code null} or no longer than {@code i} is returned unchanged. Otherwise
     * the first {@code i} characters are kept, cut back to the last space within them when there
     * is one, and the ellipsis is appended.
     */
    private static String getPreviewText(String str, int i) {
        if (str == null || str.length() <= i) {
            return str;
        }
        String truncated = str.substring(0, i);
        int lastSpace = truncated.lastIndexOf(' ');
        if (lastSpace != -1) {
            truncated = truncated.substring(0, lastSpace);
        }
        return truncated + ELLIPSIS;
    }

    /**
     * Returns the URL without its scheme prefix (everything after the first {@code //}),
     * shortened to {@link #EXCERPT_URL_MAX_LENGTH} characters including a trailing ellipsis when
     * it is longer than that.
     */
    private String getExcerptedUrl(String str) {
        String withoutScheme = str.substring(str.indexOf(SCHEME_SEPARATOR) + SCHEME_SEPARATOR.length());
        if (withoutScheme.length() > EXCERPT_URL_MAX_LENGTH) {
            return withoutScheme.substring(0, EXCERPT_URL_MAX_LENGTH - 1) + ELLIPSIS;
        }
        return withoutScheme;
    }

    /**
     * Returns whether the response has a 2xx status and either no MIME type or a {@code text/*}
     * MIME type (compared case-insensitively).
     */
    private boolean isValidResponse(HttpResponse httpResponse) {
        int statusCode = httpResponse.getStatusCode();
        if (statusCode < 200 || statusCode >= 300) {
            return false;
        }
        String mimeType = httpResponse.getMIMEType();
        return mimeType == null || mimeType.regionMatches(true, 0, "text/", 0, "text/".length());
    }
}
