package com.zoho.scrapy.server.crawler.zerodepth;

import com.zoho.scrapy.common.util.ScrapyLogger;
import com.zoho.scrapy.server.crawler.process.util.FunnelProcessUtil;
import com.zoho.scrapy.server.util.ScrapyUtil;
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.exceptions.ParseException;
import edu.uci.ics.crawler4j.crawler.exceptions.RestrictedCrawlingException;
import edu.uci.ics.crawler4j.parser.NotAllowedContentException;
import edu.uci.ics.crawler4j.parser.Parser;
import edu.uci.ics.crawler4j.scrapy.dynamicpagefetcher.DynamicPage;
import edu.uci.ics.crawler4j.scrapy.dynamicpagefetcher.DynamicPageFetcher;
import edu.uci.ics.crawler4j.scrapy.dynamicpagefetcher.DynamicPageFetcherPoolHandler;
import edu.uci.ics.crawler4j.scrapy.util.CrawlLogger;
import edu.uci.ics.crawler4j.url.WebURL;
import java.util.Map;
import java.util.logging.Level;

/* loaded from: input_file:com/zoho/scrapy/server/crawler/zerodepth/PageFetch.class */
/**
 * Fetches a single URL (statically or through the dynamic page fetcher pool) and, when content
 * was retrieved, runs the crawler4j {@link Parser} over it.
 *
 * <p>Before fetching, the call waits until {@link FunnelProcessUtil#canBeFunneled} accepts the
 * URL's domain; the funnel entry for that domain is released again once the fetch attempt is over.
 */
public class PageFetch {
    private static final String CLS = PageFetch.class.getName();

    /** Method name reported in every log record emitted by this class. */
    private static final String FETCH_PAGE = "fetchPage";

    /** Pause between two consecutive funnel admission checks. */
    private static final long FUNNEL_RETRY_DELAY_MS = 2000L;

    /** Status a dynamic page must report for its HTML to be copied into the result. */
    private static final int HTTP_OK = 200;

    /** Set when the dynamic fetcher returned no page or failed with an unexpected error. */
    private static final int STATUS_DYNAMIC_FETCH_FAILED = 3;

    /** Set when the parser throws {@link ParseException}. */
    private static final int STATUS_PARSE_FAILED = 6;

    /** Set when the static fetch throws an exception. */
    private static final int STATUS_STATIC_FETCH_FAILED = 7;

    /** Set when the parser throws {@link NotAllowedContentException}. */
    private static final int STATUS_CONTENT_NOT_ALLOWED = 9;

    /** Set when the dynamic fetch throws {@link RestrictedCrawlingException}. */
    private static final int STATUS_RESTRICTED_CRAWLING = 15;

    /** URLs with this prefix keep the content type reported by the fetch. */
    private static final String LINKEDIN_COMPANY_PREFIX = "https://www.linkedin.com/organization-guest/company/";

    /**
     * Fetches {@code str} and parses the downloaded content.
     *
     * <p>Fetch failures do not propagate: they are logged and recorded as a status code on the
     * returned page (3 for a failed dynamic fetch, 15 for restricted crawling, 7 for a failed
     * static fetch). Parser failures are recorded as 9 (content not allowed) or 6 (parse error).
     *
     * @param str         URL of the page to fetch
     * @param z           {@code true} to render the page through the dynamic page fetcher,
     *                    {@code false} to fetch it through {@code ConnectionUtil}
     * @param crawlConfig crawl configuration handed to the fetchers and to the parser
     * @param map         passed through unchanged to {@code ConnectionUtil.getStaticPage};
     *                    not used for dynamic fetches
     * @return the page for {@code str}; its content data stays unset when nothing was fetched
     * @throws Exception if preparing the page, waiting for the funnel, releasing the funnel entry
     *                   or constructing the parser fails, or the parser throws something other
     *                   than the two handled exception types
     */
    public static Page fetchPage(String str, boolean z, CrawlConfig crawlConfig, Map map) throws Exception {
        ScrapyLogger.logMessage(Level.INFO, CLS, FETCH_PAGE, new Object[]{"Entry", "Page URL", str, "IsDynamic", Boolean.valueOf(z)});
        WebURL webURL = new WebURL();
        webURL.setURL(str);
        webURL.setIsDynamic(z);
        Page page = new Page(webURL);
        String domainWithoutWWW = ScrapyUtil.getDomainWithoutWWW(str);

        awaitFunnelSlot(domainWithoutWWW);
        try {
            if (z) {
                fetchDynamicPage(page, webURL, str, crawlConfig);
            } else {
                fetchStaticPage(page, map, crawlConfig);
            }
        } catch (Exception e) {
            ScrapyLogger.logMessage(Level.SEVERE, CLS, FETCH_PAGE, (Object[]) null, e);
        } finally {
            // Runs on every exit path, including Errors, so the funnel entry is never left behind.
            releaseFunnelSlot(domainWithoutWWW);
        }

        if (page.getContentData() != null) {
            parseContent(page, str, crawlConfig);
        }
        return page;
    }

    /**
     * Blocks until the funnel accepts {@code domain}, re-checking every
     * {@link #FUNNEL_RETRY_DELAY_MS} milliseconds.
     *
     * <p>An interrupt does not end the wait: the funnel entry is released (as before) and the
     * loop keeps polling. The interrupt status is restored once the wait is over so that callers
     * can still observe it.
     */
    private static void awaitFunnelSlot(String domain) throws Exception {
        boolean interrupted = false;
        try {
            while (!FunnelProcessUtil.canBeFunneled(domain)) {
                try {
                    Thread.sleep(FUNNEL_RETRY_DELAY_MS);
                } catch (InterruptedException e) {
                    // The flag is cleared by sleep(); remember it instead of re-setting it here,
                    // otherwise every following sleep() would throw immediately and spin.
                    interrupted = true;
                    releaseFunnelSlot(domain);
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /** Removes the funnel entry of {@code domain} if one is currently registered. */
    private static void releaseFunnelSlot(String domain) throws Exception {
        if (FunnelProcessUtil.isInFunnelDomain(domain)) {
            FunnelProcessUtil.removeFunnelProcess(domain);
        }
    }

    /**
     * Renders the page with a pooled {@link DynamicPageFetcher} and copies the outcome onto
     * {@code page}. Failures are logged and translated into a status code instead of propagating.
     */
    private static void fetchDynamicPage(Page page, WebURL webURL, String str, CrawlConfig crawlConfig) throws Exception {
        try {
            DynamicPageFetcher pageFetcher = null;
            try {
                // Results are unused; the calls are kept because they may reject malformed URLs.
                ScrapyUtil.getUrlDomain(str);
                ScrapyUtil.getUrlSubDomain(str);

                // The configuration may be absent, so guard both uses of it.
                boolean hasDynamicConfig = crawlConfig.getDynamicFetcherConfig() != null;
                if (hasDynamicConfig) {
                    crawlConfig.getDynamicFetcherConfig().setIsSeedUrl(true);
                }
                pageFetcher = DynamicPageFetcherPoolHandler.getPageFetcher();
                if (hasDynamicConfig) {
                    pageFetcher.setConfig(crawlConfig.getDynamicFetcherConfig());
                }

                DynamicPage rendered = pageFetcher.getPage(webURL.getURL(), crawlConfig.getMaxDownloadSize(), webURL.getLabel());
                if (rendered == null) {
                    page.setStatusCode(STATUS_DYNAMIC_FETCH_FAILED);
                    CrawlLogger.logMessage(Level.WARNING, CLS, FETCH_PAGE, new Object[]{"Unable to render the URL with DPF. Hence setting the status as 3", "URL", webURL.getURL()});
                } else {
                    if (rendered.getStatusCode() == HTTP_OK && rendered.getHtml() != null) {
                        // NOTE(review): getBytes() encodes with the platform default charset;
                        // confirm the parser decodes the content the same way.
                        page.setContentData(rendered.getHtml().getBytes());
                    }
                    page.setDynamic(true);
                    // A status of 0 is not copied over, leaving the page's own status untouched.
                    if (rendered.getStatusCode() != 0) {
                        page.setStatusCode(rendered.getStatusCode());
                    }
                    page.setContentType(rendered.getContentType());
                    page.setDynamicPage(rendered);
                }
            } finally {
                // Hand the fetcher back on success and on failure alike so the pool is not drained.
                if (pageFetcher != null) {
                    DynamicPageFetcherPoolHandler.returnPageFetcher(pageFetcher);
                }
            }
        } catch (RestrictedCrawlingException e) {
            page.setStatusCode(STATUS_RESTRICTED_CRAWLING);
            ScrapyLogger.logMessage(Level.SEVERE, CLS, FETCH_PAGE, new Object[]{"DPF exception for url - " + webURL.getURL()}, e);
        } catch (Error | Exception e) {
            ScrapyLogger.logMessage(Level.SEVERE, CLS, FETCH_PAGE, new Object[]{"DPF exception for url - " + webURL.getURL()}, e);
            page.setStatusCode(STATUS_DYNAMIC_FETCH_FAILED);
        }
    }

    /**
     * Fetches the page through a fresh {@code ConnectionUtil}, which is always closed afterwards.
     * An exception during the fetch is logged and recorded as status 7.
     */
    private static void fetchStaticPage(Page page, Map map, CrawlConfig crawlConfig) throws Exception {
        ConnectionUtil connectionUtil = new ConnectionUtil(crawlConfig);
        try {
            // NOTE(review): the dynamic path passes getMaxDownloadSize() in the equivalent
            // position; confirm that getMaxPagesToFetch() is really what getStaticPage expects.
            connectionUtil.getStaticPage(page, map, crawlConfig.getMaxPagesToFetch());
        } catch (Exception e) {
            ScrapyLogger.logMessage(Level.SEVERE, CLS, FETCH_PAGE, (Object[]) null, e);
            page.setStatusCode(STATUS_STATIC_FETCH_FAILED);
        } finally {
            connectionUtil.close();
        }
    }

    /**
     * Parses the fetched content of {@code page}, mapping the two handled parser exceptions to
     * status codes 9 and 6.
     */
    private static void parseContent(Page page, String str, CrawlConfig crawlConfig) throws Exception {
        try {
            Parser parser = new Parser(crawlConfig);
            // NOTE(review): every URL outside the LinkedIn company prefix has its content type
            // overwritten with "application/pdf" before parsing. Kept as found; confirm this is
            // intended and not an inverted condition or a wrong constant.
            if (!str.startsWith(LINKEDIN_COMPANY_PREFIX)) {
                page.setContentType("application/pdf");
            }
            parser.parse(page, str);
        } catch (NotAllowedContentException e) {
            page.setStatusCode(STATUS_CONTENT_NOT_ALLOWED);
        } catch (ParseException e) {
            page.setStatusCode(STATUS_PARSE_FAILED);
        }
    }
}
