package com.zoho.scrapy.server.util;

import com.zoho.scrapy.server.constants.AutomationConstants;
import com.zoho.scrapy.server.crawler.process.constants.CrawlProcessConstants;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.Normalizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.json.JSONObject;

/* loaded from: input_file:com/zoho/scrapy/server/util/RegexUtil.class */
public class RegexUtil {
    public static final String EMAIL_SPLIT_REGEX = "[._\\-0-9]+";
    public static final String HTTPS = "https";
    public static final String HTTPS_PROTOCOL_REGEX = "https://";
    public static final String HTTP = "http";
    public static final String HTTP_PROTOCOL_REGEX = "http://";
    private static final String DOT_REGEX = "\\.";
    private static final String HASH_REGEX = "[#]+";
    private static final String CSV_LINE_SPLIT_REGEX = ",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))";
    public static final String CAMALCASE_REGEX = "(?<=[a-z])(?=[A-Z])";
    private static final String URL_LANGUAGE_CODE = "(ab|aa|af|sq|am|ar|an|hy|as|ay|az|ba|eu|bn|dz|bh|bi|br|bg|my|be|km|ca|zh|co|hr|cs|da|nl|en|eo|et|fo|fa|fj|fi|fr|fy|gd|gv|gl|global|ka|de|el|kl|gn|gu|ht|ha|he|iw|hi|hu|is|io|id|in|ia|ie|iu|ik|ga|it|ja|jv|kn|ks|kk|rw|ky|rn|ko|ku|lo|la|lv|li|ln|lt|mk|mg|ms|ml|mt|mi|mr|mo|mn|na|ne|no|oc|or|om|ps|pl|pt|pa|qu|rm|ro|ru|sm|sg|sa|sr|sh|st|tn|sn|ii|sd|si|ss|sk|sl|so|es|su|sw|sv|tl|tg|ta|tt|te|th|bo|ti|to|ts|tr|tk|tw|ug|uk|ur|uz|vi|vo|wa|cy|wo|xh|yi|ji|yo|zu)";
    private static final String URL_COUNTRY_CODE = "(af|afg|ax|ala|al|alb|dz|dza|as|asm|ad|and|ao|ago|ai|aia|aq|ata|ag|atg|ar|arg|am|arm|aw|abw|au|aus|at|aut|az|aze|bs|bhs|bh|bhr|bd|bgd|bb|brb|by|blr|be|bel|bz|blz|bj|ben|bm|bmu|bt|btn|bo|bol|ba|bih|bw|bwa|bv|bvt|br|bra|vg|vgb|io|iot|bn|brn|bg|bgr|bf|bfa|bi|bdi|kh|khm|cm|cmr|ca|can|cv|cpv|ky|cym|cf|caf|td|tcd|cl|chl|cn|chn|hk|hkg|mo|mac|cx|cxr|cc|cck|co|col|km|com|cg|cog|cd|cod|ck|cok|cr|cri|ci|civ|hr|hrv|cu|cub|cy|cyp|cz|cze|dk|dnk|dj|dji|dm|dma|do|dom|ec|ecu|eg|egy|sv|slv|gq|gnq|er|eri|ee|est|et|eth|fk|flk|fo|fro|fj|fji|fi|fin|fr|fra|gf|guf|pf|pyf|tf|atf|ga|gab|gm|gmb|ge|geo|de|deu|gh|gha|gi|gib|gr|grc|global|gl|grl|gd|grd|gp|glp|gu|gum|gt|gtm|gg|ggy|gn|gin|gw|gnb|gy|guy|ht|hti|hm|hmd|va|vat|hn|hnd|hu|hun|is|isl|in|ind|ic|id|idn|ir|irn|iq|irq|ie|irl|im|imn|il|isr|it|ita|jm|jam|jp|jpn|je|jey|jo|jor|kz|kaz|ke|ken|ki|kir|kp|prk|kr|kor|kw|kwt|kg|kgz|la|lao|lv|lva|lb|lbn|ls|lso|lr|lbr|ly|lby|li|lie|lt|ltu|lu|lux|mk|mkd|mg|mdg|mw|mwi|my|mys|mv|mdv|ml|mli|mt|mlt|mh|mhl|mq|mtq|mr|mrt|mu|mus|yt|myt|mx|mex|fm|fsm|md|mda|mc|mco|mn|mng|me|mne|ms|msr|ma|mar|mz|moz|mm|mmr|na|nam|nr|nru|np|npl|nl|nld|an|ant|nc|ncl|nz|nzl|ni|nic|ne|ner|ng|nga|nu|niu|nf|nfk|mp|mnp|no|nor|om|omn|pk|pak|pw|plw|ps|pse|pa|pan|pg|png|py|pry|pe|per|ph|phl|pn|pcn|pl|pol|pt|prt|pr|pri|qa|qat|re|reu|ro|rou|ru|rus|rw|rwa|bl|blm|sh|shn|kn|kna|lc|lca|mf|maf|pm|spm|vc|vct|ws|wsm|sm|smr|st|stp|sa|sau|sn|sen|rs|srb|sc|syc|sl|sle|sg|sgp|sk|svk|si|svn|sb|slb|so|som|za|zaf|gs|sgs|ss|ssd|es|esp|lk|lka|sd|sdn|sr|sur|sj|sjm|sz|swz|se|swe|ch|che|sy|syr|tw|twn|tj|tjk|tz|tza|th|tha|tl|tls|tg|tgo|tk|tkl|to|ton|tt|tto|tn|tun|tr|tur|tm|tkm|tc|tca|tv|tuv|ug|uga|ua|ukr|ae|are|gb|gbr|us|usa|um|umi|uy|ury|uz|uzb|vu|vut|ve|ven|vn|vnm|vi|vir|wf|wlf|eh|esh|ye|yem|zm|zmb|zw|zwe)";
    private static final String URL_CONTACT_WORDS = "(contact(s|us)?)|(kontakt(er)?)|(contato[s]?)|(contatt[io])";
    private static final String URL_SUPPORT_WORDS = "(support[s]?)|(suporte)";
    private static final String URL_ABOUT_WORDS = "(about(us)?)|(history)";
    private static final String URL_LOCATION_WORDS = "((all)?location[s]?)";
    private static final String BR_TAG_REGEX = "<br>";
    private static final String ANCHOR_ABOUT_REGEX = "(?i:about)";
    private static final String URL_ABOUT_REGEX = "(?i:about)";
    private static final String ANCHOR_ABOUT_PARTIAL_REGEX = "(?i:company)";
    private static final String URL_ABOUT_PARTIAL_REGEX = "(?i:company)";
    public static final String DESIGNATION_REGEX = "(\\bcao\\b)|(\\bcfo\\b)|(\\bcho\\b)|(\\bcco\\b)|(\\bceo\\b)|(\\bcmo\\b)|(\\bcto\\b)|(\\bcro\\b)|(\\bcvo\\b)|(\\bcio\\b)|(\\bcso\\b)|(\\bcoo\\b)|(\\bcdo\\b)|((?i)(Chief Accountability Officer)|(Chief Accountant)|(Chief Administrative Officer)|(Chief Financial Examiner)|(Chief Financial Officer)|(Chief Hearing Officer)|(Chief commercial officer)|(chief executive officer)|(Chief Marketing Officer)|(Chief Operating Officer)|(Chief Technology Officer)|Chief Technical Officer|(chief revenue officer)|(chief visionary officer)|(Chief Information Officer)|(Chief Science Officer)|(Chief Operation Officer)|(Chief data officer)|(CHIEF DATA SCIENTIST)|(Data Scientist)|(Scientist)|(Chief experience officer)|(Chief brand officer)|(Chief business officer)|(Chief channel officer)|(Chief communications officer)|(Chief compliance officer)|(Chief content officer)|(staff)|(Chief design officer)|(Chief innovation officer)|(Chief investment officer)|(Chief knowledge officer)|(Chief learning officer)|(Chief networking officer)|(Chief product officer)|(Chief quality officer)|(Chief research officer)|(Chief restructuring officer)|(Chief risk officer)|(Chief Scientific Officer)|(Chief security officer)|(Chief strategy officer)|(Chief sustainability officer)|(Chief visionary officer)|(Chief web officer)|(Chief privacy officer)|(Product Expert)|(engineer)|(President)|(vp\\s|\\svp)|(co-founder)|(founder)|(cofounder)|(Chief Human Resources Officer)|(Technician)|(Consultant)|(Representative)|(Dispatcher)|(Investigator)|(Designer)|(Help Desk)|(Attendant)|(Leader)|(Business Analyst)|(Developer)|(Architect)|(head)|(programmer)|(Marketing)|(marketer)|(Content Writer)|(Management)|(Advisor)|(Researcher)|(Associate)|(Lead)|(Manager)|(Chairman)|(Administrator)|(Project Coordinator)|(Application Developer)|(Executive Director)|(Director)|(Audit Officer)|(Supervisor)|(Auditor)|(Coordinator)|(Executive)|(Board Member)|(Secretary)|(Product Analyst)|(Business Analyst)|(Analyst))";
    private static final Pattern DESIGNATION_PATTERN = Pattern.compile(DESIGNATION_REGEX);
    public static final Pattern NUM_WITH_COMMA = Pattern.compile("[0-9,]+");
    public static final Pattern ACCENTED_LETTERS_PATTERN = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    private static final Pattern NON_ASCII_CHARS_PATTERN = Pattern.compile("[^\\p{ASCII}]");
    private static final Pattern URL_PROTOCOL_PATTERN = Pattern.compile("^http(s)?://");
    private static final Pattern HTTPS_PROTOCOL_PATTERN = Pattern.compile("https://");
    private static final Pattern HTTP_PROTOCOL_PATTERN = Pattern.compile("http://");
    private static final String NON_DOMAIN_CHARACTERS_REGEX = "[^0-9a-z\\- ]+";
    private static final Pattern NON_DOMAIN_CHARACTERS_REGEX_PATTERN = Pattern.compile(NON_DOMAIN_CHARACTERS_REGEX);
    private static final Pattern DOMAIN_PATTERN = Pattern.compile("(^[a-zA-Z0-9][a-zA-Z0-9\\.-]+[a-zA-Z0-9]$)");
    private static final Pattern THATSTHEM_1_PATTERN = Pattern.compile("(?is)p1 = \"(.+?)\"");
    private static final Pattern THATSTHEM_2_PATTERN = Pattern.compile("(?is)p2 = \"(.+?)\"");
    private static final Pattern THATSTHEM_3_PATTERN = Pattern.compile("(?is)p3 = \"(.+?)\"");
    private static final Pattern LEADTAG_PATTERN = Pattern.compile("(customer)|(feedback)|(review)|(investor)|(client)|(partner)|(Advisor)");
    private static final String ISSITEACTIVE_TITLEREGEX = "(?i)(under (construction|development|Maintenance)|((web)?(\\\\s)?(page|site)|domain) Not Found|((is|((is )?available|listed)|(may|might)( be)?) for (sale|transfer|lease))|(currently |temporarily )?unavailable|DOMAIN NAME FOR SALE|Hosted By|is now closed|(coming|launching) soon|working on our website|DomainNotListed|Placeholder Page|holding page|test page|is available at|unconfigured (web)?site|account suspended|domain[\\\\sa-zA-z]{0,10}expired)";
    private static final Pattern ISSITEACTIVE_TITLEPATTERN = Pattern.compile(ISSITEACTIVE_TITLEREGEX);
    private static final String ISSITEACTIVE_DOCREGEX = "(?i)(\\b)(([a-zA-Z0-9-_]{1,15}\\.[a-zA-Z]{2,8}|domain( name)?|domains|((web)?(\\s)?(page|site))) ((is |are )?(listed |available |(might |may )?be )?(for|on)([a-zA-Z0-9\\s]{0,10})?(sale(s)?|transfer|purchase|lease)|is in auction|is almost (here|done)|was generated by|([a-z\\-A-Z\\s)\\.(]{0,30}((under (construction|development))|(no longer available)|(suspended)|(registered)|(is (now )?(disabled|closed))|((coming|launching) soon)|((currently|temporarily) unavailable))))|has been registered (by|through)|Web Hosting - courtesy of|This domain is registered|Site Not Found|domain[\\sa-zA-z]{0,30}expired)(\\b)";
    private static final Pattern ISSITEACTIVE_DOCPATTERN = Pattern.compile(ISSITEACTIVE_DOCREGEX);
    private static final String PARKINGDOMAIN_TITLEREGEX = "(?i)(Claim This Domain|buy\\s?domains?|DomainNotListed|(Webserver|domain) Default Page|is parked|parked at|domain.{1,10}(parked|registered))";
    private static final Pattern PARKINGDOMAIN_TITLEPATTERN = Pattern.compile(PARKINGDOMAIN_TITLEREGEX);
    private static final String PARKINGDOMAIN_DOCREGEX = "(?i)(\\b)(([a-zA-Z0-9-_]{1,10}\\.[a-zA-Z]{2,8}|domain( name)?|(web(\\s)?(page|site))) (is parked)|buy this domain|DomainNotListed|page[\\sa-zA-z]{0,10}generated automatically|SERVER DEFAULT PAGE|default[\\sa-zA-z]{0,10}page.|website has been started|The Sponsored Listings displayed above are served automatically by a third party|(This domain )?maintains no relationship with third party advertisers|domain( name)? (is|was|has( been)?) registered (at|for|on|by|with)|parked page|domain[\\sa-zA-z]{1,20}park(ed|ing)|(currently )?being parked by|domain name[\\sa-zA-z]{1,20} is already registered|Page cannot be displayed|(web)?site[\\sa-zA-z]{0,5}offline|website[\\sa-zA-z]{0,10}disabled|domain(s)?( (might|may) be)? expired|domain has expired|(Web Server's|domain) Default Page|Account is [\\sa-zA-z]{0,10}Unavailable)(\\b)";
    private static final Pattern PARKINGDOMAIN_DOCPATTERN = Pattern.compile(PARKINGDOMAIN_DOCREGEX);
    private static final String SPACE_REGEX = "[ ]+";
    private static final Pattern SPACE_REGEX_PATTERN = Pattern.compile(SPACE_REGEX);
    private static final Pattern SPACE_PLUS_REGEX_PATTERN = Pattern.compile(" +");
    private static final String SLASH_REGEX = "[/]+";
    private static final Pattern SLASH_REGEX_PATTERN = Pattern.compile(SLASH_REGEX);
    private static final Pattern SLASH_AT_END_REGEX_PATTERN = Pattern.compile("[/]+$");
    private static final Pattern DOT_REGEX_PATTERN = Pattern.compile("[.]+");
    private static final Pattern LASTDOT_REGEX_PATTERN = Pattern.compile("\\.$");
    private static final Pattern HASH_AT_END_REGEX_PATTERN = Pattern.compile("[#]+$");
    private static final String HYPHEN_REGEX = "[\\-]";
    private static final Pattern HYPHEN_REGEX_PATTERN = Pattern.compile(HYPHEN_REGEX);
    private static final Pattern DOUBLE_QUOTE_PATTERN = Pattern.compile("\"");
    private static final Pattern CONSECUTIVE_DOUBLE_QUOTE_PATTERN = Pattern.compile("\"\"");
    private static final String SQUARE_BRACKETS_REGEX = "(\\[|])";
    private static final Pattern SQUARE_BRACKETS_REGEX_PATTERN = Pattern.compile(SQUARE_BRACKETS_REGEX);
    private static final Pattern AT_SIGN_REGEX_PATTERN = Pattern.compile("@");
    private static final Pattern AMP_SIGN_REGEX_PATTERN = Pattern.compile("&");
    private static final String NUMERIC_CHARACTERS_REGEX = "[0-9]+";
    private static final Pattern NUMERIC_CHARACTERS_REGEX_PATTERN = Pattern.compile(NUMERIC_CHARACTERS_REGEX);
    private static final String ALPHA_NUMERIC_CHARACTERS_REGEX = "[a-zA-Z0-9]+";
    private static final Pattern ALPHA_NUMERIC_CHARACTERS_PATTERN = Pattern.compile(ALPHA_NUMERIC_CHARACTERS_REGEX);
    public static final String NON_ALPHANUMERIC_CHARACTERS_REGEX = "[^0-9a-zA-Z]+";
    public static final Pattern NON_ALPHANUMERIC_CHARACTERS_REGEX_PATTERN = Pattern.compile(NON_ALPHANUMERIC_CHARACTERS_REGEX);
    private static final String ALPHABET_CHARACTERS_REGEX = "[a-zA-Z]+";
    public static final Pattern ALPHABET_CHARACTERS_REGEX_PATTERN = Pattern.compile(ALPHABET_CHARACTERS_REGEX);
    private static final String NON_ALPHABET_CHARACTERS_REGEX = "[^a-zA-Z]+";
    public static final Pattern NON_ALPHABET_CHARACTERS_REGEX_PATTERN = Pattern.compile(NON_ALPHABET_CHARACTERS_REGEX);
    private static final String NON_NUMERIC_CHARACTERS_REGEX = "[^0-9]+";
    private static final Pattern NON_NUMERIC_CHARACTERS_REGEX_PATTERN = Pattern.compile(NON_NUMERIC_CHARACTERS_REGEX);
    private static final String NON_NUMERIC_DOT_CHARACTERS_REGEX = "[^0-9.]+";
    private static final Pattern NON_NUMERIC_DOT_CHARACTERS_REGEX_PATTERN = Pattern.compile(NON_NUMERIC_DOT_CHARACTERS_REGEX);
    private static final String NEW_LINE_REGEX = "([\n]|[\r][\n]|[\r])";
    private static final Pattern NEW_LINE_REGEX_PATTERN = Pattern.compile(NEW_LINE_REGEX);
    private static final String MULTIPLE_NEW_LINE_REGEX = "([\n]|[\r][\n]|[\r])+";
    private static final Pattern MULTIPLE_NEW_LINE_REGEX_PATTERN = Pattern.compile(MULTIPLE_NEW_LINE_REGEX);
    private static final String COPYRIGHTS_REGEX = "(?i)(\\b)((Powered|CMS) by)(\\b)";
    public static final Pattern COPYRIGHTS_REGEX_PATTERN = Pattern.compile(COPYRIGHTS_REGEX);
    private static final String SUB_DOMAIN_WWW_WITH_DOT_REGEX = "www\\.";
    private static final Pattern SUB_DOMAIN_WWW_WITH_DOT_REGEX_PATTERN = Pattern.compile(SUB_DOMAIN_WWW_WITH_DOT_REGEX);
    private static final Pattern EM_EN_DASH_REGEX_PATTERN = Pattern.compile("\\u2013|\\u2014");
    private static final Pattern HTML_TAGS_PATTERN = Pattern.compile("<.*?>");
    public static final Pattern UNESCPAE_JAVA_PATTERN = Pattern.compile("[\\\\]u([0-9a-f]{4})");
    private static final Pattern URL_COUNTRY_AND_LANGUAGE_PATTERN = Pattern.compile("\\/(((ab|aa|af|sq|am|ar|an|hy|as|ay|az|ba|eu|bn|dz|bh|bi|br|bg|my|be|km|ca|zh|co|hr|cs|da|nl|en|eo|et|fo|fa|fj|fi|fr|fy|gd|gv|gl|global|ka|de|el|kl|gn|gu|ht|ha|he|iw|hi|hu|is|io|id|in|ia|ie|iu|ik|ga|it|ja|jv|kn|ks|kk|rw|ky|rn|ko|ku|lo|la|lv|li|ln|lt|mk|mg|ms|ml|mt|mi|mr|mo|mn|na|ne|no|oc|or|om|ps|pl|pt|pa|qu|rm|ro|ru|sm|sg|sa|sr|sh|st|tn|sn|ii|sd|si|ss|sk|sl|so|es|su|sw|sv|tl|tg|ta|tt|te|th|bo|ti|to|ts|tr|tk|tw|ug|uk|ur|uz|vi|vo|wa|cy|wo|xh|yi|ji|yo|zu)[\\/-](af|afg|ax|ala|al|alb|dz|dza|as|asm|ad|and|ao|ago|ai|aia|aq|ata|ag|atg|ar|arg|am|arm|aw|abw|au|aus|at|aut|az|aze|bs|bhs|bh|bhr|bd|bgd|bb|brb|by|blr|be|bel|bz|blz|bj|ben|bm|bmu|bt|btn|bo|bol|ba|bih|bw|bwa|bv|bvt|br|bra|vg|vgb|io|iot|bn|brn|bg|bgr|bf|bfa|bi|bdi|kh|khm|cm|cmr|ca|can|cv|cpv|ky|cym|cf|caf|td|tcd|cl|chl|cn|chn|hk|hkg|mo|mac|cx|cxr|cc|cck|co|col|km|com|cg|cog|cd|cod|ck|cok|cr|cri|ci|civ|hr|hrv|cu|cub|cy|cyp|cz|cze|dk|dnk|dj|dji|dm|dma|do|dom|ec|ecu|eg|egy|sv|slv|gq|gnq|er|eri|ee|est|et|eth|fk|flk|fo|fro|fj|fji|fi|fin|fr|fra|gf|guf|pf|pyf|tf|atf|ga|gab|gm|gmb|ge|geo|de|deu|gh|gha|gi|gib|gr|grc|global|gl|grl|gd|grd|gp|glp|gu|gum|gt|gtm|gg|ggy|gn|gin|gw|gnb|gy|guy|ht|hti|hm|hmd|va|vat|hn|hnd|hu|hun|is|isl|in|ind|ic|id|idn|ir|irn|iq|irq|ie|irl|im|imn|il|isr|it|ita|jm|jam|jp|jpn|je|jey|jo|jor|kz|kaz|ke|ken|ki|kir|kp|prk|kr|kor|kw|kwt|kg|kgz|la|lao|lv|lva|lb|lbn|ls|lso|lr|lbr|ly|lby|li|lie|lt|ltu|lu|lux|mk|mkd|mg|mdg|mw|mwi|my|mys|mv|mdv|ml|mli|mt|mlt|mh|mhl|mq|mtq|mr|mrt|mu|mus|yt|myt|mx|mex|fm|fsm|md|mda|mc|mco|mn|mng|me|mne|ms|msr|ma|mar|mz|moz|mm|mmr|na|nam|nr|nru|np|npl|nl|nld|an|ant|nc|ncl|nz|nzl|ni|nic|ne|ner|ng|nga|nu|niu|nf|nfk|mp|mnp|no|nor|om|omn|pk|pak|pw|plw|ps|pse|pa|pan|pg|png|py|pry|pe|per|ph|phl|pn|pcn|pl|pol|pt|prt|pr|pri|qa|qat|re|reu|ro|rou|ru|rus|rw|rwa|bl|blm|sh|shn|kn|kna|lc|lca|mf|maf|pm|spm|vc|vct|ws|wsm|sm|smr|st|stp|sa|sau|sn|sen|rs|srb|sc|syc|sl|sle|sg|sgp|sk|svk|si|svn|sb|slb|so|som|za|zaf|gs|sgs|ss|ssd|es|esp|lk|lka|sd|sdn|sr|sur|sj|sjm|sz|swz|se|swe|ch|che|sy|syr|tw|twn|tj|tjk|tz|tza|th|tha|tl|tls|tg|tgo|tk|tkl|to|ton|tt|tto|tn|tun|tr|tur|tm|tkm|tc|tca|tv|tuv|ug|uga|ua|ukr|ae|are|gb|gbr|us|usa|um|umi|uy|ury|uz|uzb|vu|vut|ve|ven|vn|vnm|vi|vir|wf|wlf|eh|esh|ye|yem|zm|zmb|zw|zwe))|((af|afg|ax|ala|al|alb|dz|dza|as|asm|ad|and|ao|ago|ai|aia|aq|ata|ag|atg|ar|arg|am|arm|aw|abw|au|aus|at|aut|az|aze|bs|bhs|bh|bhr|bd|bgd|bb|brb|by|blr|be|bel|bz|blz|bj|ben|bm|bmu|bt|btn|bo|bol|ba|bih|bw|bwa|bv|bvt|br|bra|vg|vgb|io|iot|bn|brn|bg|bgr|bf|bfa|bi|bdi|kh|khm|cm|cmr|ca|can|cv|cpv|ky|cym|cf|caf|td|tcd|cl|chl|cn|chn|hk|hkg|mo|mac|cx|cxr|cc|cck|co|col|km|com|cg|cog|cd|cod|ck|cok|cr|cri|ci|civ|hr|hrv|cu|cub|cy|cyp|cz|cze|dk|dnk|dj|dji|dm|dma|do|dom|ec|ecu|eg|egy|sv|slv|gq|gnq|er|eri|ee|est|et|eth|fk|flk|fo|fro|fj|fji|fi|fin|fr|fra|gf|guf|pf|pyf|tf|atf|ga|gab|gm|gmb|ge|geo|de|deu|gh|gha|gi|gib|gr|grc|global|gl|grl|gd|grd|gp|glp|gu|gum|gt|gtm|gg|ggy|gn|gin|gw|gnb|gy|guy|ht|hti|hm|hmd|va|vat|hn|hnd|hu|hun|is|isl|in|ind|ic|id|idn|ir|irn|iq|irq|ie|irl|im|imn|il|isr|it|ita|jm|jam|jp|jpn|je|jey|jo|jor|kz|kaz|ke|ken|ki|kir|kp|prk|kr|kor|kw|kwt|kg|kgz|la|lao|lv|lva|lb|lbn|ls|lso|lr|lbr|ly|lby|li|lie|lt|ltu|lu|lux|mk|mkd|mg|mdg|mw|mwi|my|mys|mv|mdv|ml|mli|mt|mlt|mh|mhl|mq|mtq|mr|mrt|mu|mus|yt|myt|mx|mex|fm|fsm|md|mda|mc|mco|mn|mng|me|mne|ms|msr|ma|mar|mz|moz|mm|mmr|na|nam|nr|nru|np|npl|nl|nld|an|ant|nc|ncl|nz|nzl|ni|nic|ne|ner|ng|nga|nu|niu|nf|nfk|mp|mnp|no|nor|om|omn|pk|pak|pw|plw|ps|pse|pa|pan|pg|png|py|pry|pe|per|ph|phl|pn|pcn|pl|pol|pt|prt|pr|pri|qa|qat|re|reu|ro|rou|ru|rus|rw|rwa|bl|blm|sh|shn|kn|kna|lc|lca|mf|maf|pm|spm|vc|vct|ws|wsm|sm|smr|st|stp|sa|sau|sn|sen|rs|srb|sc|syc|sl|sle|sg|sgp|sk|svk|si|svn|sb|slb|so|som|za|zaf|gs|sgs|ss|ssd|es|esp|lk|lka|sd|sdn|sr|sur|sj|sjm|sz|swz|se|swe|ch|che|sy|syr|tw|twn|tj|tjk|tz|tza|th|tha|tl|tls|tg|tgo|tk|tkl|to|ton|tt|tto|tn|tun|tr|tur|tm|tkm|tc|tca|tv|tuv|ug|uga|ua|ukr|ae|are|gb|gbr|us|usa|um|umi|uy|ury|uz|uzb|vu|vut|ve|ven|vn|vnm|vi|vir|wf|wlf|eh|esh|ye|yem|zm|zmb|zw|zwe)[\\/-](ab|aa|af|sq|am|ar|an|hy|as|ay|az|ba|eu|bn|dz|bh|bi|br|bg|my|be|km|ca|zh|co|hr|cs|da|nl|en|eo|et|fo|fa|fj|fi|fr|fy|gd|gv|gl|global|ka|de|el|kl|gn|gu|ht|ha|he|iw|hi|hu|is|io|id|in|ia|ie|iu|ik|ga|it|ja|jv|kn|ks|kk|rw|ky|rn|ko|ku|lo|la|lv|li|ln|lt|mk|mg|ms|ml|mt|mi|mr|mo|mn|na|ne|no|oc|or|om|ps|pl|pt|pa|qu|rm|ro|ru|sm|sg|sa|sr|sh|st|tn|sn|ii|sd|si|ss|sk|sl|so|es|su|sw|sv|tl|tg|ta|tt|te|th|bo|ti|to|ts|tr|tk|tw|ug|uk|ur|uz|vi|vo|wa|cy|wo|xh|yi|ji|yo|zu))|((ab|aa|af|sq|am|ar|an|hy|as|ay|az|ba|eu|bn|dz|bh|bi|br|bg|my|be|km|ca|zh|co|hr|cs|da|nl|en|eo|et|fo|fa|fj|fi|fr|fy|gd|gv|gl|global|ka|de|el|kl|gn|gu|ht|ha|he|iw|hi|hu|is|io|id|in|ia|ie|iu|ik|ga|it|ja|jv|kn|ks|kk|rw|ky|rn|ko|ku|lo|la|lv|li|ln|lt|mk|mg|ms|ml|mt|mi|mr|mo|mn|na|ne|no|oc|or|om|ps|pl|pt|pa|qu|rm|ro|ru|sm|sg|sa|sr|sh|st|tn|sn|ii|sd|si|ss|sk|sl|so|es|su|sw|sv|tl|tg|ta|tt|te|th|bo|ti|to|ts|tr|tk|tw|ug|uk|ur|uz|vi|vo|wa|cy|wo|xh|yi|ji|yo|zu))|((af|afg|ax|ala|al|alb|dz|dza|as|asm|ad|and|ao|ago|ai|aia|aq|ata|ag|atg|ar|arg|am|arm|aw|abw|au|aus|at|aut|az|aze|bs|bhs|bh|bhr|bd|bgd|bb|brb|by|blr|be|bel|bz|blz|bj|ben|bm|bmu|bt|btn|bo|bol|ba|bih|bw|bwa|bv|bvt|br|bra|vg|vgb|io|iot|bn|brn|bg|bgr|bf|bfa|bi|bdi|kh|khm|cm|cmr|ca|can|cv|cpv|ky|cym|cf|caf|td|tcd|cl|chl|cn|chn|hk|hkg|mo|mac|cx|cxr|cc|cck|co|col|km|com|cg|cog|cd|cod|ck|cok|cr|cri|ci|civ|hr|hrv|cu|cub|cy|cyp|cz|cze|dk|dnk|dj|dji|dm|dma|do|dom|ec|ecu|eg|egy|sv|slv|gq|gnq|er|eri|ee|est|et|eth|fk|flk|fo|fro|fj|fji|fi|fin|fr|fra|gf|guf|pf|pyf|tf|atf|ga|gab|gm|gmb|ge|geo|de|deu|gh|gha|gi|gib|gr|grc|global|gl|grl|gd|grd|gp|glp|gu|gum|gt|gtm|gg|ggy|gn|gin|gw|gnb|gy|guy|ht|hti|hm|hmd|va|vat|hn|hnd|hu|hun|is|isl|in|ind|ic|id|idn|ir|irn|iq|irq|ie|irl|im|imn|il|isr|it|ita|jm|jam|jp|jpn|je|jey|jo|jor|kz|kaz|ke|ken|ki|kir|kp|prk|kr|kor|kw|kwt|kg|kgz|la|lao|lv|lva|lb|lbn|ls|lso|lr|lbr|ly|lby|li|lie|lt|ltu|lu|lux|mk|mkd|mg|mdg|mw|mwi|my|mys|mv|mdv|ml|mli|mt|mlt|mh|mhl|mq|mtq|mr|mrt|mu|mus|yt|myt|mx|mex|fm|fsm|md|mda|mc|mco|mn|mng|me|mne|ms|msr|ma|mar|mz|moz|mm|mmr|na|nam|nr|nru|np|npl|nl|nld|an|ant|nc|ncl|nz|nzl|ni|nic|ne|ner|ng|nga|nu|niu|nf|nfk|mp|mnp|no|nor|om|omn|pk|pak|pw|plw|ps|pse|pa|pan|pg|png|py|pry|pe|per|ph|phl|pn|pcn|pl|pol|pt|prt|pr|pri|qa|qat|re|reu|ro|rou|ru|rus|rw|rwa|bl|blm|sh|shn|kn|kna|lc|lca|mf|maf|pm|spm|vc|vct|ws|wsm|sm|smr|st|stp|sa|sau|sn|sen|rs|srb|sc|syc|sl|sle|sg|sgp|sk|svk|si|svn|sb|slb|so|som|za|zaf|gs|sgs|ss|ssd|es|esp|lk|lka|sd|sdn|sr|sur|sj|sjm|sz|swz|se|swe|ch|che|sy|syr|tw|twn|tj|tjk|tz|tza|th|tha|tl|tls|tg|tgo|tk|tkl|to|ton|tt|tto|tn|tun|tr|tur|tm|tkm|tc|tca|tv|tuv|ug|uga|ua|ukr|ae|are|gb|gbr|us|usa|um|umi|uy|ury|uz|uzb|vu|vut|ve|ven|vn|vnm|vi|vir|wf|wlf|eh|esh|ye|yem|zm|zmb|zw|zwe)))\\/");
    private static final Pattern URL_EN_LANGUAGE_PATTERN = Pattern.compile("(\\/(en[-\\/](us|usa|gb|gbr)|((us|usa|gb|gbr)[-\\/](en)))\\/)");
    private static final Pattern URL_FILTER__PATTERN = Pattern.compile(".*(\\.(css|js|bmp|gif|jpe?g|svg|ico|png|tiff?|mid|mp2|mp3|mp4|wav|avi|mov|mpeg|ram|m4v|pdf|rm|smil|wmv|swf|wma|zip|rar|gz|pdf|doc(x|m|b)?|dot(x|m)?|wbk|xl(x|s(x|m|b)?|t(x|m)?|m|a(m)?|l|w)?|pp(t(x|m)?|s(x|m)?|am)?|pot(x|m)?|sld(x|m)?|json))$");
    private static final Pattern URL_CONTACTS_PATTERN = Pattern.compile("([^a-zA-Z](?i:((contact(s|us)?)|(kontakt(er)?)|(contato[s]?)|(contatt[io])|(support[s]?)|(suporte)|(about(us)?)|(history)|((all)?location[s]?)))($|[^a-zA-Z]))");
    private static final Pattern URL_SUBDOMAIN_CONTACTS_PATTERN = Pattern.compile("((^|[^a-zA-Z])(?i:((contact(s|us)?)|(kontakt(er)?)|(contato[s]?)|(contatt[io])|(support[s]?)|(suporte)|(about(us)?)|(history)|((all)?location[s]?)))($|[^a-zA-Z]))");
    public static final Pattern URL_CONTACT_PATTERN = Pattern.compile("([^a-zA-Z](?i:((contact(s|us)?)|(kontakt(er)?)|(contato[s]?)|(contatt[io])))($|[^a-zA-Z]))");
    public static final Pattern URL_SUPPORT_PATTERN = Pattern.compile("([^a-zA-Z](?i:((support[s]?)|(suporte)))($|[^a-zA-Z]))");
    public static final Pattern URL_ABOUT_PATTERN = Pattern.compile("([^a-zA-Z](?i:((about(us)?)|(history)))($|[^a-zA-Z]))");
    public static final Pattern URL_LOCATION_PATTERN = Pattern.compile("([^a-zA-Z](?i:(((all)?location[s]?)))($|[^a-zA-Z]))");
    public static final Pattern URL_PRIVACY_PATTERN = Pattern.compile("([^a-zA-Z](?i:(privacy)|(privacidade)|(personvern))($|[^a-zA-Z]))");
    private static final Pattern URL_CONTACT_FOLDER_PATTERN = Pattern.compile("(.*[^a-zA-Z]((contact(s|us)?)|(kontakt(er)?)|(contato[s]?)|(contatt[io])|(support[s]?)|(suporte)|(about(us)?)|(history)|((all)?location[s]?))(([^a-zA-Z\\/]([^\\/]+)?)?[\\/]))");
    private static final Pattern FOLDER_PATTERN = Pattern.compile("(.*\\/)");
    private static final Pattern SCRIPT_PATTERN = Pattern.compile("'(.*?)'");
    private static final Pattern TRAILING_WILDCARD_PATTERN = Pattern.compile("(?<=^|\\s)[\\*\\?]+");
    private static final Pattern TAGS_RESTRICTED_PATTERN = Pattern.compile("(title)|(head)|(html)|(meta)");
    private static final Pattern URL_DECODER_VALID_PATTERN = Pattern.compile("%[0-9][0-9A-F]");
    private static final Pattern URL_DECODER_ALL_UTF_VALID_PATTERN = Pattern.compile("%[0-9A-F][0-9A-F]");
    private static final Pattern URL_DECODER_VALID_ACCENTED_LETTERS_PATTERN = Pattern.compile("(%[0-9A-F][0-9A-F]){2}");
    private static final String ANCHOR_VISIT_REGEX = "(^((?i)(contact)|(about)|(company)|(privacy)))";
    private static final Pattern ANCHOR_VISIT_REGEX_PATTERN = Pattern.compile(ANCHOR_VISIT_REGEX);
    private static final Pattern ANCHOR_ABOUT_REGEX_PATTERN = Pattern.compile("(?i:about)");
    private static final Pattern URL_ABOUT_REGEX_PATTERN = Pattern.compile("(?i:about)");
    private static final Pattern ANCHOR_ABOUT_REGEX_PARTIAL_PATTERN = Pattern.compile("(?i:company)");
    private static final Pattern URL_ABOUT_REGEX_PARTIAL_PATTERN = Pattern.compile("(?i:company)");
    private static final String URL_LAST_FILE = "(?<=[\\/])[\\w-]+(?:$)";
    private static final Pattern URL_LAST_FILE_PATTERN = Pattern.compile(URL_LAST_FILE);
    private static final Pattern SCRIPT_COMMENTS_PATTERN = Pattern.compile("((?s)\\/\\*.*?\\*\\/)|((?<=^|\\s|;|\\(|\\)|\\[|\\]|\\{|\\})\\/\\/.*)|((?m)^((\\s)+)?(<!--).*)");
    private static final String START_WORDS_REGEX = "(?i)(^(who |what |when |where |why |how |is |can |does |do |WHICH |AM |ARE |WAS |WERE |MAY |MIGHT |COULD |WILL |SHALL |WOULD |SHOULD |HAS |HAVE |HAD |DID ))";
    public static final Pattern START_WORDS_PATTERN = Pattern.compile(START_WORDS_REGEX);
    private static final String END_WORDS_REGEX = "(?i)(\\?)$";
    public static final Pattern END_WORDS_PATTERN = Pattern.compile(END_WORDS_REGEX);
    public static final Pattern PLAIN_EMAIL_REGEX = Pattern.compile("([a-zA-Z0-9][a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\\.[a-zA-Z0-9._-]+[a-zA-Z0-9])");
    private static final Pattern LEADNAME_VALID_REGEX_PATTERN = Pattern.compile("^[A-Za-z0-9\\s\\-_&'\".,()!*#]*$");
    private static final Pattern WEBSITE_VALID_REGEX_PATTERN = Pattern.compile("^(http[s]?:\\/\\/)?(xn[-]+)?([a-zA-Z0-9_]+([-]+[a-zA-Z0-9]+)*\\.)+((((xn[-]+[a-zA-Z0-9]+)|(?=.*[a-zA-Z])[a-zA-Z]{2,}))+)([\\/]?|([\\/].*))$");
    private static final Pattern WEBSITE_PATTERN_VALID_REGEX_PATTERN = Pattern.compile("http(s)?[:][\\x2f]{2}(www[.])?[0-9a-z].*\\w+[.]\\w+[\\x2f]?$");
    private static final String EU_TLD_REGEX = "[.](eu|be|bg|cz|dk|de|ee|ie|gr|es|fr|hr|it|cy|lv|lt|lu|hu|mt|nl|at|pl|pt|ro|si|sk|fi|se|uk|alsace|amsterdam|bcn|barcelona|bayern|berlin|brussels|budapest|bzh|cat|cologne|corsica|cymru|eus|frl|gal|gent|hamburg|helsinki|irish|ist|istanbul|koeln|london|madrid|nrw|paris|ruhr|saarland|scot|stockholm|swiss|tirol|vlaanderen|wales|wien|zuerich|is|li|no|gp|gf|mq|yt|mf)$";
    private static final Pattern EU_TLD_PATTERN = Pattern.compile(EU_TLD_REGEX);
    private static final Pattern PREFIXSUFIX_SPECIALCHARS_PATTERN = Pattern.compile("[\\W]*$|^[\\W]*");
    private static final Pattern PEOPLENAME_PATTERN = Pattern.compile("[^a-zA-Z.\\s]");
    private static final Pattern INVALID_DESIGNATION_PATTERN = Pattern.compile("[^a-zA-Z.\\-\\s,:()]");
    private static final Pattern URLREGEX_PATTERN_WITH_SPACE = Pattern.compile("https?:\\/\\/([\\w]+\\.)?[-\\w]+\\.[\\w]{2,10}(\\.[\\w]{2,10})?(\\/[\\s\\w\\/\\.?=-]*)?");
    private static final Pattern URLREGEX_PATTERN = Pattern.compile("https?:\\/\\/([\\w]+\\.)?[-\\w]+\\.[\\w]{2,10}(\\.[\\w]{2,10})?(\\/[\\w\\/\\.?=-]*)?");
    private static final Pattern IMGURLREGEX_PATTERN = Pattern.compile("\\.(?i)(jpe?g|png|gif|tiff?|bmp|eps|raw|cr2|nef|orf|sr2)");
    private static final Pattern PEOPLE_NAMETAG_REPLACE_PATTERN = Pattern.compile("[^a-zA-Z0-9\\s]");
    private static final Pattern QUERYPARAMS_REGEX_PATTERN = Pattern.compile("(\\?.*)");
    private static final Pattern QUERY_AND_REFERENCE_PARM_REGEX_PATTERN = Pattern.compile("(\\?.*|\\#.*)");
    private static final Pattern EXTENSION_IN_URL_PATH_PATTERN = Pattern.compile("(\\.[a-z0-9\\-_$]{1,7})$");
    private static final Pattern DOUBLE_EXTENSION_IN_URL_PATH_PATTERN = Pattern.compile("(\\.[a-z0-9\\-._$]{1,7})$");
    private static final Pattern ALLOWED_CHARS_FOR_URL_EXTENSION = Pattern.compile("[a-zA-Z0-9\\-_$,.]+");
    private static final Pattern REGEX_WILDCARD_CHARS = Pattern.compile("[{}()\\[\\].+*?^$\\\\|]");
    private static final Pattern DOMAINS_URL_PATTERN = Pattern.compile("^(http[s]?:\\/\\/)?(xn[-]+)?([a-zA-Z0-9_]+([-]+[a-zA-Z0-9]+)*\\.)+((((xn[-]+[a-zA-Z0-9]+)|(?=.*[a-zA-Z])[a-zA-Z0-9]{2,}))+)([\\/]?|([\\/].*))$");
    private static final String IP_ADDRESS_AND_PORT_REGEX = "^(http[s]?:\\/\\/)?((((?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(:[0-9]{2,4})?)|((([a-zA-Z0-9]+(-[a-zA-Z0-9]+)*\\.)+[a-zA-Z]{2,})(:[0-9]{2,4})))([\\/]?|([\\/].*))$";
    private static final Pattern IP_ADDRESS_AND_PORT_PATTERN = Pattern.compile(IP_ADDRESS_AND_PORT_REGEX);
    private static final String IP_ADDRESS_REGEX = "(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)";
    private static final Pattern IP_ADDRESS_PATTERN = Pattern.compile(IP_ADDRESS_REGEX);
    private static final String PORT_REGEX = "(:[0-9]{2,4})";
    private static final Pattern PORT_PATTERN = Pattern.compile(PORT_REGEX);
    private static final String ALPHA_SPACE_NUM_REGEX = "^[a-zA-Z0-9 ]+$";
    private static final Pattern ALPHA_SPACE_NUM_REGEX_PATTERN = Pattern.compile(ALPHA_SPACE_NUM_REGEX);
    private static final String VALID_DOMAIN_REGEX = "(^[a-zA-Z0-9][a-zA-Z0-9\\.-]*[a-zA-Z0-9](?<!\\.\\.)\\.[a-zA-Z][a-zA-Z]*$)";
    private static final Pattern VALID_DOMAIN_REGEX_PATTERN = Pattern.compile(VALID_DOMAIN_REGEX);
    private static final String LATIN_REGEX = "[\\u0080-\\u024F]";
    private static final Pattern LATIN_REGEX_PATTERN = Pattern.compile(LATIN_REGEX);
    private static final String INVALID_URL_REGEX = "^(mailto|about|geo|magnet|file|sftp|irc|data|javascript|sms|rtsp|wss|ws|telnet|tel|ftp):.*$";
    private static final Pattern INVALID_URL_REGEX_PATTERN = Pattern.compile(INVALID_URL_REGEX);
    private static final String JSON_STRING_REGEX = "^```json|```$";
    private static final Pattern JSON_STRING_REGEX_PATTERN = Pattern.compile(JSON_STRING_REGEX);
    private static final String JSON_REGEX = "\\{[^{}]*\\}";
    private static final Pattern JSON_REGEX_PATTERN = Pattern.compile(JSON_REGEX);

    public static String removeScriptComments(String str) {
        return SCRIPT_COMMENTS_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING);
    }

    public static boolean hasMultiLanguage(String str) {
        return URL_COUNTRY_AND_LANGUAGE_PATTERN.matcher(str).find();
    }

    public static String removeMultiLanguageFolder(String str) {
        return URL_COUNTRY_AND_LANGUAGE_PATTERN.matcher(str).replaceAll("/");
    }

    public static boolean hasEnglishLanguage(String str) {
        return URL_EN_LANGUAGE_PATTERN.matcher(str).find();
    }

    public static URL getURL(String str) throws MalformedURLException {
        if (!str.startsWith("http://") && !str.startsWith("https://")) {
            if (!str.contains("http://") && !str.contains("https://")) {
                str = "http://" + str;
            } else if (str.contains("https://")) {
                str = str.replaceAll(".*https://", "https://");
            } else if (str.contains("http://")) {
                str = str.replaceAll(".*http://", "http://");
            }
        }
        return new URL(str);
    }

    public static String getBrTag() {
        return BR_TAG_REGEX;
    }

    public static Pattern getUrlFilterPattern() {
        return URL_FILTER__PATTERN;
    }

    public static String replaceMultipleNewLineWithSingleNewLine(String str) {
        return MULTIPLE_NEW_LINE_REGEX_PATTERN.matcher(str).replaceAll("\n");
    }

    public static String constructUsernameFromEmailUsername(String str) {
        String[] splitNonAlphabetChar = splitNonAlphabetChar(replaceNumericChar(str, AutomationConstants.SPACE));
        StringBuilder sb = new StringBuilder();
        for (String str2 : splitNonAlphabetChar) {
            if (str2.length() > 0) {
                if (sb.length() > 0) {
                    sb.append(AutomationConstants.SPACE);
                }
                sb.append(str2);
            }
        }
        return sb.toString();
    }

    public static String constructUrlWithHttpProtocol(String str) {
        if (!str.contains("http://") && !str.contains("https://")) {
            str = "http://" + str;
        }
        if (str.contains("https://")) {
            str = HTTPS_PROTOCOL_PATTERN.matcher(str).replaceFirst("http://");
        }
        return str;
    }

    public static String constructUrlWithProtocol(String str, boolean z) {
        if (!str.contains("http://") && !str.contains("https://")) {
            str = "http://" + str;
        }
        if (!ScrapyUtil.hasUrlSubDomain(str)) {
            Object obj = AutomationConstants.EMPTY_STRING;
            if (z) {
                obj = CrawlProcessConstants.WWW_DOT;
            }
            str = str.contains("https://") ? HTTPS_PROTOCOL_PATTERN.matcher(str).replaceFirst("https://" + obj) : HTTP_PROTOCOL_PATTERN.matcher(str).replaceFirst("http://" + obj);
        }
        return str;
    }

    public static String appendHttpProtocol(String str) {
        return (str.contains("http://") || str.contains("https://")) ? str : "http://" + str;
    }

    public static String extractUsernameFromEmail(String str) {
        return constructUsernameFromEmailUsername(splitAtSign(str)[0]);
    }

    public static String removeAccentedLetters(String str) {
        return ACCENTED_LETTERS_PATTERN.matcher(Normalizer.normalize(str, Normalizer.Form.NFD)).replaceAll(AutomationConstants.EMPTY_STRING);
    }

    public static String removeNonAsciiChars(String str) {
        return NON_ASCII_CHARS_PATTERN.matcher(str).replaceAll(AutomationConstants.SPACE);
    }

    public static String removeDotAtEnd(String str) {
        return LASTDOT_REGEX_PATTERN.matcher(str).replaceFirst(AutomationConstants.EMPTY_STRING);
    }

    public static String removeSpecialCharsAtStartEnd(String str) {
        return PREFIXSUFIX_SPECIALCHARS_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING);
    }

    public static String replaceNonAlphabetChar(String str, String str2) {
        return NON_ALPHABET_CHARACTERS_REGEX_PATTERN.matcher(str).replaceAll(str2).trim();
    }

    public static String replaceNumericChar(String str, String str2) {
        return NUMERIC_CHARACTERS_REGEX_PATTERN.matcher(str).replaceAll(str2).trim();
    }

    public static String replaceNonNumericChar(String str) {
        return NON_NUMERIC_CHARACTERS_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING).trim();
    }

    public static String replaceNonNumericDotChar(String str) {
        return NON_NUMERIC_DOT_CHARACTERS_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING).trim();
    }

    public static String replaceNonAlphaNumericChar(String str) {
        return NON_ALPHANUMERIC_CHARACTERS_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING).trim();
    }

    public static String replaceNonAlphaNumericChar(String str, String str2) {
        return NON_ALPHANUMERIC_CHARACTERS_REGEX_PATTERN.matcher(str).replaceAll(str2).trim();
    }

    public static String replaceNonAlphaNumericCharWithSpace(String str) {
        return NON_ALPHANUMERIC_CHARACTERS_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.SPACE).trim();
    }

    public static String replaceNonDomainChar(String str) {
        return NON_DOMAIN_CHARACTERS_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING).trim();
    }

    public static String replaceSlash(String str, String str2) {
        return SLASH_REGEX_PATTERN.matcher(str).replaceAll(str2).trim();
    }

    public static String replaceSlashAtEnd(String str) {
        return SLASH_AT_END_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING).trim();
    }

    public static String replaceHashAtEnd(String str) {
        return HASH_AT_END_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING).trim();
    }

    public static String replaceHyphen(String str) {
        return HYPHEN_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING).trim();
    }

    public static String replaceSubDomainWWW(String str) {
        return SUB_DOMAIN_WWW_WITH_DOT_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING).trim();
    }

    public static String replaceDoubleQuote(String str) {
        return DOUBLE_QUOTE_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING).trim();
    }

    public static String replaceSquareBrackets(String str) {
        return SQUARE_BRACKETS_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING).trim();
    }

    public static String replaceTrailingWildcardPattern(String str) {
        return TRAILING_WILDCARD_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING);
    }

    public static String replaceNewLine(String str, String str2) {
        return NEW_LINE_REGEX_PATTERN.matcher(str).replaceAll(str2);
    }

    public static String replaceSpacePlus(String str, String str2) {
        return SPACE_PLUS_REGEX_PATTERN.matcher(str).replaceAll(str2);
    }

    public static String replaceHtmlTags(String str) {
        return HTML_TAGS_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING);
    }

    public static String replaceAtSign(String str, String str2) {
        return AT_SIGN_REGEX_PATTERN.matcher(str).replaceAll(str2);
    }

    public static String replaceAmpSign(String str, String str2) {
        return AMP_SIGN_REGEX_PATTERN.matcher(str).replaceAll(str2);
    }

    public static String replaceEmEnDash(String str, String str2) {
        return EM_EN_DASH_REGEX_PATTERN.matcher(str).replaceAll(str2);
    }

    public static String replaceUrlProtocol(String str) {
        return URL_PROTOCOL_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING);
    }

    public static String replaceSpace(String str) {
        return replaceSpace(str, AutomationConstants.EMPTY_STRING);
    }

    public static String replaceSpace(String str, String str2) {
        return SPACE_REGEX_PATTERN.matcher(str).replaceAll(str2).trim();
    }

    public static String escapeSpecialRegexChars(String str) {
        return REGEX_WILDCARD_CHARS.matcher(str).replaceAll("\\\\$0");
    }

    public static String[] splitNonAlphabetChar(String str) {
        return str.split(NON_ALPHABET_CHARACTERS_REGEX);
    }

    public static String[] splitNonAlphabetNumericChar(String str) {
        return str.split(NON_ALPHANUMERIC_CHARACTERS_REGEX);
    }

    public static String[] splitDot(String str) {
        return str.split(DOT_REGEX);
    }

    public static String[] splitDot(String str, int i) {
        return str.split(DOT_REGEX, i);
    }

    public static String[] splitSpace(String str) {
        return str.split(SPACE_REGEX);
    }

    public static String[] splitAtSign(String str) {
        return str.split("@");
    }

    public static String[] splitComma(String str) {
        return str.split(",");
    }

    public static String[] splitCommaSpace(String str) {
        return str.split(", ");
    }

    public static String[] splitHyphen(String str) {
        return str.split("-");
    }

    public static String[] splitSourceUrlDelimiter(String str) {
        return str.split("~#~");
    }

    public static String[] splitSlash(String str) {
        return str.split(SLASH_REGEX);
    }

    public static String[] splitBrTag(String str) {
        return str.split(BR_TAG_REGEX);
    }

    public static String[] splitCSVLine(String str) {
        return str.split(CSV_LINE_SPLIT_REGEX, -1);
    }

    public static String[] splitLineEnd(String str) {
        return str.split(NEW_LINE_REGEX);
    }

    public static String[] splitbyCamalCase(String str) {
        return str.split(CAMALCASE_REGEX);
    }

    public static Matcher getThatsThem1PatternMatcher(String str) {
        return THATSTHEM_1_PATTERN.matcher(str);
    }

    public static Matcher getThatsThem2PatternMatcher(String str) {
        return THATSTHEM_2_PATTERN.matcher(str);
    }

    public static Matcher getThatsThem3PatternMatcher(String str) {
        return THATSTHEM_3_PATTERN.matcher(str);
    }

    public static Matcher getScriptPatternMatcher(String str) {
        return SCRIPT_PATTERN.matcher(str);
    }

    public static Matcher getDesignationPatternMatcher(String str) {
        return DESIGNATION_PATTERN.matcher(str);
    }

    public static Matcher getLeadTagPatternMatcher(String str) {
        return LEADTAG_PATTERN.matcher(str);
    }

    public static Matcher getUrlDecoderMatcher(String str) {
        return URL_DECODER_VALID_PATTERN.matcher(str);
    }

    public static Matcher getUrlDecoderAllUTFMatcher(String str) {
        return URL_DECODER_ALL_UTF_VALID_PATTERN.matcher(str);
    }

    public static Matcher getUrlDecoderAccentedLettersMatcher(String str) {
        return URL_DECODER_VALID_ACCENTED_LETTERS_PATTERN.matcher(str);
    }

    public static Matcher getAlphaNumCharMatcher(String str) {
        return ALPHA_NUMERIC_CHARACTERS_PATTERN.matcher(str);
    }

    public static Matcher getNonAlphaNumCharMatcher(String str) {
        return NON_ALPHANUMERIC_CHARACTERS_REGEX_PATTERN.matcher(str);
    }

    public static Matcher getIsSiteActiveDOCMatcher(String str) {
        return ISSITEACTIVE_DOCPATTERN.matcher(str);
    }

    public static Matcher getIsSiteActiveTitleMatcher(String str) {
        return ISSITEACTIVE_TITLEPATTERN.matcher(str);
    }

    public static Matcher getParkingDomainDocMatcher(String str) {
        return PARKINGDOMAIN_DOCPATTERN.matcher(str);
    }

    public static Matcher getParkingDomainTitleMatcher(String str) {
        return PARKINGDOMAIN_TITLEPATTERN.matcher(str);
    }

    public static Matcher getLastFile(String str) {
        return URL_LAST_FILE_PATTERN.matcher(str);
    }

    public static Matcher getURLMatcher(String str, boolean z) {
        return z ? URLREGEX_PATTERN_WITH_SPACE.matcher(str) : URLREGEX_PATTERN.matcher(str);
    }

    public static Matcher getNumericCharPatternMatcher(String str) {
        return NUMERIC_CHARACTERS_REGEX_PATTERN.matcher(str);
    }

    public static Matcher getPeopleNameTagReplacePatternMatcher(String str) {
        return PEOPLE_NAMETAG_REPLACE_PATTERN.matcher(str);
    }

    public static Matcher getConsecutiveDoubleQuoteMatcher(String str) {
        return CONSECUTIVE_DOUBLE_QUOTE_PATTERN.matcher(str);
    }

    public static Matcher getDoubleQuoteMatcher(String str) {
        return DOUBLE_QUOTE_PATTERN.matcher(str);
    }

    public static Matcher getDotMatcher(String str) {
        return DOT_REGEX_PATTERN.matcher(str);
    }

    public static String getValidDomainsAlloweded(String str) {
        StringBuilder sb = null;
        for (String str2 : str.split(",")) {
            if (!DOMAINS_URL_PATTERN.matcher(str2).matches()) {
                return null;
            }
            if (sb == null) {
                sb = new StringBuilder();
            } else {
                sb.append(",");
            }
            sb.append(str2);
        }
        if (sb != null) {
            return sb.toString();
        }
        return null;
    }

    public static String getValidDomainsAvoided(String str) {
        StringBuilder sb = null;
        for (String str2 : str.split(",")) {
            if (!DOMAINS_URL_PATTERN.matcher(str2).matches()) {
                return null;
            }
            if (sb == null) {
                sb = new StringBuilder();
            } else {
                sb.append(",");
            }
            sb.append(str2);
        }
        if (sb != null) {
            return sb.toString();
        }
        return null;
    }

    public static boolean isDomainPatternMatches(String str) {
        return DOMAIN_PATTERN.matcher(str).matches();
    }

    public static boolean isValidDomain(String str) {
        return VALID_DOMAIN_REGEX_PATTERN.matcher(str).matches();
    }

    public static boolean isUrlFilterPatternMatches(String str) {
        return URL_FILTER__PATTERN.matcher(str).matches();
    }

    public static boolean isTagRestictedPatternMatches(String str) {
        return TAGS_RESTRICTED_PATTERN.matcher(str).matches();
    }

    public static boolean isValidLeadName(String str) {
        return LEADNAME_VALID_REGEX_PATTERN.matcher(str).matches();
    }

    public static boolean isStringContainsDesignation(String str) {
        return DESIGNATION_PATTERN.matcher(str).find();
    }

    public static boolean isValidWebsite(String str) {
        return WEBSITE_VALID_REGEX_PATTERN.matcher(str).matches();
    }

    public static boolean isValidWebsitePattern(String str) {
        return WEBSITE_PATTERN_VALID_REGEX_PATTERN.matcher(str).matches();
    }

    public static boolean isNumericCharPatternMatches(String str) {
        return NUMERIC_CHARACTERS_REGEX_PATTERN.matcher(str).matches();
    }

    public static boolean isImgUrl(String str) {
        return IMGURLREGEX_PATTERN.matcher(str).find();
    }

    public static boolean isContainPortOrIp(String str) {
        return IP_ADDRESS_AND_PORT_PATTERN.matcher(str).matches();
    }

    public static boolean isUrlContactsPatternMatches(String str) {
        return URL_CONTACTS_PATTERN.matcher(str).find();
    }

    public static boolean isUrlSubDomainContactsPatternMatches(String str) {
        return URL_SUBDOMAIN_CONTACTS_PATTERN.matcher(str).find();
    }

    public static boolean isUrlContactPatternMatches(String str) {
        return URL_CONTACT_PATTERN.matcher(str).find();
    }

    public static boolean isUrlSupportPatternMatches(String str) {
        return URL_SUPPORT_PATTERN.matcher(str).find();
    }

    public static boolean isUrlAboutPatternMatches(String str) {
        return URL_ABOUT_PATTERN.matcher(str).find();
    }

    public static boolean isUrlLocationPatternMatches(String str) {
        return URL_LOCATION_PATTERN.matcher(str).find();
    }

    public static boolean isUrlPrivacyPatternMatches(String str) {
        return URL_PRIVACY_PATTERN.matcher(str).find();
    }

    public static boolean isAnchorPatternMatch(String str) {
        return ANCHOR_VISIT_REGEX_PATTERN.matcher(str).find();
    }

    public static boolean isAnchorAboutPatternMatch(String str) {
        return ANCHOR_ABOUT_REGEX_PATTERN.matcher(str).find();
    }

    public static boolean isUrlAboutPatternMatch(String str) {
        return URL_ABOUT_REGEX_PATTERN.matcher(str).find();
    }

    public static boolean isAnchorAboutPartialMatch(String str) {
        return ANCHOR_ABOUT_REGEX_PARTIAL_PATTERN.matcher(str).find();
    }

    public static boolean isUrlAboutPartialMatch(String str) {
        return URL_ABOUT_REGEX_PARTIAL_PATTERN.matcher(str).find();
    }

    public static String contactFolderMatches(String str) {
        String str2 = null;
        Matcher matcher = URL_CONTACT_FOLDER_PATTERN.matcher(str);
        if (matcher.find()) {
            str2 = matcher.group();
        }
        return str2;
    }

    public static String getUrlContactFolder(String str) {
        String str2 = null;
        Matcher matcher = URL_CONTACT_FOLDER_PATTERN.matcher(str);
        if (matcher.find()) {
            str2 = matcher.group();
        }
        return str2 != null ? str2 : getUrlFolder(str);
    }

    public static String getUrlFolder(String str) {
        String str2 = null;
        Matcher matcher = FOLDER_PATTERN.matcher(str);
        if (matcher.find()) {
            str2 = matcher.group();
        }
        return str2;
    }

    public static String getUrlExtensionFromPath(String str) {
        String str2 = null;
        Matcher matcher = EXTENSION_IN_URL_PATH_PATTERN.matcher(str);
        if (matcher.find()) {
            str2 = matcher.group().substring(1);
        }
        return str2;
    }

    public static String getUrlDoubleExtensionFromPath(String str) {
        String str2 = null;
        Matcher matcher = DOUBLE_EXTENSION_IN_URL_PATH_PATTERN.matcher(str);
        if (matcher.find()) {
            str2 = matcher.group().substring(1);
        }
        return str2;
    }

    public static boolean hasAlphaNumericChars(String str) {
        return ALPHA_NUMERIC_CHARACTERS_PATTERN.matcher(str).find();
    }

    public static boolean hasNumericChars(String str) {
        return NUMERIC_CHARACTERS_REGEX_PATTERN.matcher(str).find();
    }

    public static boolean hasAlphabetChars(String str) {
        return ALPHABET_CHARACTERS_REGEX_PATTERN.matcher(str).find();
    }

    public static boolean hasStartWords(String str) {
        return START_WORDS_PATTERN.matcher(str).find();
    }

    public static boolean hasEndWords(String str) {
        return END_WORDS_PATTERN.matcher(str).find();
    }

    public static boolean isEUTLDMatch(String str) {
        return EU_TLD_PATTERN.matcher(str).find();
    }

    public static boolean isPersonNameContainsSpecialChars(String str) {
        return PEOPLENAME_PATTERN.matcher(str).find();
    }

    public static boolean isDesignationInvalid(String str) {
        return INVALID_DESIGNATION_PATTERN.matcher(str).find();
    }

    public static Pattern compilePattern(String str) {
        return Pattern.compile(str);
    }

    public static boolean isRegexValid(String str) {
        try {
            Pattern.compile(str);
            return true;
        } catch (PatternSyntaxException e) {
            return false;
        }
    }

    public static String removeUrlQueryParams(String str) {
        return QUERYPARAMS_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING);
    }

    public static String removeUrlQueryAndReferenceParams(String str) {
        return QUERY_AND_REFERENCE_PARM_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING);
    }

    public static boolean isContainPort(String str) {
        return PORT_PATTERN.matcher(str).find();
    }

    public static boolean isContainIP(String str) {
        return IP_ADDRESS_PATTERN.matcher(str).find();
    }

    public static boolean isAlphaSpaceNum(String str) {
        return ALPHA_SPACE_NUM_REGEX_PATTERN.matcher(str).find();
    }

    public static String getIP(String str) {
        Matcher matcher = IP_ADDRESS_PATTERN.matcher(str);
        if (matcher.find()) {
            return matcher.group();
        }
        return null;
    }

    public static boolean validateEmail(String str) {
        return Pattern.compile("^[a-zA-Z0-9_+&*-]+(?:\\.[a-zA-Z0-9_+&*-]+)*@(?:[a-zA-Z0-9-]+\\.)+[a-zA-Z]{2,7}$").matcher(str).matches();
    }

    public static String getNumWithComma(String str) {
        String str2 = null;
        Matcher matcher = NUM_WITH_COMMA.matcher(str);
        if (matcher.find()) {
            str2 = matcher.group();
        }
        return str2;
    }

    public static String removeLatin(String str) {
        return LATIN_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING);
    }

    public static boolean isValidURL(String str) {
        if (str.startsWith("blob:")) {
            str = str.replace("blob:", AutomationConstants.EMPTY_STRING);
        }
        return (isInvalidURL(str) || str.startsWith("#") || str.isEmpty()) ? false : true;
    }

    public static boolean isInvalidURL(String str) {
        return INVALID_URL_REGEX_PATTERN.matcher(str).matches();
    }

    public static String getValidJSONString(String str) {
        return JSON_STRING_REGEX_PATTERN.matcher(str).replaceAll(AutomationConstants.EMPTY_STRING);
    }

    public static JSONObject getValidJSON(String str) {
        JSONObject jSONObject = null;
        Matcher matcher = JSON_REGEX_PATTERN.matcher(str);
        if (matcher.find()) {
            jSONObject = new JSONObject(matcher.group());
        }
        return jSONObject;
    }
}
