diff --git a/.github/workflows/data-update.yml b/.github/workflows/data-update.yml index 9d22bcc94..4deca2ffc 100644 --- a/.github/workflows/data-update.yml +++ b/.github/workflows/data-update.yml @@ -33,7 +33,6 @@ jobs: - update_engine_traits.py - update_wikidata_units.py - update_engine_descriptions.py - - update_tracker_patterns.py permissions: contents: write diff --git a/searx/cache.py b/searx/cache.py index 7ba5c8886..16386838f 100644 --- a/searx/cache.py +++ b/searx/cache.py @@ -10,6 +10,7 @@ from __future__ import annotations __all__ = ["ExpireCacheCfg", "ExpireCacheStats", "ExpireCache", "ExpireCacheSQLite"] import abc +from collections.abc import Iterator import dataclasses import datetime import hashlib @@ -396,6 +397,20 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache): return self.deserialize(row[0]) + def pairs(self, ctx: str) -> Iterator[tuple[str, typing.Any]]: + """Iterate over key/value pairs from table given by argument ``ctx``. + If ``ctx`` argument is ``None`` (the default), a table name is + generated from the :py:obj:`ExpireCacheCfg.name`.""" + table = ctx + self.maintenance() + + if not table: + table = self.normalize_name(self.cfg.name) + + if table in self.table_names: + for row in self.DB.execute(f"SELECT key, value FROM {table}"): + yield row[0], self.deserialize(row[1]) + def state(self) -> ExpireCacheStats: cached_items = {} for table in self.table_names: diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 5a859f8cd..d43879910 100644 --- a/searx/data/__init__.py +++ b/searx/data/__init__.py @@ -13,6 +13,7 @@ import typing from .core import log, data_dir from .currencies import CurrenciesDB +from .tracker_patterns import TrackerPatternsDB CURRENCIES: CurrenciesDB USER_AGENTS: dict[str, typing.Any] @@ -23,7 +24,7 @@ OSM_KEYS_TAGS: dict[str, typing.Any] ENGINE_DESCRIPTIONS: dict[str, typing.Any] ENGINE_TRAITS: dict[str, typing.Any] LOCALES: dict[str, typing.Any] -TRACKER_PATTERNS: list[dict[str, typing.Any]] +TRACKER_PATTERNS: TrackerPatternsDB lazy_globals = { "CURRENCIES": CurrenciesDB(), @@ -35,7 +36,7 @@ lazy_globals = { "ENGINE_DESCRIPTIONS": None, "ENGINE_TRAITS": None, "LOCALES": None, - "TRACKER_PATTERNS": None, + "TRACKER_PATTERNS": TrackerPatternsDB(), } data_json_files = { @@ -47,7 +48,6 @@ data_json_files = { "ENGINE_DESCRIPTIONS": "engine_descriptions.json", "ENGINE_TRAITS": "engine_traits.json", "LOCALES": "locales.json", - "TRACKER_PATTERNS": "tracker_patterns.json", } diff --git a/searx/data/tracker_patterns.json b/searx/data/tracker_patterns.json deleted file mode 100644 index f5e685add..000000000 --- a/searx/data/tracker_patterns.json +++ /dev/null @@ -1,1985 +0,0 @@ -[ - { - "exceptions": [ - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon(?:\\.[a-z]{2,}){1,}\\/gp\\/.*?(?:redirector.html|cart\\/ajax-update.html|video\\/api\\/)", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon(?:\\.[a-z]{2,}){1,}\\/(?:hz\\/reviews-render\\/ajax\\/|message-us\\?|s\\?)" - ], - "trackerParams": [ - "p[fd]_rd_[a-z]*", - "qid", - "srs?", - "__mk_[a-z]{1,3}_[a-z]{1,3}", - "spIA", - "ms3_c", - "[a-z%0-9]*ie", - "refRID", - "colii?d", - "[^a-z%0-9]adId", - "qualifier", - "_encoding", - "smid", - "field-lbr_brands_browse-bin", - "ref_?", - "th", - "sprefix", - "crid", - "keywords", - "cv_ct_[a-z]+", - "linkCode", - "creativeASIN", - "ascsubtag", - "aaxitk", - "hsa_cr_id", - "sb-ci-[a-z]+", - "rnid", - "dchild", - "camp", - "creative", - "s", - "content-id", - "dib", - "dib_tag", - "social_share", - "starsLeft", - "skipTwisterOG" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [], - "trackerParams": [ - "cvid", - "ocid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?msn\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "p[fd]_rd_[a-z]*", - "qid", - "srs?", - "__mk_[a-z]{1,3}_[a-z]{1,3}", - "spIA", - "ms3_c", - "[a-z%0-9]*ie", - "refRID", - "colii?d", - "[^a-z%0-9]adId", - "qualifier", - "_encoding", - "smid", - "field-lbr_brands_browse-bin", - "ref_?", - "th", - "sprefix", - "crid", - "cv_ct_[a-z]+", - "linkCode", - "creativeASIN", - "ascsubtag", - "aaxitk", - "hsa_cr_id", - "sb-ci-[a-z]+", - "rnid", - "dchild", - "camp", - "creative" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon(?:\\.[a-z]{2,}){1,}\\/s\\?" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?fls-na\\.amazon(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [ - "^https?:\\/\\/mail\\.google\\.com\\/mail\\/u\\/", - "^https?:\\/\\/accounts\\.google\\.com\\/o\\/oauth2\\/", - "^https?:\\/\\/accounts\\.google\\.com\\/signin\\/oauth\\/", - "^https?:\\/\\/(?:docs|accounts)\\.google(?:\\.[a-z]{2,}){1,}", - "^https?:\\/\\/([a-z0-9-\\.])*(chat|drive)\\.google\\.com\\/videoplayback", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}(?:\\/upload)?\\/drive\\/", - "^https?:\\/\\/news\\.google\\.com.*\\?hl=.", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}\\/s\\?tbm=map.*?gs_[a-z]*=.", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}\\/(?:complete\\/search|setprefs|searchbyimage)", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}\\/(?:appsactivity|aclk\\?)", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}\\/safe[-]?browsing\\/([^&]+)" - ], - "trackerParams": [ - "ved", - "bi[a-z]*", - "gfe_[a-z]*", - "ei", - "source", - "gs_[a-z]*", - "site", - "oq", - "esrc", - "uact", - "cd", - "cad", - "gws_[a-z]*", - "atyp", - "vet", - "_u", - "je", - "dcr", - "ie", - "sei", - "sa", - "dpr", - "btn[a-z]*", - "usg", - "cd", - "cad", - "uact", - "aqs", - "sourceid", - "sxsrf", - "rlz", - "i-would-rather-use-firefox", - "pcampaignid", - "sca_(?:esv|upv)", - "iflsig", - "fbs", - "ictx" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?googlesyndication\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?doubleclick(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?googleadservices\\.com" - }, - { - "exceptions": [ - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?matrix\\.org\\/_matrix\\/", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?(?:cloudflare\\.com|prismic\\.io|tangerine\\.ca|gitlab\\.com)", - "^https?:\\/\\/myaccount.google(?:\\.[a-z]{2,}){1,}", - "^https?:\\/\\/accounts.google(?:\\.[a-z]{2,}){1,}", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?gcsip\\.(?:com|nl)[^?]*\\?.*?&?ref_?=.", - "^https?:\\/\\/[^/]+/[^/]+/[^/]+\\/-\\/refs\\/switch[^?]*\\?.*?&?ref_?=.", - "^https?:\\/\\/bugtracker\\.[^/]*\\/[^?]+\\?.*?&?ref_?=[^/?&]*", - "^https?:\\/\\/comment-cdn\\.9gag\\.com\\/.*?comment-list.json\\?", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?battle\\.net\\/login", - "^https?:\\/\\/blizzard\\.com\\/oauth2", - "^https?:\\/\\/kreditkarten-banking\\.lbb\\.de", - "^https?:\\/\\/www\\.tinkoff\\.ru", - "^https?:\\/\\/www\\.cyberport\\.de\\/adscript\\.php", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tweakers\\.net\\/ext\\/lt\\.dsp\\?.*?(?:%3F)?&?ref_?=.", - "^https?:\\/\\/git(lab)?\\.[^/]*\\/[^?]+\\?.*?&?ref_?=[^/?&]*", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon(?:\\.[a-z]{2,}){1,}\\/message-us\\?", - "^https?:\\/\\/authorization\\.td\\.com", - "^https?:\\/\\/support\\.steampowered\\.com", - "^https?:\\/\\/privacy\\.vakmedianet\\.nl\\/.*?ref=", - "^https?:\\/\\/sso\\.serverplan\\.com\\/manage2fa\\/check\\?ref=", - "^https?:\\/\\/login\\.meijer\\.com\\/.*?\\?ref=", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/(?:login_alerts|ajax|should_add_browser)/", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/groups\\/member_bio\\/bio_dialog\\/", - "^https?:\\/\\/api\\.taiga\\.io", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?gog\\.com\\/click\\.html", - "^https?:\\/\\/login\\.progressive\\.com", - "^https?:\\/\\/www\\.sephora\\.com\\/api\\/", - "^https?:\\/\\/www\\.contestgirl\\.com", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?agenciatributaria\\.gob\\.es", - "^https?:\\/\\/login\\.ingbank\\.pl", - "^wss?:\\/\\/(?:[a-z0-9-]+\\.)*?zoom\\.us", - "^https?:\\/\\/api\\.bilibili\\.com", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?onet\\.pl\\/[^?]*\\?.*?utm_campaign=.", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?stripe\\.com\\/[^?]+.*?&?referrer=[^/?&]*", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?lichess\\.org\\/login.*?&?referrer=.*?", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?microsoft\\.com\\/.*?research\\/redirect", - "^https?:\\/\\/like.co\\/api\\/like\\/likebutton\\/[^?]+.*?&?referrer=[^/?&]*", - "^https?:\\/\\/button.like.co\\/in\\/.*?&?referrer=[^/?&]*", - "^https?:\\/\\/www\\.mma\\.go\\.kr", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?github\\.com", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?billiger\\.de\\/.*?mc=", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?\\.youtrack\\.cloud", - "^https?:\\/\\/cu\\.bankid\\.com", - "^https?:\\/\\/login\\.aliexpress\\.us" - ], - "trackerParams": [ - "(?:%3F)?utm(?:_[a-z_]*)?", - "(?:%3F)?ga_[a-z_]+", - "(?:%3F)?yclid", - "(?:%3F)?_openstat", - "(?:%3F)?fb_action_(?:types|ids)", - "(?:%3F)?fb_(?:source|ref)", - "(?:%3F)?fbclid", - "(?:%3F)?action_(?:object|type|ref)_map", - "(?:%3F)?gs_l", - "(?:%3F)?mkt_tok", - "(?:%3F)?hmb_(?:campaign|medium|source)", - "(?:%3F)?gclid", - "(?:%3F)?srsltid", - "(?:%3F)?otm_[a-z_]*", - "(?:%3F)?cmpid", - "(?:%3F)?os_ehash", - "(?:%3F)?_ga", - "(?:%3F)?_gl", - "(?:%3F)?__twitter_impression", - "(?:%3F)?wt_?z?mc", - "(?:%3F)?wtrid", - "(?:%3F)?[a-z]?mc", - "(?:%3F)?dclid", - "Echobox", - "(?:%3F)?spm", - "(?:%3F)?vn(?:_[a-z]*)+", - "(?:%3F)?tracking_source", - "(?:%3F)?ceneo_spo", - "(?:%3F)?itm_(?:campaign|medium|source)", - "(?:%3F)?__hsfp", - "(?:%3F)?__hssc", - "(?:%3F)?__hstc", - "(?:%3F)?_hsenc", - "(?:%3F)?__s", - "(?:%3F)?hsCtaTracking", - "(?:%3F)?mc_(?:eid|cid|tc)", - "(?:%3F)?ml_subscriber", - "(?:%3F)?ml_subscriber_hash", - "(?:%3F)?msclkid", - "(?:%3F)?oly_anon_id", - "(?:%3F)?oly_enc_id", - "(?:%3F)?rb_clickid", - "(?:%3F)?s_cid", - "(?:%3F)?vero_conv", - "(?:%3F)?vero_id", - "(?:%3F)?wickedid", - "(?:%3F)?twclid" - ], - "urlPattern": ".*" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?adtech(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bf-ad(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [ - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon-adsystem(?:\\.[a-z]{2,}){1,}\\/v3\\/oor\\?" - ], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon-adsystem(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?adsensecustomsearchads(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [ - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?youtube\\.com\\/signin\\?.*?" - ], - "trackerParams": [ - "feature", - "gclid", - "kw", - "si", - "pp" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?(youtube\\.com|youtu\\.be)" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?youtube\\.com\\/pagead" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?youtube\\.com\\/api\\/stats\\/ads" - }, - { - "exceptions": [ - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/.*?(plugins|ajax)\\/", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/dialog\\/(?:share|send)", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/groups\\/member_bio\\/bio_dialog\\/", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/photo\\.php\\?", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/privacy\\/specific_audience_selector_dialog\\/", - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/photo\\/download\\/" - ], - "trackerParams": [ - "hc_[a-z_%\\[\\]0-9]*", - "[a-z]*ref[a-z]*", - "__tn__", - "eid", - "__(?:xts|cft)__(?:\\[|%5B)\\d(?:\\]|%5D)", - "comment_tracking", - "dti", - "app", - "video_source", - "ftentidentifier", - "pageid", - "padding", - "ls_ref", - "action_history", - "tracking", - "referral_code", - "referral_story_type", - "eav", - "sfnsn", - "idorvanity", - "wtsid", - "rdc", - "rdr", - "paipv", - "_nc_x", - "_rdr", - "mibextid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com" - }, - { - "exceptions": [ - "^https?:\\/\\/twitter.com\\/i\\/redirect" - ], - "trackerParams": [ - "(?:ref_?)?src", - "s", - "cn", - "ref_url", - "t" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?twitter.com" - }, - { - "exceptions": [ - "^https?:\\/\\/x.com\\/i\\/redirect" - ], - "trackerParams": [ - "(?:ref_?)?src", - "s", - "cn", - "ref_url", - "t" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?x.com" - }, - { - "exceptions": [], - "trackerParams": [ - "%24deep_link", - "\\$deep_link", - "correlation_id", - "ref_campaign", - "ref_source", - "%243p", - "rdt", - "\\$3p", - "%24original_url", - "\\$original_url", - "_branch_match_id", - "share_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?reddit.com" - }, - { - "exceptions": [], - "trackerParams": [ - "trackId", - "tctx", - "jb[a-z]*?" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?netflix.com" - }, - { - "exceptions": [], - "trackerParams": [ - "ncid", - "sr", - "sr_share", - "guccounter", - "guce_referrer", - "guce_referrer_sig" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?techcrunch\\.com" - }, - { - "exceptions": [ - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bing(?:\\.[a-z]{2,}){1,}\\/WS\\/redirect\\/" - ], - "trackerParams": [ - "cvid", - "form", - "sk", - "sp", - "sc", - "qs", - "qp" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bing(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [], - "trackerParams": [ - "nb", - "u" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tweakers\\.net" - }, - { - "exceptions": [], - "trackerParams": [ - "tt_medium", - "tt_content" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?twitch\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "pk_campaign", - "pk_kwd" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?vivaldi\\.com" - }, - { - "exceptions": [ - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?indeed\\.com\\/rc\\/clk" - ], - "trackerParams": [ - "from", - "alid", - "[a-z]*tk" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?indeed\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "vss", - "t", - "swnt", - "grpos", - "ptl", - "stl", - "exp", - "plim" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?hh\\.ru" - }, - { - "exceptions": [], - "trackerParams": [ - "_trkparms", - "_trksid", - "_from", - "hash" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?ebay(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [], - "trackerParams": [ - "ftag" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?cnet\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "ref_", - "pf_rd_[a-z]*" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?imdb\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?govdelivery\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "u1", - "ath[a-z]*" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?walmart\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "pl" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?net\\-parade\\.it" - }, - { - "exceptions": [], - "trackerParams": [ - "xid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?prvnizpravy\\.cz" - }, - { - "exceptions": [], - "trackerParams": [ - "tpa" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?youku\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "smid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?nytimes\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "wbdcd" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tchibo\\.de" - }, - { - "exceptions": [], - "trackerParams": [ - "snr" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?steampowered\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?steamcommunity\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "https?:\\/\\/outgoing\\.prod\\.mozaws\\.net\\/" - }, - { - "exceptions": [], - "trackerParams": [ - "src" - ], - "urlPattern": "https?:\\/\\/([a-z0-9-.]*\\.)shutterstock\\.com" - }, - { - "exceptions": [ - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mozilla.org\\/api" - ], - "trackerParams": [ - "src", - "platform", - "redirect_source" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mozilla\\.org" - }, - { - "exceptions": [], - "trackerParams": [ - "ref" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?readdc\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "email" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?dailycodingproblem\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "email_token", - "email_source" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?github\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?deviantart\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?site2\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?site\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?site3\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "ws_ab_test", - "btsid", - "algo_expid", - "algo_pvid", - "gps-id", - "scm[_a-z-]*", - "cv", - "af", - "mall_affr", - "sk", - "dp", - "terminal_id", - "aff_request_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?aliexpress(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [], - "trackerParams": [ - "sid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mozillazine\\.org" - }, - { - "exceptions": [ - "^https?:\\/\\/comment-cdn\\.9gag\\.com\\/.*?comment-list.json\\?" - ], - "trackerParams": [ - "ref" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?9gag\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?linksynergy\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "ref" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?giphy\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?gate\\.sc" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/vk\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "ref_?" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?woot\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "_requestid", - "cid", - "dl", - "di", - "sd", - "bi" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?vitamix\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?curseforge\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?messenger\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "__twitter_impression" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?nypost\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "partner" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?ozon\\.ru" - }, - { - "exceptions": [], - "trackerParams": [ - "link_id", - "can_id", - "source", - "email_referrer", - "email_subject" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?norml\\.org" - }, - { - "exceptions": [], - "trackerParams": [ - "refId", - "trk", - "li[a-z]{2}", - "trackingId" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?linkedin\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "u" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?linkedin\\.com\\/learning" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?smartredirect\\.de" - }, - { - "exceptions": [], - "trackerParams": [ - "b" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?spiegel\\.de" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?rutracker\\.org" - }, - { - "exceptions": [], - "trackerParams": [ - "igshid", - "igsh" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?instagram\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?imgsrc\\.ru" - }, - { - "exceptions": [], - "trackerParams": [ - "h" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?boredpanda\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?awstrack\\.me" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?exactag\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "dbkanal_[0-9]{3}" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bahn\\.de" - }, - { - "exceptions": [], - "trackerParams": [ - "cuid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?disq\\.us" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?anonym\\.to" - }, - { - "exceptions": [], - "trackerParams": [ - "cm_lm", - "cm_mmc", - "webUserId", - "spMailingID", - "spUserID", - "spJobID", - "spReportId" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?moosejaw\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "int_campaign" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?80000hours\\.org" - }, - { - "exceptions": [], - "trackerParams": [ - "si" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?spotify\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "lr", - "redircnt" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?(?:yandex(?:\\.[a-z]{2,}){1,}|ya\\.ru)" - }, - { - "exceptions": [], - "trackerParams": [ - "ecp", - "m_bt" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?healio\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "iref" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?zoho\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "sc_referrer", - "sc_ua" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?snapchat\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "source" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?medium\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "source" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?swp\\.de" - }, - { - "exceptions": [], - "trackerParams": [ - "from" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?wps\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "entrypoint", - "form_type" - ], - "urlPattern": "^https?:\\/\\/(?:accounts\\.)?firefox\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "as" - ], - "urlPattern": "^https?:\\/\\/(?:support\\.)?mozilla\\.org" - }, - { - "exceptions": [], - "trackerParams": [ - "test" - ], - "urlPattern": "^https?:\\/\\/kevinroebert\\.gitlab\\.io\\/ClearUrls\\/void\\/index\\.html" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/kevinroebert\\.gitlab\\.io\\/ClearUrls\\/void\\/block\\.svg" - }, - { - "exceptions": [], - "trackerParams": [ - "test" - ], - "urlPattern": "^https?:\\/\\/test\\.clearurls\\.xyz\\/void\\/index\\.html" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/test\\.clearurls\\.xyz\\/void\\/block\\.svg" - }, - { - "exceptions": [], - "trackerParams": [ - "from", - "xtor", - "xt_at" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?diepresse\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "x" - ], - "urlPattern": "^https?:\\/\\/newsletter\\.lidl(?:\\.[a-z]{2,}){1,}" - }, - { - "exceptions": [], - "trackerParams": [ - "reco_id", - "sid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?allegro\\.pl" - }, - { - "exceptions": [], - "trackerParams": [ - "CMP_SKU", - "MER", - "mr:trackingCode", - "mr:device", - "mr:adType", - "iv_", - "CMP_ID", - "k_clickid", - "rmatt", - "INT_ID", - "ti", - "fl" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?backcountry\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "rv", - "_xtd" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?meetup\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "app", - "ign-itsc[a-z]+" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?apple\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?alabout\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "source", - "bxid", - "cndid", - "esrc", - "mbid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?newyorker\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "track_click", - "link_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?gog\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tradedoubler\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "CMP" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?theguardian\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?srvtrck\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/mysku\\.ru" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?admitad\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "price", - "sourceType", - "suid", - "ut_sk", - "un", - "share_crt_v", - "sp_tk", - "cpp", - "shareurl", - "short_name", - "app", - "scm[_a-z-]*", - "pvid", - "algo_expid", - "algo_pvid", - "ns", - "abbucket", - "ali_refid", - "ali_trackid", - "acm", - "utparam", - "pos", - "abtest", - "trackInfo", - "utkn", - "scene", - "mytmenu", - "turing_bucket", - "lygClk", - "impid", - "bftTag", - "bftRwd", - "spm", - "_u" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?taobao\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "price", - "sourceType", - "suid", - "ut_sk", - "un", - "share_crt_v", - "sp_tk", - "cpp", - "shareurl", - "short_name", - "app", - "scm[_a-z-]*", - "pvid", - "algo_expid", - "algo_pvid", - "ns", - "abbucket", - "ali_refid", - "ali_trackid", - "acm", - "utparam", - "pos", - "abtest", - "trackInfo", - "user_number_id", - "utkn", - "scene", - "mytmenu", - "turing_bucket", - "lygClk", - "impid", - "bftTag", - "bftRwd", - "activity_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tmall\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "sm" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tb\\.cn" - }, - { - "exceptions": [ - "^https?:\\/\\/api\\.bilibili\\.com", - "^https?:\\/\\/space\\.bilibili\\.com" - ], - "trackerParams": [ - "callback", - "spm_id_from", - "from_source", - "from", - "seid", - "mid", - "share_source", - "msource", - "refer_from", - "share_from", - "share_medium", - "share_source", - "share_plat", - "share_tag", - "share_session_id", - "timestamp", - "unique_k", - "vd_source", - "plat_id", - "buvid", - "is_story_h5", - "up_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bilibili\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "spm_id_from" - ], - "urlPattern": "^https?:\\/\\/space\\.bilibili\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "bbid", - "ts" - ], - "urlPattern": "^https?:\\/\\/m\\.bilibili\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "visit_id", - "session_id", - "broadcast_type", - "is_room_feed" - ], - "urlPattern": "^https?:\\/\\/live\\.bilibili\\.com" - }, - { - "exceptions": [ - "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?marketscreener\\.com\\/search\\/\\?" - ], - "trackerParams": [ - "type_recherche", - "mots", - "noredirect", - "RewriteLast", - "lien", - "aComposeInputSearch", - "type_recherche_forum", - "add_mots", - "countview" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?marketscreener\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "type_recherche", - "noredirect", - "RewriteLast", - "lien", - "aComposeInputSearch", - "type_recherche_forum", - "countview" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?marketscreener\\.com\\/search\\/\\?" - }, - { - "exceptions": [], - "trackerParams": [ - "irclickid", - "irgwc", - "loc", - "acampID", - "mpid", - "intl" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bestbuy\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?digidip\\.net" - }, - { - "exceptions": [], - "trackerParams": [ - "u_code", - "preview_pb", - "_d", - "_t", - "_r", - "timestamp", - "user_id", - "share_app_name", - "share_iid", - "source" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tiktok\\.com" - }, - { - "exceptions": [ - "^https?:\\/\\/edith\\.xiaohongshu\\.com\\/api\\/sns\\/web\\/v1\\/user\\/hover_card" - ], - "trackerParams": [ - "xhsshare", - "author_share", - "type", - "xsec_source", - "share_from_user_hidden", - "app_version", - "ignoreEngage", - "app_platform", - "apptime", - "appuid", - "shareRedId", - "share_id", - "exSource", - "verifyUuid", - "verifyType", - "verifyBiz" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?xiaohongshu\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "idprob", - "hash", - "sending_id", - "site_id", - "dr_tracker" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?autoplus\\.fr" - }, - { - "exceptions": [], - "trackerParams": [ - "pc", - "npc", - "npv[0-9]+", - "npi" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bigfishgames\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?dpbolvw\\.net" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?humblebundle\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "actId", - "actCampaignType", - "actSource" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?cafepedagogique\\.net" - }, - { - "exceptions": [], - "trackerParams": [ - "tl_[a-z_]+" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bloculus\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mailpanion\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?signtr\\.website" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mailtrack\\.io" - }, - { - "exceptions": [], - "trackerParams": [ - "rtoken" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?zillow\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "ex", - "identityID", - "MID", - "RID" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?realtor\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "riftinfo" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?redfin\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "epic_affiliate", - "epic_gameId" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?epicgames\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "srcc", - "utm_v", - "utm_medium", - "utm_source" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?onet\\.pl" - }, - { - "exceptions": [], - "trackerParams": [ - "internalSource", - "referringId", - "referringContentType", - "clickId" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?allrecipes\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "xtor" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?europe1\\.fr" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?effiliation\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "istCompanyId", - "istFeedId", - "istItemId", - "istBid", - "clickOrigin" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?argos\\.co\\.uk" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?hlserve\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "src" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?thunderbird\\.net" - }, - { - "exceptions": [], - "trackerParams": [ - "__source" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?cnbc\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "refPageId" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?roblox\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "_returnURL" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?cell\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "redirectedFrom" - ], - "urlPattern": "^https?:\\/\\/academic\\.oup\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?flexlinkspro\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "source" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?agata88\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "share" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?hs\\.fi" - }, - { - "exceptions": [], - "trackerParams": [ - "origin" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?yle\\.fi" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/refer\\.ccbill\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "otracker.?", - "ssid", - "[cilp]id", - "marketplace", - "store", - "srno", - "store", - "ppn", - "ppt", - "fm", - "collection-tab-name", - "sattr\\[\\]", - "p\\[\\]", - "st" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?flipkart\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "sid", - "src", - "siteId", - "lcb", - "leadOutUrl", - "offerListId", - "osId", - "cancelUrl", - "disc" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?idealo\\.de" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?idealo-partner\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "internal" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?teletrader\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?webgains\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "ecid", - "_hsmi", - "_hsenc" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?deeplearning\\.ai" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?getpocket\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "PostType", - "ServiceType", - "ftag", - "UniqueID", - "TheTime" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?gamespot\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "src", - "trkid", - "whid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tokopedia\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "ddw", - "ds_ch" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?wkorea\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "source", - "medium", - "content" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?eonline\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "taid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?reuters\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/app\\.adjust\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "source_location", - "psf_variant", - "share_intent" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?change\\.org" - }, - { - "exceptions": [], - "trackerParams": [ - "tag" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?ceneo\\.pl" - }, - { - "exceptions": [], - "trackerParams": [ - "intcid" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?wired\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "pid", - "uid", - "tag", - "release", - "environment", - "sample", - "behavior", - "enableSPA", - "enableLinkTrace", - "page", - "begin", - "c2", - "c3", - "success", - "code", - "msg", - "api", - "traceId", - "pv_id", - "flag", - "sr", - "vp", - "ct", - "_v", - "sampling", - "dl", - "post_res" - ], - "urlPattern": "^https?:\\/\\/arms-retcode\\.aliyuncs\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "adid", - "i_cid", - "n_cid", - "waad" - ], - "urlPattern": "^https?://(?:[a-z0-9-]+\\.)*?nikkei\\.co(?:m|\\.jp)" - }, - { - "exceptions": [], - "trackerParams": [ - "weibo_id", - "dt_dapp" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?weibo\\.(cn|com)" - }, - { - "exceptions": [], - "trackerParams": [ - "context_referrer", - "source", - "ref_ctx_id", - "funnel" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?fiverr\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "click_key", - "click_sum", - "organic_search_click" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?etsy\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "itm_campaign", - "itm_medium", - "itm_source", - "itm_term" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?magento\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "dop_ab_variant", - "dop_source_zone_name", - "dop_req_id", - "dop_id", - "source", - "seq_no" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?novinky\\.cz" - }, - { - "exceptions": [], - "trackerParams": [ - "dop_ab_variant", - "dop_source_zone_name", - "dop_req_id", - "dop_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?aktualne\\.cz" - }, - { - "exceptions": [], - "trackerParams": [ - "dop_ab_variant", - "dop_source_zone_name", - "dop_req_id", - "dop_id", - "source", - "seq_no" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?seznamzpravy\\.cz" - }, - { - "exceptions": [], - "trackerParams": [ - "log", - "p" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?billiger\\.de" - }, - { - "exceptions": [], - "trackerParams": [ - "sznclid", - "dop_ab_variant", - "dop_source_zone_name", - "dop_req_id", - "dop_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?respekt\\.cz" - }, - { - "exceptions": [], - "trackerParams": [ - "sznclid", - "dop_ab_variant", - "dop_source_zone_name", - "dop_req_id", - "dop_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?faei\\.cz" - }, - { - "exceptions": [], - "trackerParams": [ - "sznclid", - "dop_ab_variant", - "dop_source_zone_name", - "dop_req_id", - "dop_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?iprima\\.cz" - }, - { - "exceptions": [], - "trackerParams": [ - "sznclid", - "dop_ab_variant", - "dop_source_zone_name", - "dop_req_id", - "dop_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?nova\\.cz" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?duckduckgo\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "DEAL_ID", - "L", - "S", - "T", - "V", - "pdp_filters", - "position", - "search_layout", - "tracking_id", - "type", - "c_[_a-zA-Z]+", - "me\\.[_a-zA-Z]+", - "reco_[_a-zA-Z]+" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mercadolibre\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "funnelUUID" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?quizlet\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "xtor", - "at_[a-z_]+" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bbc\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "federated_search_id", - "search_type", - "source", - "source_impression_id" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?airbnb\\.(com|ae|ca|co\\.in|co\\.nz|co\\.uk|co\\.za|com\\.au|com\\.mt|com\\.sg|de|gy|ie)" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?partner-ads\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "refer_method" - ], - "urlPattern": "^https?://(?:[a-z0-9-]+\\.)*?kahoot\\.it" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?href\\.li" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?adform\\.net" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?artefact\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?awin1\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?telekom\\.de" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?loginfra\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?umblr\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "from_search", - "from_srp", - "qid", - "rank", - "ac" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?goodreads\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "pvid", - "scm" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?sohu\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "publish_id", - "sp_atk", - "xptdk" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?shopee\\.(com|co\\.th)" - }, - { - "exceptions": [], - "trackerParams": [ - "clickTrackInfo", - "abid", - "pvid", - "ad_src", - "spm", - "src", - "from", - "scm", - "pa", - "pid_pvid", - "did", - "mp", - "cid", - "impsrc", - "pos" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?lazada\\.(com|co\\.th|co\\.id|com\\.my|com\\.ph|sg|vn)" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?pantip\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?skimresources\\.com" - }, - { - "exceptions": [], - "trackerParams": [ - "sPartner", - "campaign" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?office-partner\\.de" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mozgcp\\.net" - }, - { - "exceptions": [], - "trackerParams": [ - "shareToken" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?thetimes\\.co\\.uk" - }, - { - "exceptions": [], - "trackerParams": [ - "ito" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?metro\\.co\\.uk" - }, - { - "exceptions": [], - "trackerParams": [ - "sh" - ], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?forbes\\.com" - }, - { - "exceptions": [], - "trackerParams": [], - "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?viglink\\.com" - } -] \ No newline at end of file diff --git a/searx/data/tracker_patterns.py b/searx/data/tracker_patterns.py new file mode 100644 index 000000000..f269b8395 --- /dev/null +++ b/searx/data/tracker_patterns.py @@ -0,0 +1,142 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Simple implementation to store TrackerPatterns data in a SQL database.""" + +from __future__ import annotations +import typing + +__all__ = ["TrackerPatternsDB"] + +import re +import pathlib +from collections.abc import Iterator +from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode + +import httpx + +from searx.data.core import get_cache, log + +RuleType = tuple[str, list[str], list[str]] + + +class TrackerPatternsDB: + # pylint: disable=missing-class-docstring + + ctx_name = "data_tracker_patterns" + json_file = pathlib.Path(__file__).parent / "tracker_patterns.json" + + CLEAR_LIST_URL = [ + # ClearURL rule lists, the first one that responds HTTP 200 is used + "https://rules1.clearurls.xyz/data.minify.json", + "https://rules2.clearurls.xyz/data.minify.json", + "https://raw.githubusercontent.com/ClearURLs/Rules/refs/heads/master/data.min.json", + ] + + class Fields: + # pylint: disable=too-few-public-methods, invalid-name + url_regexp: typing.Final = 0 # URL (regular expression) match condition of the link + url_ignore: typing.Final = 1 # URL (regular expression) to ignore + del_args: typing.Final = 2 # list of URL arguments (regular expression) to delete + + def __init__(self): + self.cache = get_cache() + + def init(self): + if self.cache.properties("tracker_patterns loaded") != "OK": + self.load() + self.cache.properties.set("tracker_patterns loaded", "OK") + # F I X M E: + # do we need a maintenance .. rember: database is stored + # in /tmp and will be rebuild during the reboot anyway + + def load(self): + log.debug("init searx.data.TRACKER_PATTERNS") + for rule in self.iter_clear_list(): + self.add(rule) + + def add(self, rule: RuleType): + self.cache.set( + key=rule[self.Fields.url_regexp], + value=( + rule[self.Fields.url_ignore], + rule[self.Fields.del_args], + ), + ctx=self.ctx_name, + expire=None, + ) + + def rules(self) -> Iterator[RuleType]: + self.init() + for key, value in self.cache.pairs(ctx=self.ctx_name): + yield key, value[0], value[1] + + def iter_clear_list(self) -> Iterator[RuleType]: + resp = None + for url in self.CLEAR_LIST_URL: + resp = httpx.get(url, timeout=3) + if resp.status_code == 200: + break + log.warning(f"TRACKER_PATTERNS: ClearURL ignore HTTP {resp.status_code} {url}") + + if resp is None: + log.error("TRACKER_PATTERNS: failed fetching ClearURL rule lists") + return + + for rule in resp.json()["providers"].values(): + yield ( + rule["urlPattern"].replace("\\\\", "\\"), # fix javascript regex syntax + [exc.replace("\\\\", "\\") for exc in rule.get("exceptions", [])], + rule.get("rules", []), + ) + + def clean_url(self, url: str) -> bool | str: + """The URL arguments are normalized and cleaned of tracker parameters. + + Returns bool ``True`` to use URL unchanged (``False`` to ignore URL). + If URL should be modified, the returned string is the new URL to use. + """ + + new_url = url + parsed_new_url = urlparse(url=new_url) + + for rule in self.rules(): + + if not re.match(rule[self.Fields.url_regexp], new_url): + # no match / ignore pattern + continue + + do_ignore = False + for pattern in rule[self.Fields.url_ignore]: + if re.match(pattern, new_url): + do_ignore = True + break + + if do_ignore: + # pattern is in the list of exceptions / ignore pattern + # HINT: + # we can't break the outer pattern loop since we have + # overlapping urlPattern like ".*" + continue + + # remove tracker arguments from the url-query part + query_args: list[tuple[str, str]] = list(parse_qsl(parsed_new_url.query)) + + for name, val in query_args.copy(): + # remove URL arguments + for pattern in rule[self.Fields.del_args]: + if re.match(pattern, name): + log.debug("TRACKER_PATTERNS: %s remove tracker arg: %s='%s'", parsed_new_url.netloc, name, val) + query_args.remove((name, val)) + + parsed_new_url = parsed_new_url._replace(query=urlencode(query_args)) + new_url = urlunparse(parsed_new_url) + + if new_url != url: + return new_url + + return True + + +if __name__ == "__main__": + db = TrackerPatternsDB() + for r in db.rules(): + print(r) diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index efc593775..b7e8e25f3 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -2,17 +2,15 @@ # pylint: disable=missing-module-docstring, unused-argument from __future__ import annotations -import typing -import re -from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode +import logging +import typing from flask_babel import gettext from searx.data import TRACKER_PATTERNS from . import Plugin, PluginInfo -from ._core import log if typing.TYPE_CHECKING: from searx.search import SearchWithPlugins @@ -21,13 +19,16 @@ if typing.TYPE_CHECKING: from searx.plugins import PluginCfg +log = logging.getLogger("searx.plugins.tracker_url_remover") + + class SXNGPlugin(Plugin): """Remove trackers arguments from the returned URL.""" id = "tracker_url_remover" - log = log.getChild(id) def __init__(self, plg_cfg: "PluginCfg") -> None: + super().__init__(plg_cfg) self.info = PluginInfo( id=self.id, @@ -47,42 +48,7 @@ class SXNGPlugin(Plugin): If URL should be modified, the returned string is the new URL to use.""" if not url_src: - cls.log.debug("missing a URL in field %s", field_name) + log.debug("missing a URL in field %s", field_name) return True - new_url = url_src - parsed_new_url = urlparse(url=new_url) - - for rule in TRACKER_PATTERNS: - - if not re.match(rule["urlPattern"], new_url): - # no match / ignore pattern - continue - - in_exceptions = False - for exception in rule["exceptions"]: - if re.match(exception, new_url): - in_exceptions = True - break - if in_exceptions: - # pattern is in the list of exceptions / ignore pattern - # hint: we can't break the outer pattern loop since we have - # overlapping urlPattern like ".*" - continue - - # remove tracker arguments from the url-query part - query_args: list[tuple[str, str]] = list(parse_qsl(parsed_new_url.query)) - - for name, val in query_args.copy(): - for reg in rule["trackerParams"]: - if re.match(reg, name): - cls.log.debug("%s remove tracker arg: %s='%s'", parsed_new_url.netloc, name, val) - query_args.remove((name, val)) - - parsed_new_url = parsed_new_url._replace(query=urlencode(query_args)) - new_url = urlunparse(parsed_new_url) - - if new_url != url_src: - return new_url - - return True + return TRACKER_PATTERNS.clean_url(url=url_src) diff --git a/searxng_extra/update/update_tracker_patterns.py b/searxng_extra/update/update_tracker_patterns.py deleted file mode 100644 index f8928d354..000000000 --- a/searxng_extra/update/update_tracker_patterns.py +++ /dev/null @@ -1,36 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -"""Fetch trackers""" - -import json -import httpx - -from searx.data import data_dir - -DATA_FILE = data_dir / "tracker_patterns.json" -CLEAR_LIST_URL = "https://raw.githubusercontent.com/ClearURLs/Rules/refs/heads/master/data.min.json" - - -def fetch_clear_url_filters(): - resp = httpx.get(CLEAR_LIST_URL) - if resp.status_code != 200: - # pylint: disable=broad-exception-raised - raise Exception(f"Error fetching ClearURL filter lists, HTTP code {resp.status_code}") - - providers = resp.json()["providers"] - rules = [] - for rule in providers.values(): - rules.append( - { - "urlPattern": rule["urlPattern"].replace("\\\\", "\\"), # fix javascript regex syntax - "exceptions": [exc.replace("\\\\", "\\") for exc in rule["exceptions"]], - "trackerParams": rule["rules"], - } - ) - - return rules - - -if __name__ == '__main__': - filter_list = fetch_clear_url_filters() - with DATA_FILE.open("w", encoding='utf-8') as f: - json.dump(filter_list, f, indent=4, sort_keys=True, ensure_ascii=False)