Add ClearURLs functionality.
Fix Proxy functionality (remove www).
This commit is contained in:
parent
4d03d6e16e
commit
8fbe97e357
5 changed files with 382 additions and 51 deletions
235
assets/queries.yaml
Normal file
235
assets/queries.yaml
Normal file
|
@ -0,0 +1,235 @@
|
||||||
|
# The purpose of this list is to remove queries that
|
||||||
|
# are mostly utilized as trackers for advertising.
|
||||||
|
|
||||||
|
trackers:
|
||||||
|
- ad
|
||||||
|
- ad_medium
|
||||||
|
- ad_name
|
||||||
|
- ad_pvid
|
||||||
|
- ad_sub
|
||||||
|
# ad_tags
|
||||||
|
- advertising-id
|
||||||
|
# aem_p4p_detail
|
||||||
|
- af
|
||||||
|
- aff
|
||||||
|
- aff_fcid
|
||||||
|
- aff_fsk
|
||||||
|
- aff_platform
|
||||||
|
- aff_trace_key
|
||||||
|
- affparams
|
||||||
|
- afSmartRedirect
|
||||||
|
- afftrack
|
||||||
|
- affparams
|
||||||
|
# aid
|
||||||
|
- algo_exp_id
|
||||||
|
- algo_pvid
|
||||||
|
- ar
|
||||||
|
# ascsubtag
|
||||||
|
# asc_contentid
|
||||||
|
- asgtbndr
|
||||||
|
- atc
|
||||||
|
- ats
|
||||||
|
- autostart
|
||||||
|
# b64e# breaks yandex
|
||||||
|
- bizType
|
||||||
|
# block
|
||||||
|
- bta
|
||||||
|
- businessType
|
||||||
|
- campaign
|
||||||
|
- campaignId
|
||||||
|
# __cf_chl_rt_tk
|
||||||
|
# cid# breaks sacred magick
|
||||||
|
- ck
|
||||||
|
# clickid
|
||||||
|
# client_id
|
||||||
|
# cm_ven
|
||||||
|
# cmd
|
||||||
|
- content-id
|
||||||
|
- crid
|
||||||
|
- cst
|
||||||
|
- cts
|
||||||
|
- curPageLogUid
|
||||||
|
# data# breaks yandex
|
||||||
|
# dchild
|
||||||
|
# dclid
|
||||||
|
- deals-widget
|
||||||
|
- dgcid
|
||||||
|
- dicbo
|
||||||
|
# dt
|
||||||
|
- edd
|
||||||
|
- edm_click_module
|
||||||
|
# ei
|
||||||
|
# embed
|
||||||
|
# _encoding
|
||||||
|
# etext# breaks yandex
|
||||||
|
- eventSource
|
||||||
|
- fbclid
|
||||||
|
- feature
|
||||||
|
- field-lbr_brands_browse-bin
|
||||||
|
- forced_click
|
||||||
|
# fr
|
||||||
|
- frs
|
||||||
|
# from# breaks yandex
|
||||||
|
- _ga
|
||||||
|
- ga_order
|
||||||
|
- ga_search_query
|
||||||
|
- ga_search_type
|
||||||
|
- ga_view_type
|
||||||
|
- gatewayAdapt
|
||||||
|
# gclid
|
||||||
|
# gclsrc
|
||||||
|
- gh_jid
|
||||||
|
- gps-id
|
||||||
|
# gs_lcp
|
||||||
|
- gt
|
||||||
|
- guccounter
|
||||||
|
- hdtime
|
||||||
|
- hosted_button_id
|
||||||
|
- ICID
|
||||||
|
- ico
|
||||||
|
- ig_rid
|
||||||
|
# idzone
|
||||||
|
# iflsig
|
||||||
|
- intcmp
|
||||||
|
- irclickid
|
||||||
|
# irgwc
|
||||||
|
# irpid
|
||||||
|
- is_from_webapp
|
||||||
|
- itid
|
||||||
|
# itok
|
||||||
|
# katds_labels
|
||||||
|
# keywords
|
||||||
|
- keyno
|
||||||
|
- l10n
|
||||||
|
- linkCode
|
||||||
|
- mc
|
||||||
|
- mid
|
||||||
|
- __mk_de_DE
|
||||||
|
- mp
|
||||||
|
- nats
|
||||||
|
- nci
|
||||||
|
- obOrigUrl
|
||||||
|
- offer_id
|
||||||
|
- optout
|
||||||
|
- oq
|
||||||
|
- organic_search_click
|
||||||
|
- pa
|
||||||
|
- Partner
|
||||||
|
- partner
|
||||||
|
- partner_id
|
||||||
|
- partner_ID
|
||||||
|
- pcampaignid
|
||||||
|
- pd_rd_i
|
||||||
|
- pd_rd_r
|
||||||
|
- pd_rd_w
|
||||||
|
- pd_rd_wg
|
||||||
|
- pdp_npi
|
||||||
|
- pf_rd_i
|
||||||
|
- pf_rd_m
|
||||||
|
- pf_rd_p
|
||||||
|
- pf_rd_r
|
||||||
|
- pf_rd_s
|
||||||
|
- pf_rd_t
|
||||||
|
- pg
|
||||||
|
- PHPSESSID
|
||||||
|
- pk_campaign
|
||||||
|
- pdp_ext_f
|
||||||
|
- pkey
|
||||||
|
- platform
|
||||||
|
- plkey
|
||||||
|
- pqr
|
||||||
|
- pr
|
||||||
|
- pro
|
||||||
|
- prod
|
||||||
|
- prom
|
||||||
|
- promo
|
||||||
|
- promocode
|
||||||
|
- promoid
|
||||||
|
- psc
|
||||||
|
- psprogram
|
||||||
|
- pvid
|
||||||
|
- qid
|
||||||
|
# r
|
||||||
|
- realDomain
|
||||||
|
- recruiter_id
|
||||||
|
- redirect
|
||||||
|
- ref
|
||||||
|
- ref_
|
||||||
|
- ref_src
|
||||||
|
- refcode
|
||||||
|
- referrer
|
||||||
|
- refinements
|
||||||
|
- reftag
|
||||||
|
- rf
|
||||||
|
- rnid
|
||||||
|
- rowan_id1
|
||||||
|
- rowan_msg_id
|
||||||
|
# rss
|
||||||
|
# sCh
|
||||||
|
- sclient
|
||||||
|
- scm
|
||||||
|
- scm_id
|
||||||
|
- scm-url
|
||||||
|
# sd
|
||||||
|
- sender_device
|
||||||
|
- sh
|
||||||
|
- shareId
|
||||||
|
- showVariations
|
||||||
|
- si
|
||||||
|
# sid# breaks whatsup.org.il
|
||||||
|
- ___SID
|
||||||
|
# site_id
|
||||||
|
- sk
|
||||||
|
- smid
|
||||||
|
- social_params
|
||||||
|
- source
|
||||||
|
- sourceId
|
||||||
|
- sp_csd
|
||||||
|
- spLa
|
||||||
|
- spm
|
||||||
|
- spreadType
|
||||||
|
# sprefix
|
||||||
|
- sr
|
||||||
|
- src
|
||||||
|
- _src
|
||||||
|
- src_cmp
|
||||||
|
- src_player
|
||||||
|
- src_src
|
||||||
|
- srcSns
|
||||||
|
- su
|
||||||
|
# sxin_0_pb
|
||||||
|
- _t
|
||||||
|
# tag
|
||||||
|
- tcampaign
|
||||||
|
- td
|
||||||
|
- terminal_id
|
||||||
|
# text
|
||||||
|
- th  # Sometimes restored after page load
|
||||||
|
# title
|
||||||
|
- tracelog
|
||||||
|
- traffic_id
|
||||||
|
- traffic_source
|
||||||
|
- traffic_type
|
||||||
|
- tt
|
||||||
|
- uact
|
||||||
|
- ug_edm_item_id
|
||||||
|
- utm
|
||||||
|
# utm1
|
||||||
|
# utm2
|
||||||
|
# utm3
|
||||||
|
# utm4
|
||||||
|
# utm5
|
||||||
|
# utm6
|
||||||
|
# utm7
|
||||||
|
# utm8
|
||||||
|
# utm9
|
||||||
|
- utm_campaign
|
||||||
|
- utm_content
|
||||||
|
- utm_medium
|
||||||
|
- utm_source
|
||||||
|
- utm_term
|
||||||
|
- uuid
|
||||||
|
# utype
|
||||||
|
# ve
|
||||||
|
# ved
|
||||||
|
# zone
|
|
@ -14,6 +14,10 @@ TODO
|
||||||
|
|
||||||
2) Check also for HTML, not only feed.bozo.
|
2) Check also for HTML, not only feed.bozo.
|
||||||
|
|
||||||
|
3) Add "if is_feed(url, feed)" to view_entry and view_feed
|
||||||
|
|
||||||
|
4) Refactor view_entry and view_feed - Why "if" twice?
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from aiohttp import ClientError, ClientSession, ClientTimeout
|
from aiohttp import ClientError, ClientSession, ClientTimeout
|
||||||
|
@ -193,7 +197,6 @@ async def download_updates(db_file, url=None):
|
||||||
read_status = 1
|
read_status = 1
|
||||||
entry = (
|
entry = (
|
||||||
title,
|
title,
|
||||||
summary,
|
|
||||||
link,
|
link,
|
||||||
eid,
|
eid,
|
||||||
source,
|
source,
|
||||||
|
@ -446,18 +449,7 @@ async def add_feed(db_file, url):
|
||||||
if res[0]:
|
if res[0]:
|
||||||
feed = parse(res[0])
|
feed = parse(res[0])
|
||||||
title = get_title(url, feed)
|
title = get_title(url, feed)
|
||||||
if not feed.entries:
|
if is_feed(url, feed):
|
||||||
try:
|
|
||||||
feed["feed"]["title"]
|
|
||||||
except:
|
|
||||||
msg = await probe_page(add_feed, url, res[0], db_file=db_file)
|
|
||||||
elif feed.bozo:
|
|
||||||
bozo = (
|
|
||||||
"Bozo detected. Failed to load: {}"
|
|
||||||
).format(url)
|
|
||||||
print(bozo)
|
|
||||||
msg = await probe_page(add_feed, url, res[0], db_file=db_file)
|
|
||||||
else:
|
|
||||||
status = res[1]
|
status = res[1]
|
||||||
msg = await sqlite.insert_feed(
|
msg = await sqlite.insert_feed(
|
||||||
db_file,
|
db_file,
|
||||||
|
@ -466,6 +458,13 @@ async def add_feed(db_file, url):
|
||||||
status
|
status
|
||||||
)
|
)
|
||||||
await download_updates(db_file, [url])
|
await download_updates(db_file, [url])
|
||||||
|
else:
|
||||||
|
msg = await probe_page(
|
||||||
|
add_feed,
|
||||||
|
url,
|
||||||
|
res[0],
|
||||||
|
db_file=db_file
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
status = res[1]
|
status = res[1]
|
||||||
msg = (
|
msg = (
|
||||||
|
@ -673,7 +672,7 @@ async def feed_mode_request(url, tree):
|
||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
if len(feeds) > 1:
|
if len(feeds) > 1:
|
||||||
positive = 0
|
counter = 0
|
||||||
msg = (
|
msg = (
|
||||||
"RSS URL discovery has found {} feeds:\n```\n"
|
"RSS URL discovery has found {} feeds:\n```\n"
|
||||||
).format(len(feeds))
|
).format(len(feeds))
|
||||||
|
@ -689,7 +688,13 @@ async def feed_mode_request(url, tree):
|
||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
if feed_amnt:
|
if feed_amnt:
|
||||||
positive = 1
|
# NOTE Because there could be many false positives
|
||||||
|
# which are revealed in second phase of scan, we
|
||||||
|
# could end with a single feed, which would be
|
||||||
|
# listed instead of fetched, so feed_mark is
|
||||||
|
# utilized in order to make fetch possible.
|
||||||
|
feed_mark = [feed_addr]
|
||||||
|
counter += 1
|
||||||
msg += (
|
msg += (
|
||||||
"Title: {}\n"
|
"Title: {}\n"
|
||||||
"Link : {}\n"
|
"Link : {}\n"
|
||||||
|
@ -700,10 +705,13 @@ async def feed_mode_request(url, tree):
|
||||||
feed_addr,
|
feed_addr,
|
||||||
feed_amnt
|
feed_amnt
|
||||||
)
|
)
|
||||||
msg += (
|
if counter > 1:
|
||||||
"```\nThe above feeds were extracted from\n{}"
|
msg += (
|
||||||
).format(url)
|
"```\nThe above feeds were extracted from\n{}"
|
||||||
if not positive:
|
).format(url)
|
||||||
|
elif feed_mark:
|
||||||
|
return feed_mark
|
||||||
|
else:
|
||||||
msg = (
|
msg = (
|
||||||
"No feeds were found for {}"
|
"No feeds were found for {}"
|
||||||
).format(url)
|
).format(url)
|
||||||
|
@ -887,3 +895,41 @@ async def feed_mode_auto_discovery(url, tree):
|
||||||
elif feeds:
|
elif feeds:
|
||||||
feed_addr = join_url(url, feeds[0].xpath('@href')[0])
|
feed_addr = join_url(url, feeds[0].xpath('@href')[0])
|
||||||
return [feed_addr]
|
return [feed_addr]
|
||||||
|
|
||||||
|
|
||||||
|
def is_feed(url, feed):
    """
    Determine whether document is feed or not.

    A document is accepted when it has entries and the parser did not
    flag it as bozo, or when it has no entries but does carry a
    feed-level title. Every verdict is printed for diagnostics.

    Parameters
    ----------
    url : str
        URL (used only in the printed message).
    feed : dict
        Parsed feed; must expose ``entries`` and ``bozo`` attributes
        and support ``feed["feed"]["title"]`` lookup.

    Returns
    -------
    val : boolean
        True or False.
    """
    if not feed.entries:
        try:
            feed["feed"]["title"]
        except (AttributeError, KeyError, TypeError):
            val = False
            msg = (
                "No entries nor title for {}"
                ).format(url)
        else:
            # A titled document without entries is treated as an
            # (empty) feed. Without this branch neither val nor msg
            # would be bound and print(msg) would raise
            # UnboundLocalError.
            val = True
            msg = (
                "Good feed for {}"
                ).format(url)
    elif feed.bozo:
        val = False
        msg = (
            "Bozo detected for {}"
            ).format(url)
    else:
        val = True
        msg = (
            "Good feed for {}"
            ).format(url)
    print(msg)
    return val
|
||||||
|
|
|
@ -24,6 +24,7 @@ import confighandler as config
|
||||||
import datahandler as datahandler
|
import datahandler as datahandler
|
||||||
from datetimehandler import current_time, rfc2822_to_iso8601
|
from datetimehandler import current_time, rfc2822_to_iso8601
|
||||||
from sqlite3 import connect, Error
|
from sqlite3 import connect, Error
|
||||||
|
from urlhandler import remove_tracking_parameters
|
||||||
|
|
||||||
# from eliot import start_action, to_file
|
# from eliot import start_action, to_file
|
||||||
# # with start_action(action_type="list_feeds()", db=db_file):
|
# # with start_action(action_type="list_feeds()", db=db_file):
|
||||||
|
@ -88,7 +89,6 @@ def create_tables(db_file):
|
||||||
"CREATE TABLE IF NOT EXISTS entries ("
|
"CREATE TABLE IF NOT EXISTS entries ("
|
||||||
"id INTEGER PRIMARY KEY,"
|
"id INTEGER PRIMARY KEY,"
|
||||||
"title TEXT NOT NULL,"
|
"title TEXT NOT NULL,"
|
||||||
"summary TEXT NOT NULL,"
|
|
||||||
"link TEXT NOT NULL,"
|
"link TEXT NOT NULL,"
|
||||||
"entry_id TEXT,"
|
"entry_id TEXT,"
|
||||||
"source TEXT NOT NULL,"
|
"source TEXT NOT NULL,"
|
||||||
|
@ -100,7 +100,6 @@ def create_tables(db_file):
|
||||||
"CREATE TABLE IF NOT EXISTS archive ("
|
"CREATE TABLE IF NOT EXISTS archive ("
|
||||||
"id INTEGER PRIMARY KEY,"
|
"id INTEGER PRIMARY KEY,"
|
||||||
"title TEXT NOT NULL,"
|
"title TEXT NOT NULL,"
|
||||||
"summary TEXT NOT NULL,"
|
|
||||||
"link TEXT NOT NULL,"
|
"link TEXT NOT NULL,"
|
||||||
"entry_id TEXT,"
|
"entry_id TEXT,"
|
||||||
"source TEXT NOT NULL,"
|
"source TEXT NOT NULL,"
|
||||||
|
@ -434,11 +433,11 @@ async def get_entry_unread(db_file, num=None):
|
||||||
# "DESC LIMIT :num"
|
# "DESC LIMIT :num"
|
||||||
# )
|
# )
|
||||||
sql = (
|
sql = (
|
||||||
"SELECT id, title, summary, link, source, timestamp "
|
"SELECT id, title, link, source, timestamp "
|
||||||
"FROM entries "
|
"FROM entries "
|
||||||
"WHERE read = 0 "
|
"WHERE read = 0 "
|
||||||
"UNION ALL "
|
"UNION ALL "
|
||||||
"SELECT id, title, summary, link, source, timestamp "
|
"SELECT id, title, link, source, timestamp "
|
||||||
"FROM archive "
|
"FROM archive "
|
||||||
"ORDER BY timestamp "
|
"ORDER BY timestamp "
|
||||||
"DESC LIMIT :num"
|
"DESC LIMIT :num"
|
||||||
|
@ -469,41 +468,45 @@ async def get_entry_unread(db_file, num=None):
|
||||||
for result in results:
|
for result in results:
|
||||||
ix = result[0]
|
ix = result[0]
|
||||||
title = result[1]
|
title = result[1]
|
||||||
summary = result[2]
|
# # TODO Retrieve summary from feed
|
||||||
# Remove HTML tags
|
# # See datahandler.view_entry
|
||||||
try:
|
# summary = result[2]
|
||||||
summary = BeautifulSoup(summary, "lxml").text
|
# # Remove HTML tags
|
||||||
except:
|
# try:
|
||||||
print(result[2])
|
# summary = BeautifulSoup(summary, "lxml").text
|
||||||
# TODO Limit text length
|
# except:
|
||||||
summary = summary.replace("\n\n\n", "\n\n")
|
# print(result[2])
|
||||||
length = await get_settings_value(db_file, "length")
|
# breakpoint()
|
||||||
summary = summary[:length] + " […]"
|
# # TODO Limit text length
|
||||||
summary = summary.strip().split('\n')
|
# summary = summary.replace("\n\n\n", "\n\n")
|
||||||
summary = ["> " + line for line in summary]
|
# length = await get_settings_value(db_file, "length")
|
||||||
summary = "\n".join(summary)
|
# summary = summary[:length] + " […]"
|
||||||
link = result[3]
|
# summary = summary.strip().split('\n')
|
||||||
|
# summary = ["> " + line for line in summary]
|
||||||
|
# summary = "\n".join(summary)
|
||||||
|
link = result[2]
|
||||||
|
link = await remove_tracking_parameters(link)
|
||||||
sql = (
|
sql = (
|
||||||
"SELECT name "
|
"SELECT name "
|
||||||
"FROM feeds "
|
"FROM feeds "
|
||||||
"WHERE address = :source "
|
"WHERE address = :source "
|
||||||
)
|
)
|
||||||
source = result[4]
|
source = result[3]
|
||||||
feed = cur.execute(sql, (source,))
|
feed = cur.execute(sql, (source,))
|
||||||
feed = feed.fetchone()[0]
|
feed = feed.fetchone()[0]
|
||||||
if num > 1:
|
if num > 1:
|
||||||
news_list += (
|
news_list += (
|
||||||
"\n{}\n{}\n"
|
"\n{}\n{}\n{}\n"
|
||||||
).format(
|
).format(
|
||||||
str(title),
|
str(title),
|
||||||
str(link)
|
str(link),
|
||||||
|
str(feed)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
news_list = (
|
news_list = (
|
||||||
"{}\n\n{}\n\n{}\n{}"
|
"{}\n{}\n{}"
|
||||||
).format(
|
).format(
|
||||||
str(title),
|
str(title),
|
||||||
str(summary),
|
|
||||||
str(link),
|
str(link),
|
||||||
str(feed)
|
str(feed)
|
||||||
)
|
)
|
||||||
|
@ -532,7 +535,7 @@ async def mark_entry_as_read(cur, ix):
|
||||||
"""
|
"""
|
||||||
sql = (
|
sql = (
|
||||||
"UPDATE entries "
|
"UPDATE entries "
|
||||||
"SET summary = '', read = 1 "
|
"SET read = 1 "
|
||||||
"WHERE id = ?"
|
"WHERE id = ?"
|
||||||
)
|
)
|
||||||
cur.execute(sql, (ix,))
|
cur.execute(sql, (ix,))
|
||||||
|
@ -554,7 +557,7 @@ async def mark_source_as_read(db_file, source):
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
sql = (
|
sql = (
|
||||||
"UPDATE entries "
|
"UPDATE entries "
|
||||||
"SET summary = '', read = 1 "
|
"SET read = 1 "
|
||||||
"WHERE source = ?"
|
"WHERE source = ?"
|
||||||
)
|
)
|
||||||
cur.execute(sql, (source,))
|
cur.execute(sql, (source,))
|
||||||
|
@ -574,7 +577,7 @@ async def mark_all_as_read(db_file):
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
sql = (
|
sql = (
|
||||||
"UPDATE entries "
|
"UPDATE entries "
|
||||||
"SET summary = '', read = 1 "
|
"SET read = 1 "
|
||||||
)
|
)
|
||||||
cur.execute(sql)
|
cur.execute(sql)
|
||||||
sql = (
|
sql = (
|
||||||
|
@ -892,23 +895,23 @@ async def add_entry(cur, entry):
|
||||||
"INSERT "
|
"INSERT "
|
||||||
"INTO entries("
|
"INTO entries("
|
||||||
"title, "
|
"title, "
|
||||||
"summary, "
|
|
||||||
"link, "
|
"link, "
|
||||||
"entry_id, "
|
"entry_id, "
|
||||||
"source, "
|
"source, "
|
||||||
"timestamp, "
|
"timestamp, "
|
||||||
"read"
|
"read"
|
||||||
") "
|
") "
|
||||||
"VALUES(?, ?, ?, ?, ?, ?, ?)"
|
"VALUES(?, ?, ?, ?, ?, ?)"
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
cur.execute(sql, entry)
|
cur.execute(sql, entry)
|
||||||
except:
|
except:
|
||||||
print(current_time(), "COROUTINE OBJECT NOW")
|
print(current_time(), "COROUTINE OBJECT NOW")
|
||||||
print(entry[6])
|
# for i in entry:
|
||||||
print(type(entry[6]))
|
# print(type(i))
|
||||||
|
# print(i)
|
||||||
|
# print(type(entry))
|
||||||
print(entry)
|
print(entry)
|
||||||
print(type(entry))
|
|
||||||
print(current_time(), "COROUTINE OBJECT NOW")
|
print(current_time(), "COROUTINE OBJECT NOW")
|
||||||
# breakpoint()
|
# breakpoint()
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,14 @@ TODO
|
||||||
from confighandler import get_list
|
from confighandler import get_list
|
||||||
from email.utils import parseaddr
|
from email.utils import parseaddr
|
||||||
import random
|
import random
|
||||||
from urllib.parse import urljoin, urlsplit, urlunsplit
|
from urllib.parse import (
|
||||||
|
parse_qs,
|
||||||
|
urlencode,
|
||||||
|
urljoin,
|
||||||
|
urlparse,
|
||||||
|
urlsplit,
|
||||||
|
urlunsplit
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# NOTE hostname and protocol are listed as one in file
|
# NOTE hostname and protocol are listed as one in file
|
||||||
|
@ -41,6 +48,7 @@ async def replace_hostname(url):
|
||||||
parted_url = urlsplit(url)
|
parted_url = urlsplit(url)
|
||||||
protocol = parted_url.scheme
|
protocol = parted_url.scheme
|
||||||
hostname = parted_url.netloc
|
hostname = parted_url.netloc
|
||||||
|
hostname = hostname.replace("www.","")
|
||||||
pathname = parted_url.path
|
pathname = parted_url.path
|
||||||
queries = parted_url.query
|
queries = parted_url.query
|
||||||
fragment = parted_url.fragment
|
fragment = parted_url.fragment
|
||||||
|
@ -62,6 +70,41 @@ async def replace_hostname(url):
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
|
async def remove_tracking_parameters(url):
    """
    Remove queries with tracking parameters.

    The query string is parsed, every parameter whose name appears
    under the "trackers" key of queries.yaml is dropped, and the URL
    is reassembled. Parameter names are matched exactly
    (case-sensitive).

    Parameters
    ----------
    url : str
        URL.

    Returns
    -------
    url : str
        URL.
    """
    parted_url = urlsplit(url)
    if not parted_url.query:
        # Nothing to filter; skip loading the tracker list. The URL
        # is still passed through urlunsplit so the result matches
        # what the filtering path would produce.
        return urlunsplit(parted_url)
    # keep_blank_values=True so that legitimate parameters with an
    # empty value (e.g. "?q=&page=2") are not silently discarded.
    queries = parse_qs(parted_url.query, keep_blank_values=True)
    trackers = await get_list("queries.yaml")
    trackers = set(trackers["trackers"])
    queries = {
        name: values
        for name, values in queries.items()
        if name not in trackers
        }
    # doseq=True expands repeated parameters ("a=1&a=2") properly.
    queries_new = urlencode(queries, doseq=True)
    url = urlunsplit([
        parted_url.scheme,
        parted_url.netloc,
        parted_url.path,
        queries_new,
        parted_url.fragment
        ])
    return url
|
||||||
|
|
||||||
|
|
||||||
def feed_to_http(url):
|
def feed_to_http(url):
|
||||||
"""
|
"""
|
||||||
Replace scheme FEED by HTTP.
|
Replace scheme FEED by HTTP.
|
||||||
|
|
|
@ -3,6 +3,10 @@
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
1) Split into modules (e.g. slixfeed/xmpp/bookmarks.py)
|
||||||
|
|
||||||
FIXME
|
FIXME
|
||||||
|
|
||||||
1) Function check_readiness or event "changed_status" is causing for
|
1) Function check_readiness or event "changed_status" is causing for
|
||||||
|
|
Loading…
Reference in a new issue