From 57f429fff0233e4d97bc739538c86a1fbc2eb7ab Mon Sep 17 00:00:00 2001 From: Schimon Jehudah Date: Tue, 26 Dec 2023 11:22:45 +0000 Subject: [PATCH] Add proxy services. Merry Christmas to one and all! --- assets/proxies.yaml | 209 ++++++++++++++++++++++++++++++++++++++ slixfeed/confighandler.py | 13 +-- slixfeed/datahandler.py | 15 ++- slixfeed/sqlitehandler.py | 3 +- slixfeed/urlhandler.py | 44 ++++++++ slixfeed/xmpphandler.py | 28 +++-- 6 files changed, 291 insertions(+), 21 deletions(-) create mode 100644 assets/proxies.yaml diff --git a/assets/proxies.yaml b/assets/proxies.yaml new file mode 100644 index 0000000..3e25bd4 --- /dev/null +++ b/assets/proxies.yaml @@ -0,0 +1,209 @@ +invidious: + hostname: + - youtu.be + - youtube.com + clearnet: + - https://incogtube.com + - https://vid.puffyan.us + - https://yt.artemislena.eu + - https://invidious.snopyta.org + - https://youtube.076.ne.jp + - https://invidious.osi.kr + - https://invidious-us.kavin.rocks + - https://inv.cthd.icu + - https://invidious.namazso.eu + - https://yewtu.be + - https://invidio.xamh.de + - https://invidious.kavin.rocks + - https://monocles.live + - https://inv.riverside.rocks + - https://invidious.lunar.icu + - https://y.com.sb + - https://inv.bp.projectsegfau.lt + - https://invidious.flokinet.to + - https://invidious.sethforprivacy.com + - https://invidious.esmailelbob.xyz + - https://ytb.trom.tf + - https://invidious.domain.glass + - https://tube.cthd.icu + - https://inv.vern.cc + - https://invidious.garudalinux.org + - https://youtube.owacon.moe + - https://invidious.tinfoil-hat.net + - https://iv.melmac.space + - https://invidious.tiekoetter.com + - https://invidious.baczek.me + - https://invidious.no-logs.com + - https://invidious.0011.lt + - https://yt.funami.tech + - https://inv.tux.pizza + - https://vid.priv.au + - https://not-ytb.blocus.ch + - https://inv.creller.net + - https://inv.zzls.xyz + - https://yt.floss.media + - https://invidious.slipfox.xyz + - https://par1.iv.ggtyler.dev 
+ - https://inv.citw.lgbt + - https://invidious.io.lol + - https://yt.oelrichsgarcia.de + - https://iv.nboeck.de + - https://invidious.protokolla.fi + - https://invidious.fi + - https://onion.tube + - https://inv.in.projectsegfau.lt + - https://invidious.privacydev.net + - https://invidious.takebackourtech.org + - https://qc1.iv.ggtyler.dev + - https://anontube.lvkaszus.pl + - https://invidious.asir.dev + - https://invidious.fdn.fr + - https://iv.datura.network + - https://invidious.private.coffee + - https://inv.pistasjis.net + - https://invidious.pavot.ca + - https://yt.cdaut.de + - https://yt.drgnz.club + - https://invidious.perennialte.ch + - https://yt.chaotic.ninja + - https://yt.omada.cafe + - https://super8.absturztau.be + - https://i.redsnake.io + - https://watch.supernets.org + - https://invidious.qwik.space + - https://farside.link/invidious + - https://inv.odyssey346.dev + - https://invidious.mutahar.rocks + - https://invidious.nerdvpn.de + - https://invidious.projectsegfau.lt + - https://invidious.weblibre.org + - https://iv.ggtyler.dev + - https://watch.thekitty.zone + - https://inv.us.projectsegfau.lt + - https://invidious.drgns.space + i2p: + - http://tube.i2p + - http://inv.cn.i2p + - http://jewtube.i2p + - http://ytmous.i2p + - http://pa7eextqat4wg35onzs4cnlhqa3gvzen243bcbrng67zyla4fqya.b32.i2p + - http://inv.vern.i2p + - http://inv.zzls.i2p + - http://verni6dr4qxjgjumnvesxerh5rvhv6oy5ddeibaqy5d7tgbiiyfa.b32.i2p + loki: [] + tor: + - http://tuberyps2pn6dor6h47brof3w2asmauahhk4ei42krugybzzzo55klad.onion + - http://qwikxxeiw4kgmml6vjw2bsxtviuwjce735dunai2djhu6q7qbacq73id.onion + - http://qwikxxt6jvggxzxe2v2fuzro5j7ibgphxmblmri6wkj5vpicdbo2kwad.onion + - http://c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid.onion + - http://grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad.onion + - http://invidious.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd.onion + - http://euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd.onion 
+ - http://invidious.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion + - http://iv.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd.onion + - http://kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad.onion + - http://ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid.onion + - http://osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd.onion + - http://u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad.onion + - http://w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd.onion + yggdrasil: + - http://[200:168a:c80a:b258:1dfe:f920:4414:6897] + +librarian: + hostname: + - odysee.com + clearnet: + - https://librarian.pussthecat.org + - https://odysee.076.ne.jp + - https://lbry.projectsegfau.lt + - https://librarian.esmailelbob.xyz + - https://lbry.mywire.org + - https://lbry.slipfox.xyz + - https://lbry.vern.cc + - https://lbry.ooguy.com + - https://lbn.frail.duckdns.org + - https://odysee.owacon.moe + - https://farside.link/librarian + i2p: [] + loki: [] + tor: + - http://librarian.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd.onion + - http://lbry.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion + - http://5znbzx2xcymhddzekfjib3isgqq4ilcyxa2bsq6vqmnvbtgu4f776lqd.onion + - http://bxewpsswttslepw27w2hhxhlizwm7l7y54x3jw5cfrb64hb6lgc557ad.onion + yggdrasil: [] + +nitter: + hostname: + - twitter.com + - x.com + clearnet: + - https://nitter.hu + - https://nitter.actionsack.com + - https://nitter.net + - https://nitter.1d4.us + - https://nitter.nixnet.services + - https://nitter.unixfox.eu + - https://nitter.sethforprivacy.com + - https://nitter.pussthecat.org + - https://nitter.it + - https://nitter.moomoo.me + - https://tw.artemislena.eu + - https://nitter.snopyta.org + - https://birdsite.xanny.family + - https://nitter.domain.glass + - https://read.whatever.social + - https://nitter.lacontrevoie.fr + - https://bird.trom.tf + - https://nitter.hostux.net + - https://nitter.sneed.network + - 
https://twitter.owacon.moe + - https://nitter.ggc-project.de + - https://unofficialbird.com + - https://nitter.fdn.fr + - https://nitter.no-logs.com + - https://nitter.slipfox.xyz + - https://nitter.one + - https://nitter.ungovernable.men + - https://nitter.private.coffee + - https://nitter.soopy.moe + - https://nitter.oksocial.net + - https://n.sneed.network + - https://nitter.qwik.space + - https://nitter.nohost.network + - https://de.nttr.stream + - https://farside.link/nitter + - https://nitter.42l.fr + - https://nitter.bus-hit.me + - https://nitter.ca + - https://nitter.eu + - https://nitter.grimneko.de + - https://nitter.kavin.rocks + - https://nitter.koyu.space + - https://nitter.namazso.eu + - https://nttr.stream + - https://twitter.076.ne.jp + - https://twitter.censors.us + - https://n.hyperborea.cloud + - https://n.biendeo.com + - https://n.opnxng.com + - https://nitter.adminforge.de + - https://nitter.catsarch.com + - https://nitter.cz + - https://nitter.esmailelbob.xyz + - https://nitter.in.projectsegfau.lt + - https://nitter.io.lol + - https://nitter.ktachibana.party + - https://nitter.kylrth.com + - https://nitter.poast.org + - https://nitter.privacydev.net + - https://nitter.salastil.com + - https://nitter.woodland.cafe + i2p: + - http://tm4rwkeysv3zz3q5yacyr4rlmca2c4etkdobfvuqzt6vsfsu4weq.b32.i2p + loki: [] + tor: + - http://qwikxxeiw4kgmml6vjw2bsxtviuwjce735dunai2djhu6q7qbacq73id.onion + - http://qwikxx2erhx6qrymued6ox2qkf2yeogjwypqvzoif4fqkljixasr6oid.onion + - http://n.sneed4fmhevap3ci4xhf4wgkf72lwk275lcgomnfgwniwmqvaxyluuid.onion + yggdrasil: [] diff --git a/slixfeed/confighandler.py b/slixfeed/confighandler.py index fa49aad..735d5bf 100644 --- a/slixfeed/confighandler.py +++ b/slixfeed/confighandler.py @@ -44,14 +44,14 @@ async def get_value_default(key, section): return result -async def get_list(key, file): +async def get_list(filename): """ Get settings default value. Parameters ---------- - key : str - Key: allow, deny, pathname, replace. 
+ filename : str + filename of yaml file. Returns ------- @@ -61,8 +61,9 @@ async def get_list(key, file): config_dir = filehandler.get_default_confdir() if not os.path.isdir(config_dir): config_dir = '/usr/share/slixfeed/' - config_file = os.path.join(config_dir, file) + config_file = os.path.join(config_dir, filename) with open(config_file) as defaults: - default = yaml.safe_load(defaults) - result = default[key] + # default = yaml.safe_load(defaults) + # result = default[key] + result = yaml.safe_load(defaults) return result diff --git a/slixfeed/datahandler.py b/slixfeed/datahandler.py index 86f5850..afdb84c 100644 --- a/slixfeed/datahandler.py +++ b/slixfeed/datahandler.py @@ -12,6 +12,8 @@ TODO 1) Support Gemini and Gopher. +2) Check also for HTML, not only feed.bozo. + """ from aiohttp import ClientError, ClientSession, ClientTimeout @@ -444,7 +446,12 @@ async def add_feed(db_file, url): if res[0]: feed = parse(res[0]) title = get_title(url, feed) - if feed.bozo: + if not feed.entries: + try: + feed["feed"]["title"] + except: + msg = await probe_page(add_feed, url, res[0], db_file=db_file) + elif feed.bozo: bozo = ( "Bozo detected. 
Failed to load: {}" ).format(url) @@ -625,7 +632,8 @@ async def feed_mode_request(url, tree): """ feeds = {} parted_url = urlsplit(url) - paths = await get_list("pathnames", "lists.yaml") + paths = await get_list("lists.yaml") + paths = paths["pathnames"] for path in paths: address = urlunsplit([ parted_url.scheme, @@ -725,7 +733,8 @@ async def feed_mode_scan(url, tree): feeds = {} # paths = [] # TODO Test - paths = await get_list("pathnames", "lists.yaml") + paths = await get_list("lists.yaml") + paths = paths["pathnames"] for path in paths: # xpath_query = "//*[@*[contains(.,'{}')]]".format(path) # xpath_query = "//a[contains(@href,'{}')]".format(path) diff --git a/slixfeed/sqlitehandler.py b/slixfeed/sqlitehandler.py index 452d83e..f460ce5 100644 --- a/slixfeed/sqlitehandler.py +++ b/slixfeed/sqlitehandler.py @@ -1626,7 +1626,8 @@ async def set_filters_value_default(cur, key): ) cur.execute(sql, (key,)) if not cur.fetchone(): - val = await config.get_list(key, "lists.yaml") + val = await config.get_list("lists.yaml") + val = val[key] val = ",".join(val) sql = ( "INSERT " diff --git a/slixfeed/urlhandler.py b/slixfeed/urlhandler.py index c46e7c6..00bdac8 100644 --- a/slixfeed/urlhandler.py +++ b/slixfeed/urlhandler.py @@ -14,10 +14,54 @@ TODO """ +from confighandler import get_list from email.utils import parseaddr +import random from urllib.parse import urljoin, urlsplit, urlunsplit +# NOTE hostname and protocol are listed as one in file +# proxies.yaml. Perhaps a better practice would be to have +# them separated. File proxies.yaml will remain as is in order +# to be coordinated with the dataset of project LibRedirect. +async def replace_hostname(url): + """ + Replace hostname. + + Parameters + ---------- + url : str + URL. + + Returns + ------- + url : str + URL. 
+ """ + parted_url = urlsplit(url) + protocol = parted_url.scheme + hostname = parted_url.netloc + pathname = parted_url.path + queries = parted_url.query + fragment = parted_url.fragment + proxies = await get_list("proxies.yaml") + for proxy in proxies: + proxy = proxies[proxy] + if hostname in proxy["hostname"]: + select_proxy = random.choice(proxy["clearnet"]) + parted_proxy = urlsplit(select_proxy) + protocol_new = parted_proxy.scheme + hostname_new = parted_proxy.netloc + url = urlunsplit([ + protocol_new, + hostname_new, + pathname, + queries, + fragment + ]) + return url + + def feed_to_http(url): """ Replace scheme FEED by HTTP. diff --git a/slixfeed/xmpphandler.py b/slixfeed/xmpphandler.py index 1596435..d36513b 100644 --- a/slixfeed/xmpphandler.py +++ b/slixfeed/xmpphandler.py @@ -52,15 +52,16 @@ NOTE """ import asyncio -import logging -# import os -import slixmpp -from slixmpp.exceptions import IqError, IqTimeout -from random import randrange +from confighandler import get_list import datahandler as fetcher from datetimehandler import current_time from filehandler import initdb import listhandler as lister +import logging +# import os +from random import randrange +import slixmpp +from slixmpp.exceptions import IqError, IqTimeout import sqlitehandler as sqlite import taskhandler as tasker import urlhandler as urlfixer @@ -907,8 +908,6 @@ class Slixfeed(slixmpp.ClientXMPP): case _ if (message_lowercase.startswith("http") or message_lowercase.startswith("feed:")): url = message - if url.startswith("feed:"): - url = urlfixer.feed_to_http(url) await tasker.clean_tasks_xmpp( jid, ["status"] @@ -917,6 +916,12 @@ class Slixfeed(slixmpp.ClientXMPP): "📫️ Processing request to fetch data from {}" ).format(url) process_task_message(self, jid, task) + if url.startswith("feed:"): + url = urlfixer.feed_to_http(url) + # url_alt = await urlfixer.replace_hostname(url) + # if url_alt: + # url = url_alt + url = (await urlfixer.replace_hostname(url)) or url action = await 
initdb( jid, fetcher.add_feed, @@ -1147,16 +1152,17 @@ class Slixfeed(slixmpp.ClientXMPP): data = message[5:] data = data.split() url = data[0] - task = ( - "📫️ Processing request to fetch data from {}" - ).format(url) - process_task_message(self, jid, task) await tasker.clean_tasks_xmpp( jid, ["status"] ) + task = ( + "📫️ Processing request to fetch data from {}" + ).format(url) + process_task_message(self, jid, task) if url.startswith("feed:"): url = urlfixer.feed_to_http(url) + url = (await urlfixer.replace_hostname(url)) or url match len(data): case 1: if url.startswith("http"):