From b77ef5346f2b2cbbb478ccd5c1688e156c9185ae Mon Sep 17 00:00:00 2001 From: Schimon Jehudah Date: Thu, 4 Jan 2024 12:38:22 +0000 Subject: [PATCH] More segregation of code --- assets/proxies.yaml | 937 ++++++++++++++++++++------------------- slixfeed/action.py | 126 +++++- slixfeed/config.py | 34 +- slixfeed/crawl.py | 6 +- slixfeed/fetch.py | 11 +- slixfeed/filter.py | 152 +++++++ slixfeed/sqlite.py | 365 +++++---------- slixfeed/task.py | 58 ++- slixfeed/url.py | 5 +- slixfeed/xmpp/client.py | 5 +- slixfeed/xmpp/process.py | 16 +- 11 files changed, 937 insertions(+), 778 deletions(-) create mode 100644 slixfeed/filter.py diff --git a/assets/proxies.yaml b/assets/proxies.yaml index 0a97889..792c686 100644 --- a/assets/proxies.yaml +++ b/assets/proxies.yaml @@ -1,477 +1,478 @@ -anonymousoverflow: - clearnet: - - https://ao.phreedom.club - - https://overflow.hostux.net - - https://ao.foss.wtf - - https://overflow.adminforge.de - - https://overflow.lunar.icu - - https://anonymousoverflow.esmailelbob.xyz - - https://overflow.smnz.de - - https://ao.vern.cc - - https://overflow.777.tf - - https://code.whatever.social - - https://stackoverflow.vern.cc - - https://anonymousoverflow.vern.cc - - https://ao.bloatcat.tk - - https://se.chaotic.ninja - - https://anonymousoverflow.privacyfucking.rocks - - https://overflow.projectsegfau.lt - - https://anonoverflow.frontendfriendly.xyz - - https://overflow.fascinated.cc - i2p: - - http://vernmzgraj6aaoafmehupvtkkynpaa67rxcdj2kinwiy6konn6rq.b32.i2p - loki: [] - tor: - - http://anonymousoverflow.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd.onion - - http://ao.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion - yggdrasil: - - http://[301:f69c:2017:b6b8::8] - hostname: - - stackoverflow.com - type: - - link +proxies: + anonymousoverflow: + clearnet: + - https://ao.phreedom.club + - https://overflow.hostux.net + - https://ao.foss.wtf + - https://overflow.adminforge.de + - https://overflow.lunar.icu + - https://anonymousoverflow.esmailelbob.xyz + - https://overflow.smnz.de + - https://ao.vern.cc + - https://overflow.777.tf + - https://code.whatever.social + - https://stackoverflow.vern.cc + - https://anonymousoverflow.vern.cc + - https://ao.bloatcat.tk + - https://se.chaotic.ninja + - https://anonymousoverflow.privacyfucking.rocks + - https://overflow.projectsegfau.lt + - https://anonoverflow.frontendfriendly.xyz + - https://overflow.fascinated.cc + i2p: + - http://vernmzgraj6aaoafmehupvtkkynpaa67rxcdj2kinwiy6konn6rq.b32.i2p + loki: [] + tor: + - http://anonymousoverflow.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd.onion + - http://ao.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion + yggdrasil: + - http://[301:f69c:2017:b6b8::8] + hostname: + - stackoverflow.com + type: + - link -dumb: - clearnet: - - https://dumb.privacydev.net - - https://dm.vern.cc - - https://dumb.lunar.icu - - https://dumb.esmailelbob.xyz - hostname: - - genius.com - type: - - link + dumb: + clearnet: + - https://dumb.privacydev.net + - https://dm.vern.cc + - https://dumb.lunar.icu + - https://dumb.esmailelbob.xyz + hostname: + - genius.com + type: + - link -invidious: - clearnet: - - https://incogtube.com - - https://vid.puffyan.us - - https://yt.artemislena.eu - - https://invidious.snopyta.org - - https://youtube.076.ne.jp - - https://invidious.osi.kr - - https://invidious-us.kavin.rocks - - https://inv.cthd.icu - - https://invidious.namazso.eu - - https://yewtu.be - - https://invidio.xamh.de - - https://invidious.kavin.rocks - - https://monocles.live - - https://inv.riverside.rocks - - https://invidious.lunar.icu - - https://y.com.sb - - https://inv.bp.projectsegfau.lt - - https://invidious.flokinet.to - - https://invidious.sethforprivacy.com - - https://invidious.esmailelbob.xyz - - https://ytb.trom.tf - - https://invidious.domain.glass - - https://tube.cthd.icu - - https://inv.vern.cc - - https://invidious.garudalinux.org - - https://youtube.owacon.moe - - https://invidious.tinfoil-hat.net - - https://iv.melmac.space - - https://invidious.tiekoetter.com - - https://invidious.baczek.me - - https://invidious.no-logs.com - - https://invidious.0011.lt - - https://yt.funami.tech - - https://inv.tux.pizza - - https://vid.priv.au - - https://not-ytb.blocus.ch - - https://inv.creller.net - - https://inv.zzls.xyz - - https://yt.floss.media - - https://invidious.slipfox.xyz - - https://par1.iv.ggtyler.dev - - https://inv.citw.lgbt - - https://invidious.io.lol - - https://yt.oelrichsgarcia.de - - https://iv.nboeck.de - - https://invidious.protokolla.fi - - https://invidious.fi - - https://onion.tube - - https://inv.in.projectsegfau.lt - - https://invidious.privacydev.net - - https://invidious.takebackourtech.org - - https://qc1.iv.ggtyler.dev - - https://anontube.lvkaszus.pl - - https://invidious.asir.dev - - https://invidious.fdn.fr - - https://iv.datura.network - - https://invidious.private.coffee - - https://inv.pistasjis.net - - https://invidious.pavot.ca - - https://yt.cdaut.de - - https://yt.drgnz.club - - https://invidious.perennialte.ch - - https://yt.chaotic.ninja - - https://yt.omada.cafe - - https://super8.absturztau.be - - https://i.redsnake.io - - https://watch.supernets.org - - https://invidious.qwik.space - - https://farside.link/invidious - - https://inv.odyssey346.dev - - https://invidious.mutahar.rocks - - https://invidious.nerdvpn.de - - https://invidious.projectsegfau.lt - - https://invidious.weblibre.org - - https://iv.ggtyler.dev - - https://watch.thekitty.zone - - https://inv.us.projectsegfau.lt - - https://invidious.drgns.space - i2p: - - http://tube.i2p - - http://inv.cn.i2p - - http://jewtube.i2p - - http://ytmous.i2p - - http://pa7eextqat4wg35onzs4cnlhqa3gvzen243bcbrng67zyla4fqya.b32.i2p - - http://inv.vern.i2p - - http://inv.zzls.i2p - - http://verni6dr4qxjgjumnvesxerh5rvhv6oy5ddeibaqy5d7tgbiiyfa.b32.i2p - loki: [] - tor: - - http://tuberyps2pn6dor6h47brof3w2asmauahhk4ei42krugybzzzo55klad.onion - - http://qwikxxeiw4kgmml6vjw2bsxtviuwjce735dunai2djhu6q7qbacq73id.onion - - http://qwikxxt6jvggxzxe2v2fuzro5j7ibgphxmblmri6wkj5vpicdbo2kwad.onion - - http://c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid.onion - - http://grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad.onion - - http://invidious.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd.onion - - http://euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd.onion - - http://invidious.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion - - http://iv.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd.onion - - http://kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad.onion - - http://ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid.onion - - http://osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd.onion - - http://u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad.onion - - http://w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd.onion - yggdrasil: - - http://[200:168a:c80a:b258:1dfe:f920:4414:6897] - hostname: - - youtu.be - - youtube.com - type: - - feed - - link + invidious: + clearnet: + - https://incogtube.com + - https://vid.puffyan.us + - https://yt.artemislena.eu + - https://invidious.snopyta.org + - https://youtube.076.ne.jp + - https://invidious.osi.kr + - https://invidious-us.kavin.rocks + - https://inv.cthd.icu + - https://invidious.namazso.eu + - https://yewtu.be + - https://invidio.xamh.de + - https://invidious.kavin.rocks + - https://monocles.live + - https://inv.riverside.rocks + - https://invidious.lunar.icu + - https://y.com.sb + - https://inv.bp.projectsegfau.lt + - https://invidious.flokinet.to + - https://invidious.sethforprivacy.com + - https://invidious.esmailelbob.xyz + - https://ytb.trom.tf + - https://invidious.domain.glass + - https://tube.cthd.icu + - https://inv.vern.cc + - https://invidious.garudalinux.org + - https://youtube.owacon.moe + - https://invidious.tinfoil-hat.net + - https://iv.melmac.space + - https://invidious.tiekoetter.com + - https://invidious.baczek.me + - https://invidious.no-logs.com + - https://invidious.0011.lt + - https://yt.funami.tech + - https://inv.tux.pizza + - https://vid.priv.au + - https://not-ytb.blocus.ch + - https://inv.creller.net + - https://inv.zzls.xyz + - https://yt.floss.media + - https://invidious.slipfox.xyz + - https://par1.iv.ggtyler.dev + - https://inv.citw.lgbt + - https://invidious.io.lol + - https://yt.oelrichsgarcia.de + - https://iv.nboeck.de + - https://invidious.protokolla.fi + - https://invidious.fi + - https://onion.tube + - https://inv.in.projectsegfau.lt + - https://invidious.privacydev.net + - https://invidious.takebackourtech.org + - https://qc1.iv.ggtyler.dev + - https://anontube.lvkaszus.pl + - https://invidious.asir.dev + - https://invidious.fdn.fr + - https://iv.datura.network + - https://invidious.private.coffee + - https://inv.pistasjis.net + - https://invidious.pavot.ca + - https://yt.cdaut.de + - https://yt.drgnz.club + - https://invidious.perennialte.ch + - https://yt.chaotic.ninja + - https://yt.omada.cafe + - https://super8.absturztau.be + - https://i.redsnake.io + - https://watch.supernets.org + - https://invidious.qwik.space + - https://farside.link/invidious + - https://inv.odyssey346.dev + - https://invidious.mutahar.rocks + - https://invidious.nerdvpn.de + - https://invidious.projectsegfau.lt + - https://invidious.weblibre.org + - https://iv.ggtyler.dev + - https://watch.thekitty.zone + - https://inv.us.projectsegfau.lt + - https://invidious.drgns.space + i2p: + - http://tube.i2p + - http://inv.cn.i2p + - http://jewtube.i2p + - http://ytmous.i2p + - http://pa7eextqat4wg35onzs4cnlhqa3gvzen243bcbrng67zyla4fqya.b32.i2p + - http://inv.vern.i2p + - http://inv.zzls.i2p + - http://verni6dr4qxjgjumnvesxerh5rvhv6oy5ddeibaqy5d7tgbiiyfa.b32.i2p + loki: [] + tor: + - http://tuberyps2pn6dor6h47brof3w2asmauahhk4ei42krugybzzzo55klad.onion + - http://qwikxxeiw4kgmml6vjw2bsxtviuwjce735dunai2djhu6q7qbacq73id.onion + - http://qwikxxt6jvggxzxe2v2fuzro5j7ibgphxmblmri6wkj5vpicdbo2kwad.onion + - http://c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid.onion + - http://grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad.onion + - http://invidious.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd.onion + - http://euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd.onion + - http://invidious.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion + - http://iv.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd.onion + - http://kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad.onion + - http://ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid.onion + - http://osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd.onion + - http://u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad.onion + - http://w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd.onion + yggdrasil: + - http://[200:168a:c80a:b258:1dfe:f920:4414:6897] + hostname: + - youtu.be + - youtube.com + type: + - feed + - link -librarian: - clearnet: - - https://librarian.pussthecat.org - - https://odysee.076.ne.jp - - https://lbry.projectsegfau.lt - - https://librarian.esmailelbob.xyz - - https://lbry.mywire.org - - https://lbry.slipfox.xyz - - https://lbry.vern.cc - - https://lbry.ooguy.com - - https://lbn.frail.duckdns.org - - https://odysee.owacon.moe - - https://farside.link/librarian - i2p: [] - loki: [] - tor: - - http://librarian.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd.onion - - http://lbry.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion - - http://5znbzx2xcymhddzekfjib3isgqq4ilcyxa2bsq6vqmnvbtgu4f776lqd.onion - - http://bxewpsswttslepw27w2hhxhlizwm7l7y54x3jw5cfrb64hb6lgc557ad.onion - yggdrasil: [] - hostname: - - odysee.com - type: - - feed - - link + librarian: + clearnet: + - https://librarian.pussthecat.org + - https://odysee.076.ne.jp + - https://lbry.projectsegfau.lt + - https://librarian.esmailelbob.xyz + - https://lbry.mywire.org + - https://lbry.slipfox.xyz + - https://lbry.vern.cc + - https://lbry.ooguy.com + - https://lbn.frail.duckdns.org + - https://odysee.owacon.moe + - https://farside.link/librarian + i2p: [] + loki: [] + tor: + - http://librarian.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd.onion + - http://lbry.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion + - http://5znbzx2xcymhddzekfjib3isgqq4ilcyxa2bsq6vqmnvbtgu4f776lqd.onion + - http://bxewpsswttslepw27w2hhxhlizwm7l7y54x3jw5cfrb64hb6lgc557ad.onion + yggdrasil: [] + hostname: + - odysee.com + type: + - feed + - link -libreddit: - clearnet: - - https://libreddit.spike.codes - - https://libreddit.hu - - https://libreddit.nl - - https://libreddit.bus-hit.me - - https://libreddit.strongthany.cc - - https://libreddit.esmailelbob.xyz - - https://lr.riverside.rocks - - https://libreddit.40two.app - - https://libreddit.albony.xyz - - https://libreddit.domain.glass - - https://discuss.whatever.social - - https://libreddit.kavin.rocks - - https://libreddit.privacy.com.de - - https://libreddit.eu.org - - https://libreddit.bloatcat.tk - - https://libreddit.pabloferreiro.es - - https://lr.foss.wtf - - https://libreddit.no-logs.com - - https://lr.slipfox.xyz - - https://lr.creller.net - - https://libreddit.dcs0.hu - - https://l.opnxng.com - - https://libreddit.tux.pizza - - https://reddit.leptons.xyz - - https://reddit.baby - - https://snoo.habedieeh.re - - https://lr.4201337.xyz - - https://libreddit.private.coffee - - https://lr.artemislena.eu - - https://libreddit.privacyfucking.rocks - - https://libreddit.qwik.space - - https://farside.link/libreddit - - https://de.leddit.xyz - - https://leddit.xyz - - https://libreddit.alefvanoon.xyz - - https://libreddit.autarkic.org - - https://libreddit.awesomehub.io - - https://libreddit.crewz.me - - https://libreddit.database.red - - https://libreddit.datatunnel.xyz - - https://libreddit.de - - https://libreddit.dothq.co - - https://libreddit.drivet.xyz - - https://libreddit.flux.industries - - https://libreddit.igna.rocks - - https://libredd.it - - https://libreddit.jamiethalacker.dev - - https://libreddit.kylrth.com - - https://libreddit.lunar.icu - - https://libreddit.mutahar.rocks - - https://libreddit.northboot.xyz - - https://libreddit.pussthecat.org - - https://libreddit.silkky.cloud - - https://libreddit.some-things.org - - https://libreddit.sugoma.tk - - https://libreddit.tiekoetter.com - - https://libreddit.totaldarkness.net - - https://libreddit.winscloud.net - - https://libreddit.yonalee.eu - - https://lr.cowfee.moe - - https://lr.mint.lgbt - - https://lr.oversold.host - - https://lr.stilic.ml - - https://r.nf - - https://r.walkx.org - - https://reddi.tk - - https://reddit.artemislena.eu - - https://reddit.invak.id - - https://reddit.phii.me - - https://reddit.rtrace.io - - https://reddit.stuehieyr.com - - https://safereddit.com - - https://libreddit.nohost.network - - https://libreddit.projectsegfau.lt - - https://reddit.simo.sh - i2p: - - http://woo5ugmoomzbtaq6z46q4wgei5mqmc6jkafqfi5c37zni7xc4ymq.b32.i2p - loki: [] - tor: - - http://spjmllawtheisznfs7uryhxumin26ssv2draj7oope3ok3wuhy43eoyd.onion - - http://qwikxxeiw4kgmml6vjw2bsxtviuwjce735dunai2djhu6q7qbacq73id.onion - - http://qwikxx4xqvhdyyazkrw7pwdpdppfnmn7j2n6cvq5zecm4atbppaslzad.onion - - http://ecue64ybzvn6vjzl37kcsnwt4ycmbsyf74nbttyg7rkc3t3qwnj7mcyd.onion - - http://fwhhsbrbltmrct5hshrnqlqygqvcgmnek3cnka55zj4y7nuus5muwyyd.onion - - http://inytumdgnri7xsqtvpntjevaelxtgbjqkuqhtf6txxhwbll2fwqtakqd.onion - - http://kphht2jcflojtqte4b4kyx7p2ahagv4debjj32nre67dxz7y57seqwyd.onion - - http://kzhfp3nvb4qp575vy23ccbrgfocezjtl5dx66uthgrhu7nscu6rcwjyd.onion - - http://lbrdtjaj7567ptdd4rv74lv27qhxfkraabnyphgcvptl64ijx2tijwid.onion - - http://libreddit.2syis2nnyytz6jnusnjurva4swlaizlnleiks5mjp46phuwjbdjqwgqd.onion - - http://ledditqo2mxfvlgobxnlhrkq4dh34jss6evfkdkb2thlvy6dn4f4gpyd.onion - - http://libreddit.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion - - http://libredoxhxwnmsb6dvzzd35hmgzmawsq5i764es7witwhddvpc2razid.onion - - http://ol5begilptoou34emq2sshf3may3hlblvipdjtybbovpb7c7zodxmtqd.onion - - http://liredejj74h5xjqr2dylnl5howb2bpikfowqoveub55ru27x43357iid.onion - yggdrasil: [] - hostname: - - reddit.com - type: - - link + libreddit: + clearnet: + - https://libreddit.spike.codes + - https://libreddit.hu + - https://libreddit.nl + - https://libreddit.bus-hit.me + - https://libreddit.strongthany.cc + - https://libreddit.esmailelbob.xyz + - https://lr.riverside.rocks + - https://libreddit.40two.app + - https://libreddit.albony.xyz + - https://libreddit.domain.glass + - https://discuss.whatever.social + - https://libreddit.kavin.rocks + - https://libreddit.privacy.com.de + - https://libreddit.eu.org + - https://libreddit.bloatcat.tk + - https://libreddit.pabloferreiro.es + - https://lr.foss.wtf + - https://libreddit.no-logs.com + - https://lr.slipfox.xyz + - https://lr.creller.net + - https://libreddit.dcs0.hu + - https://l.opnxng.com + - https://libreddit.tux.pizza + - https://reddit.leptons.xyz + - https://reddit.baby + - https://snoo.habedieeh.re + - https://lr.4201337.xyz + - https://libreddit.private.coffee + - https://lr.artemislena.eu + - https://libreddit.privacyfucking.rocks + - https://libreddit.qwik.space + - https://farside.link/libreddit + - https://de.leddit.xyz + - https://leddit.xyz + - https://libreddit.alefvanoon.xyz + - https://libreddit.autarkic.org + - https://libreddit.awesomehub.io + - https://libreddit.crewz.me + - https://libreddit.database.red + - https://libreddit.datatunnel.xyz + - https://libreddit.de + - https://libreddit.dothq.co + - https://libreddit.drivet.xyz + - https://libreddit.flux.industries + - https://libreddit.igna.rocks + - https://libredd.it + - https://libreddit.jamiethalacker.dev + - https://libreddit.kylrth.com + - https://libreddit.lunar.icu + - https://libreddit.mutahar.rocks + - https://libreddit.northboot.xyz + - https://libreddit.pussthecat.org + - https://libreddit.silkky.cloud + - https://libreddit.some-things.org + - https://libreddit.sugoma.tk + - https://libreddit.tiekoetter.com + - https://libreddit.totaldarkness.net + - https://libreddit.winscloud.net + - https://libreddit.yonalee.eu + - https://lr.cowfee.moe + - https://lr.mint.lgbt + - https://lr.oversold.host + - https://lr.stilic.ml + - https://r.nf + - https://r.walkx.org + - https://reddi.tk + - https://reddit.artemislena.eu + - https://reddit.invak.id + - https://reddit.phii.me + - https://reddit.rtrace.io + - https://reddit.stuehieyr.com + - https://safereddit.com + - https://libreddit.nohost.network + - https://libreddit.projectsegfau.lt + - https://reddit.simo.sh + i2p: + - http://woo5ugmoomzbtaq6z46q4wgei5mqmc6jkafqfi5c37zni7xc4ymq.b32.i2p + loki: [] + tor: + - http://spjmllawtheisznfs7uryhxumin26ssv2draj7oope3ok3wuhy43eoyd.onion + - http://qwikxxeiw4kgmml6vjw2bsxtviuwjce735dunai2djhu6q7qbacq73id.onion + - http://qwikxx4xqvhdyyazkrw7pwdpdppfnmn7j2n6cvq5zecm4atbppaslzad.onion + - http://ecue64ybzvn6vjzl37kcsnwt4ycmbsyf74nbttyg7rkc3t3qwnj7mcyd.onion + - http://fwhhsbrbltmrct5hshrnqlqygqvcgmnek3cnka55zj4y7nuus5muwyyd.onion + - http://inytumdgnri7xsqtvpntjevaelxtgbjqkuqhtf6txxhwbll2fwqtakqd.onion + - http://kphht2jcflojtqte4b4kyx7p2ahagv4debjj32nre67dxz7y57seqwyd.onion + - http://kzhfp3nvb4qp575vy23ccbrgfocezjtl5dx66uthgrhu7nscu6rcwjyd.onion + - http://lbrdtjaj7567ptdd4rv74lv27qhxfkraabnyphgcvptl64ijx2tijwid.onion + - http://libreddit.2syis2nnyytz6jnusnjurva4swlaizlnleiks5mjp46phuwjbdjqwgqd.onion + - http://ledditqo2mxfvlgobxnlhrkq4dh34jss6evfkdkb2thlvy6dn4f4gpyd.onion + - http://libreddit.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion + - http://libredoxhxwnmsb6dvzzd35hmgzmawsq5i764es7witwhddvpc2razid.onion + - http://ol5begilptoou34emq2sshf3may3hlblvipdjtybbovpb7c7zodxmtqd.onion + - http://liredejj74h5xjqr2dylnl5howb2bpikfowqoveub55ru27x43357iid.onion + yggdrasil: [] + hostname: + - reddit.com + type: + - link -neuters: - clearnet: - - https://neuters.de - - https://neuters.privacyfucking.rocks - hostname: - - reuters.com - type: - - link + neuters: + clearnet: + - https://neuters.de + - https://neuters.privacyfucking.rocks + hostname: + - reuters.com + type: + - link -nitter: - clearnet: - - https://nitter.hu - - https://nitter.actionsack.com - - https://nitter.net - - https://nitter.1d4.us - - https://nitter.nixnet.services - - https://nitter.unixfox.eu - - https://nitter.sethforprivacy.com - - https://nitter.pussthecat.org - - https://nitter.it - - https://nitter.moomoo.me - - https://tw.artemislena.eu - - https://nitter.snopyta.org - - https://birdsite.xanny.family - - https://nitter.domain.glass - - https://read.whatever.social - - https://nitter.lacontrevoie.fr - - https://bird.trom.tf - - https://nitter.hostux.net - - https://nitter.sneed.network - - https://twitter.owacon.moe - - https://nitter.ggc-project.de - - https://unofficialbird.com - - https://nitter.fdn.fr - - https://nitter.no-logs.com - - https://nitter.slipfox.xyz - - https://nitter.one - - https://nitter.ungovernable.men - - https://nitter.private.coffee - - https://nitter.soopy.moe - - https://nitter.oksocial.net - - https://n.sneed.network - - https://nitter.qwik.space - - https://nitter.nohost.network - - https://de.nttr.stream - - https://farside.link/nitter - - https://nitter.42l.fr - - https://nitter.bus-hit.me - - https://nitter.ca - - https://nitter.eu - - https://nitter.grimneko.de - - https://nitter.kavin.rocks - - https://nitter.koyu.space - - https://nitter.namazso.eu - - https://nttr.stream - - https://twitter.076.ne.jp - - https://twitter.censors.us - - https://n.hyperborea.cloud - - https://n.biendeo.com - - https://n.opnxng.com - - https://nitter.adminforge.de - - https://nitter.catsarch.com - - https://nitter.cz - - https://nitter.esmailelbob.xyz - - https://nitter.in.projectsegfau.lt - - https://nitter.io.lol - - https://nitter.ktachibana.party - - https://nitter.kylrth.com - - https://nitter.poast.org - - https://nitter.privacydev.net - - https://nitter.salastil.com - - https://nitter.woodland.cafe - i2p: - - http://tm4rwkeysv3zz3q5yacyr4rlmca2c4etkdobfvuqzt6vsfsu4weq.b32.i2p - loki: [] - tor: - - http://qwikxxeiw4kgmml6vjw2bsxtviuwjce735dunai2djhu6q7qbacq73id.onion - - http://qwikxx2erhx6qrymued6ox2qkf2yeogjwypqvzoif4fqkljixasr6oid.onion - - http://n.sneed4fmhevap3ci4xhf4wgkf72lwk275lcgomnfgwniwmqvaxyluuid.onion - yggdrasil: [] - hostname: - - twitter.com - - x.com - type: - - feed - - link + nitter: + clearnet: + - https://nitter.hu + - https://nitter.actionsack.com + - https://nitter.net + - https://nitter.1d4.us + - https://nitter.nixnet.services + - https://nitter.unixfox.eu + - https://nitter.sethforprivacy.com + - https://nitter.pussthecat.org + - https://nitter.it + - https://nitter.moomoo.me + - https://tw.artemislena.eu + - https://nitter.snopyta.org + - https://birdsite.xanny.family + - https://nitter.domain.glass + - https://read.whatever.social + - https://nitter.lacontrevoie.fr + - https://bird.trom.tf + - https://nitter.hostux.net + - https://nitter.sneed.network + - https://twitter.owacon.moe + - https://nitter.ggc-project.de + - https://unofficialbird.com + - https://nitter.fdn.fr + - https://nitter.no-logs.com + - https://nitter.slipfox.xyz + - https://nitter.one + - https://nitter.ungovernable.men + - https://nitter.private.coffee + - https://nitter.soopy.moe + - https://nitter.oksocial.net + - https://n.sneed.network + - https://nitter.qwik.space + - https://nitter.nohost.network + - https://de.nttr.stream + - https://farside.link/nitter + - https://nitter.42l.fr + - https://nitter.bus-hit.me + - https://nitter.ca + - https://nitter.eu + - https://nitter.grimneko.de + - https://nitter.kavin.rocks + - https://nitter.koyu.space + - https://nitter.namazso.eu + - https://nttr.stream + - https://twitter.076.ne.jp + - https://twitter.censors.us + - https://n.hyperborea.cloud + - https://n.biendeo.com + - https://n.opnxng.com + - https://nitter.adminforge.de + - https://nitter.catsarch.com + - https://nitter.cz + - https://nitter.esmailelbob.xyz + - https://nitter.in.projectsegfau.lt + - https://nitter.io.lol + - https://nitter.ktachibana.party + - https://nitter.kylrth.com + - https://nitter.poast.org + - https://nitter.privacydev.net + - https://nitter.salastil.com + - https://nitter.woodland.cafe + i2p: + - http://tm4rwkeysv3zz3q5yacyr4rlmca2c4etkdobfvuqzt6vsfsu4weq.b32.i2p + loki: [] + tor: + - http://qwikxxeiw4kgmml6vjw2bsxtviuwjce735dunai2djhu6q7qbacq73id.onion + - http://qwikxx2erhx6qrymued6ox2qkf2yeogjwypqvzoif4fqkljixasr6oid.onion + - http://n.sneed4fmhevap3ci4xhf4wgkf72lwk275lcgomnfgwniwmqvaxyluuid.onion + yggdrasil: [] + hostname: + - twitter.com + - x.com + type: + - feed + - link -proxitok: - clearnet: - - https://proxitok.lunar.icu - - https://tik.hostux.net - - https://proxitok.pabloferreiro.es - - https://proxitok.privacy.com.de - - https://tok.adminforge.de - - https://tok.habedieeh.re - - https://proxitok.pussthecat.org - - https://proxitok.privacyfucking.rocks - - https://cringe.whatever.social - - https://proxitok.esmailelbob.xyz - - https://proxitok.privacydev.net - - https://proxitok.pufe.org - - https://tok.artemislena.eu - - https://tok.thekitty.zone - - https://tiktok.chauvet.pro - - https://tt.vern.cc - - https://farside.link/proxitok - i2p: - - http://qr.vern.i2p - loki: [] - tor: [] - yggdrasil: [] - hostname: - - tiktok.com - type: - - link + proxitok: + clearnet: + - https://proxitok.lunar.icu + - https://tik.hostux.net + - https://proxitok.pabloferreiro.es + - https://proxitok.privacy.com.de + - https://tok.adminforge.de + - https://tok.habedieeh.re + - https://proxitok.pussthecat.org + - https://proxitok.privacyfucking.rocks + - https://cringe.whatever.social + - https://proxitok.esmailelbob.xyz + - https://proxitok.privacydev.net + - https://proxitok.pufe.org + - https://tok.artemislena.eu + - https://tok.thekitty.zone + - https://tiktok.chauvet.pro + - https://tt.vern.cc + - https://farside.link/proxitok + i2p: + - http://qr.vern.i2p + loki: [] + tor: [] + yggdrasil: [] + hostname: + - tiktok.com + type: + - link -quetre: - clearnet: - - https://quetre.privacydev.net - - https://quetre.pufe.org - - https://que.wilbvr.me - - https://quetre.iket.me - - https://quetre.pussthecat.org - - https://quetre.tokhmi.xyz - - https://quetre.projectsegfau.lt - - https://quetre.esmailelbob.xyz - - https://quetre.odyssey346.dev - - ://ask.habedieeh.re - - https://quetre.marcopisco.com - - https://quetre.blackdrgn.nl - - https://quetre.lunar.icu - - https://quora.femboy.hu - - https://quora.vern.cc - - https://farside.link/quetre - - https://quetre.fascinated.cc - i2p: [] - loki: [] - tor: - - http://ask.habeehrhadazsw3izbrbilqajalfyqqln54mrja3iwpqxgcuxnus7eid.onion - - http://qr.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion - - http://quetre.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd.onion - - http://quetre.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion - - http://quora.cepyxplublbyw2f4axy4pyztfbxmf63lrt2c7uwv6wl4iixz53czload.onion - yggdrasil: [] - hostname: - - quora.com - type: - - link + quetre: + clearnet: + - https://quetre.privacydev.net + - https://quetre.pufe.org + - https://que.wilbvr.me + - https://quetre.iket.me + - https://quetre.pussthecat.org + - https://quetre.tokhmi.xyz + - https://quetre.projectsegfau.lt + - https://quetre.esmailelbob.xyz + - https://quetre.odyssey346.dev + - ://ask.habedieeh.re + - https://quetre.marcopisco.com + - https://quetre.blackdrgn.nl + - https://quetre.lunar.icu + - https://quora.femboy.hu + - https://quora.vern.cc + - https://farside.link/quetre + - https://quetre.fascinated.cc + i2p: [] + loki: [] + tor: + - http://ask.habeehrhadazsw3izbrbilqajalfyqqln54mrja3iwpqxgcuxnus7eid.onion + - http://qr.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad.onion + - http://quetre.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd.onion + - http://quetre.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid.onion + - http://quora.cepyxplublbyw2f4axy4pyztfbxmf63lrt2c7uwv6wl4iixz53czload.onion + yggdrasil: [] + hostname: + - quora.com + type: + - link -teddit: - clearnet: - - https://teddit.pussthecat.org - - https://teddit.zaggy.nl - - https://teddit.bus-hit.me - - https://teddit.adminforge.de - - https://incogsnoo.com - - https://teddit.hostux.net - - https://teddit.ggc-project.de - - https://teddit.httpjames.space - - https://snoo.ioens.is - - https://teddit.no-logs.com - - https://teddit.net - - https://i.opnxng.com - - https://tedd.it - - https://teddit.projectsegfau.lt - - https://reddit.lol - - https://rdt.trom.tf - - https://t.sneed.network - - https://farside.link/teddit - - https://teddit.alefvanoon.xyz - - https://teddit.domain.glass - - https://teddit.froth.zone - - https://teddit.namazso.eu - - https://teddit.sethforprivacy.com - - https://teddit.tinfoil-hat.net - - https://teddit.totaldarkness.net - - https://td.vern.cc - i2p: - - http://k62ptris7p72aborr4zoanee7xai6wguucveptwgxs5vbgt7qzpq.b32.i2p - - http://teddit.i2p - loki: [] - tor: - - http://t.sneed4fmhevap3ci4xhf4wgkf72lwk275lcgomnfgwniwmqvaxyluuid.onion - - http://tedditfyn6idalzso5wam5qd3kdtxoljjhbrbbx34q2xkcisvshuytad.onion - yggdrasil: - - http://[200:5e4b:515c:e42b:3e73:6fbf:2f11:779d] - hostname: - - reddit.com - type: - - link + teddit: + clearnet: + - https://teddit.pussthecat.org + - https://teddit.zaggy.nl + - https://teddit.bus-hit.me + - https://teddit.adminforge.de + - https://incogsnoo.com + - https://teddit.hostux.net + - https://teddit.ggc-project.de + - https://teddit.httpjames.space + - https://snoo.ioens.is + - https://teddit.no-logs.com + - https://teddit.net + - https://i.opnxng.com + - https://tedd.it + - https://teddit.projectsegfau.lt + - https://reddit.lol + - https://rdt.trom.tf + - https://t.sneed.network + - https://farside.link/teddit + - https://teddit.alefvanoon.xyz + - https://teddit.domain.glass + - https://teddit.froth.zone + - https://teddit.namazso.eu + - https://teddit.sethforprivacy.com + - https://teddit.tinfoil-hat.net + - https://teddit.totaldarkness.net + - https://td.vern.cc + i2p: + - http://k62ptris7p72aborr4zoanee7xai6wguucveptwgxs5vbgt7qzpq.b32.i2p + - http://teddit.i2p + loki: [] + tor: + - http://t.sneed4fmhevap3ci4xhf4wgkf72lwk275lcgomnfgwniwmqvaxyluuid.onion + - http://tedditfyn6idalzso5wam5qd3kdtxoljjhbrbbx34q2xkcisvshuytad.onion + yggdrasil: + - http://[200:5e4b:515c:e42b:3e73:6fbf:2f11:779d] + hostname: + - reddit.com + type: + - link diff --git a/slixfeed/action.py b/slixfeed/action.py index 790d443..35895f9 100644 --- a/slixfeed/action.py +++ b/slixfeed/action.py @@ -36,8 +36,13 @@ async def add_feed(db_file, url): db_file, url, title, status) await organize_items( db_file, [url]) - old = await sqlite.get_settings_value( - db_file, "old") + old = ( + await sqlite.get_settings_value( + db_file, "old") + ) or ( + config.get_value_default( + "settings", "Settings", "old") + ) if not old: await sqlite.mark_source_as_read( db_file, url) @@ -277,7 +282,7 @@ async def organize_items(db_file, urls): entries = feed.entries # length = len(entries) # await remove_entry(db_file, source, length) - await sqlite.remove_nonexistent_entries( + await remove_nonexistent_entries( db_file, feed, source) # new_entry = 0 for entry in entries: @@ -331,17 +336,14 @@ async def organize_items(db_file, urls): summary = "> *** No summary ***" read_status = 0 pathname = urlsplit(link).path - string = ( - "{} {} {}" - ).format( - title, - summary, - pathname - ) - allow_list = await config.is_listed( + string = ("{} {} {}" + ).format( + title, summary, pathname + ) + allow_list = await config.is_include_keyword( db_file, "filter-allow", string) if not allow_list: - reject_list = await config.is_listed( + reject_list = await config.is_include_keyword( db_file, "filter-deny", string) if reject_list: # print(">>> REJECTED", title) @@ -367,3 +369,103 @@ async def organize_items(db_file, urls): # print(current_time(), exist, title) +async def remove_nonexistent_entries(db_file, feed, source): + """ + Remove entries that don't exist in a given parsed feed. + Check the entries returned from feed and delete read non + existing entries, otherwise move to table archive, if unread. + + Parameters + ---------- + db_file : str + Path to database file. + feed : list + Parsed feed document. + source : str + Feed URL. URL of associated feed. + """ + items = sqlite.get_entries_of_source(db_file, feed, source) + entries = feed.entries + # breakpoint() + for item in items: + valid = False + for entry in entries: + title = None + link = None + time = None + # valid = False + # TODO better check and don't repeat code + if entry.has_key("id") and item[3]: + if entry.id == item[3]: + # print("compare1:", entry.id) + # print("compare2:", item[3]) + # print("============") + valid = True + break + else: + if entry.has_key("title"): + title = entry.title + else: + title = feed["feed"]["title"] + if entry.has_key("link"): + link = join_url(source, entry.link) + else: + link = source + if entry.has_key("published") and item[4]: + # print("compare11:", title, link, time) + # print("compare22:", item[1], item[2], item[4]) + # print("============") + time = rfc2822_to_iso8601(entry.published) + if (item[1] == title and + item[2] == link and + item[4] == time): + valid = True + break + else: + if (item[1] == title and + item[2] == link): + # print("compare111:", title, link) + # print("compare222:", item[1], item[2]) + # print("============") + valid = True + break + # TODO better check and don't repeat code + if not valid: + # print("id: ", item[0]) + # if title: + # print("title: ", title) + # print("item[1]: ", item[1]) + # if link: + # print("link: ", link) + # print("item[2]: ", item[2]) + # if entry.id: + # print("last_entry:", entry.id) + # print("item[3]: ", item[3]) + # if time: + # print("time: ", time) + # print("item[4]: ", item[4]) + # print("read: ", item[5]) + # breakpoint() + + # TODO Send to table archive + # TODO Also make a regular/routine check for sources that + # have been changed (though that can only happen when + # manually editing) + ix = item[0] + # print(">>> SOURCE: ", source) + # print(">>> INVALID:", item[1]) + # print("title:", item[1]) + # print("link :", item[2]) + # print("id :", item[3]) + if item[5] == 1: + sqlite.delete_entry_by_id(db_file, ix) + # print(">>> DELETING:", item[1]) + else: + # print(">>> ARCHIVING:", item[1]) + sqlite.archive_entry(db_file, ix) + limit = ( + await sqlite.get_settings_value(db_file, "archive") + ) or ( + config.get_value_default("settings", "Settings", "archive") + ) + await sqlite.maintain_archive(db_file, limit) diff --git a/slixfeed/config.py b/slixfeed/config.py index 98d35e7..1732c9f 100644 --- a/slixfeed/config.py +++ b/slixfeed/config.py @@ -115,14 +115,16 @@ def get_value_default(filename, section, key): return result -def get_list(filename): +def get_list(filename, key): """ Get settings default value. Parameters ---------- filename : str - filename of yaml file. + Filename of yaml file. + key: str + Key. Returns ------- @@ -137,6 +139,7 @@ def get_list(filename): # default = yaml.safe_load(defaults) # result = default[key] result = yaml.safe_load(defaults) + result = result[key] return result @@ -305,7 +308,7 @@ async def remove_from_list(newwords, keywords): return val -async def is_listed(db_file, key, string): +async def is_include_keyword(db_file, key, string): """ Check keyword match. @@ -325,21 +328,16 @@ async def is_listed(db_file, key, string): """ # async def reject(db_file, string): # async def is_blacklisted(db_file, string): - list = await sqlite.get_filters_value( - db_file, - key - ) - if list: - list = list.split(",") - for i in list: - if not i or len(i) < 2: - continue - if i in string.lower(): - # print(">>> ACTIVATE", i) - # return 1 - return i - else: - return None + keywords = (await sqlite.get_filters_value(db_file, key)) or '' + keywords = keywords.split(",") + keywords = keywords + (get_list("lists.yaml", key)) + for keyword in keywords: + if not keyword or len(keyword) < 2: + continue + if keyword in string.lower(): + # print(">>> ACTIVATE", i) + # return 1 + return keyword """ diff --git a/slixfeed/crawl.py b/slixfeed/crawl.py index 75caf4e..dcfe562 100644 --- a/slixfeed/crawl.py +++ b/slixfeed/crawl.py @@ -120,8 +120,7 @@ async def feed_mode_request(url, tree): """ feeds = {} parted_url = urlsplit(url) - paths = config.get_list("lists.yaml") - paths = paths["pathnames"] + paths = config.get_list("lists.yaml", "pathnames") for path in paths: address = urlunsplit([ parted_url.scheme, @@ -229,8 +228,7 @@ async def feed_mode_scan(url, tree): feeds = {} # paths = [] # TODO Test - paths = config.get_list("lists.yaml") - paths = paths["pathnames"] + paths = config.get_list("lists.yaml", "pathnames") for path in paths: # xpath_query = "//*[@*[contains(.,'{}')]]".format(path) # xpath_query = "//a[contains(@href,'{}')]".format(path) diff --git a/slixfeed/fetch.py b/slixfeed/fetch.py index 9cadd22..57a6a0d 100644 --- a/slixfeed/fetch.py +++ b/slixfeed/fetch.py @@ -66,15 +66,12 @@ async def download_feed(url): msg: list or str Document or error message. """ - try: - user_agent = config.get_value_default("settings", "Network", "user-agent") - except: - user_agent = "Slixfeed/0.1" - if not len(user_agent): - user_agent = "Slixfeed/0.1" + user_agent = (config.get_value( + "settings", "Network", "user-agent")) or 'Slixfeed/0.1' headers = {'User-Agent': user_agent} url = url[0] - proxy = (config.get_value("settings", "Network", "http_proxy")) or '' + proxy = (config.get_value( + "settings", "Network", "http_proxy")) or '' timeout = ClientTimeout(total=10) async with ClientSession(headers=headers) as session: # async with ClientSession(trust_env=True) as session: diff --git a/slixfeed/filter.py b/slixfeed/filter.py new file mode 100644 index 0000000..9b80620 --- /dev/null +++ b/slixfeed/filter.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + +TODO + +1) Website-specific filter (i.e. audiobookbay). + +2) Exclude websites from filtering (e.g. metapedia). + +3) Filter phrases: + Refer to sqlitehandler.search_entries for implementation. + It is expected to be more complex than function search_entries. + +""" + +import slixfeed.config as config +import slixfeed.sqlite as sqlite + + +async def add_to_list(newwords, keywords): + """ + Append new keywords to list. + + Parameters + ---------- + newwords : str + List of new keywords. + keywords : str + List of current keywords. + + Returns + ------- + val : str + List of current keywords and new keywords. + """ + if isinstance(keywords, str) or keywords is None: + try: + keywords = keywords.split(",") + except: + keywords = [] + newwords = newwords.lower().split(",") + for word in newwords: + word = word.strip() + if len(word) and word not in keywords: + keywords.extend([word]) + keywords.sort() + val = ",".join(keywords) + return val + + +async def remove_from_list(newwords, keywords): + """ + Remove given keywords from list. + + Parameters + ---------- + newwords : str + List of new keywords. + keywords : str + List of current keywords. + + Returns + ------- + val : str + List of new keywords. + """ + if isinstance(keywords, str) or keywords is None: + try: + keywords = keywords.split(",") + except: + keywords = [] + newwords = newwords.lower().split(",") + for word in newwords: + word = word.strip() + if len(word) and word in keywords: + keywords.remove(word) + keywords.sort() + val = ",".join(keywords) + return val + + +async def is_include_keyword(db_file, key, string): + """ + Check keyword match. + + Parameters + ---------- + db_file : str + Path to database file. + type : str + "allow" or "deny". + string : str + String. + + Returns + ------- + Matched keyword or None. + + """ +# async def reject(db_file, string): +# async def is_blacklisted(db_file, string): + keywords = (await sqlite.get_filters_value(db_file, key)) or '' + keywords = keywords.split(",") + keywords = keywords + (config.get_list("lists.yaml", key)) + for keyword in keywords: + if not keyword or len(keyword) < 2: + continue + if keyword in string.lower(): + # print(">>> ACTIVATE", i) + # return 1 + return keyword + +""" + +This code was tested at module datahandler + +reject = 0 +blacklist = await get_settings_value( + db_file, + "filter-deny" + ) +# print(">>> blacklist:") +# print(blacklist) +# breakpoint() +if blacklist: + blacklist = blacklist.split(",") + # print(">>> blacklist.split") + # print(blacklist) + # breakpoint() + for i in blacklist: + # print(">>> length", len(i)) + # breakpoint() + # if len(i): + if not i or len(i) < 2: + print(">>> continue due to length", len(i)) + # breakpoint() + continue + # print(title) + # print(">>> blacklisted word:", i) + # breakpoint() + test = (title + " " + summary + " " + link) + if i in test.lower(): + reject = 1 + break + +if reject: + print("rejected:",title) + entry = (title, '', link, source, date, 1); + +""" \ No newline at end of file diff --git a/slixfeed/sqlite.py b/slixfeed/sqlite.py index 42b1696..e808375 100644 --- a/slixfeed/sqlite.py +++ b/slixfeed/sqlite.py @@ -16,9 +16,8 @@ TODO """ from asyncio import Lock -from bs4 import BeautifulSoup from datetime import date -# from slixfeed.config import get_value_default +import logging import slixfeed.config as config # from slixfeed.data import join_url from slixfeed.datetime import ( @@ -413,7 +412,7 @@ async def get_unread_entries(db_file, num): return results -def mark_entry_as_read(cur, ix): +async def mark_entry_as_read(cur, ix): """ Set read status of entry as read. @@ -454,6 +453,70 @@ async def mark_source_as_read(db_file, source): cur.execute(sql, (source,)) +async def delete_entry_by_id(db_file, ix): + """ + Delete entry by Id. + + Parameters + ---------- + db_file : str + Path to database file. + ix : str + Index. + """ + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = ( + "DELETE " + "FROM entries " + "WHERE id = :ix" + ) + cur.execute(sql, (ix,)) + + +async def archive_entry(db_file, ix): + """ + Insert entry to archive and delete entry. + + Parameters + ---------- + db_file : str + Path to database file. + ix : str + Index. + """ + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = ( + "INSERT " + "INTO archive " + "SELECT * " + "FROM entries " + "WHERE entries.id = :ix" + ) + try: + cur.execute(sql, (ix,)) + except: + print( + "ERROR DB insert from entries " + "into archive at index", ix + ) + sql = ( + "DELETE " + "FROM entries " + "WHERE id = :ix" + ) + try: + cur.execute(sql, (ix,)) + except: + print( + "ERROR DB deleting items from " + "table entries at index", ix + ) + + def get_feed_title(db_file, source): with create_connection(db_file) as conn: cur = conn.cursor() @@ -477,8 +540,9 @@ async def mark_as_read(db_file, ix): # NOTE: We can use DBLOCK once for both # functions, because, due to exclusive # ID, only one can ever occur. - mark_entry_as_read(cur, ix) - delete_archived_entry(cur, ix) + await mark_entry_as_read(cur, ix) + await delete_archived_entry(cur, ix) + async def mark_all_as_read(db_file): """ @@ -503,7 +567,7 @@ async def mark_all_as_read(db_file): cur.execute(sql) -def delete_archived_entry(cur, ix): +async def delete_archived_entry(cur, ix): """ Delete entry from table archive. @@ -644,7 +708,6 @@ async def set_date(cur, url): url : str URL. """ - today = date.today() sql = ( "UPDATE feeds " "SET updated = :today " @@ -652,7 +715,7 @@ async def set_date(cur, url): ) # cur = conn.cursor() cur.execute(sql, { - "today": today, + "today": date.today(), "url": url }) @@ -780,7 +843,7 @@ async def add_entry(cur, entry): # breakpoint() -async def maintain_archive(cur, limit): +async def maintain_archive(db_file, limit): """ Maintain list of archived entries equal to specified number of items. @@ -789,37 +852,40 @@ async def maintain_archive(cur, limit): db_file : str Path to database file. """ - sql = ( - "SELECT count(id) " - "FROM archive" - ) - count = cur.execute(sql).fetchone()[0] - # FIXME Upon first time joining to a groupchat - # and then adding a URL, variable "limit" - # becomes a string in one of the iterations. - # if isinstance(limit,str): - # print("STOP") - # breakpoint() - reduc = count - int(limit) - if reduc > 0: - sql = ( - "DELETE FROM archive " - "WHERE id " - "IN (SELECT id " - "FROM archive " - "ORDER BY timestamp ASC " - "LIMIT :reduc)" - ) - cur.execute(sql, { - "reduc": reduc - }) + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = ( + "SELECT count(id) " + "FROM archive" + ) + count = cur.execute(sql).fetchone()[0] + # FIXME Upon first time joining to a groupchat + # and then adding a URL, variable "limit" + # becomes a string in one of the iterations. + # if isinstance(limit,str): + # print("STOP") + # breakpoint() + difference = count - int(limit) + if difference > 0: + sql = ( + "DELETE FROM archive " + "WHERE id " + "IN (SELECT id " + "FROM archive " + "ORDER BY timestamp ASC " + "LIMIT :difference)" + ) + cur.execute(sql, { + "difference": difference + }) # TODO Move entries that don't exist into table archive. # NOTE Entries that are read from archive are deleted. # NOTE Unlike entries from table entries, entries from # table archive are not marked as read. -async def remove_nonexistent_entries(db_file, feed, source): +async def get_entries_of_source(db_file, feed, source): """ Remove entries that don't exist in a given parsed feed. Check the entries returned from feed and delete read non @@ -842,117 +908,7 @@ async def remove_nonexistent_entries(db_file, feed, source): "WHERE source = ?" ) items = cur.execute(sql, (source,)).fetchall() - entries = feed.entries - # breakpoint() - for item in items: - valid = False - for entry in entries: - title = None - link = None - time = None - # valid = False - # TODO better check and don't repeat code - if entry.has_key("id") and item[3]: - if entry.id == item[3]: - # print("compare1:", entry.id) - # print("compare2:", item[3]) - # print("============") - valid = True - break - else: - if entry.has_key("title"): - title = entry.title - else: - title = feed["feed"]["title"] - if entry.has_key("link"): - link = join_url(source, entry.link) - else: - link = source - if entry.has_key("published") and item[4]: - # print("compare11:", title, link, time) - # print("compare22:", item[1], item[2], item[4]) - # print("============") - time = rfc2822_to_iso8601(entry.published) - if (item[1] == title and - item[2] == link and - item[4] == time): - valid = True - break - else: - if (item[1] == title and - item[2] == link): - # print("compare111:", title, link) - # print("compare222:", item[1], item[2]) - # print("============") - valid = True - break - # TODO better check and don't repeat code - if not valid: - # print("id: ", item[0]) - # if title: - # print("title: ", title) - # print("item[1]: ", item[1]) - # if link: - # print("link: ", link) - # print("item[2]: ", item[2]) - # if entry.id: - # print("last_entry:", entry.id) - # print("item[3]: ", item[3]) - # if time: - # print("time: ", time) - # print("item[4]: ", item[4]) - # print("read: ", item[5]) - # breakpoint() - async with DBLOCK: - # TODO Send to table archive - # TODO Also make a regular/routine check for sources that - # have been changed (though that can only happen when - # manually editing) - ix = item[0] - # print(">>> SOURCE: ", source) - # print(">>> INVALID:", item[1]) - # print("title:", item[1]) - # print("link :", item[2]) - # print("id :", item[3]) - if item[5] == 1: - # print(">>> DELETING:", item[1]) - sql = ( - "DELETE " - "FROM entries " - "WHERE id = :ix" - ) - cur.execute(sql, (ix,)) - else: - # print(">>> ARCHIVING:", item[1]) - sql = ( - "INSERT " - "INTO archive " - "SELECT * " - "FROM entries " - "WHERE entries.id = :ix" - ) - try: - cur.execute(sql, (ix,)) - except: - print( - "ERROR DB insert from entries " - "into archive at index", ix - ) - sql = ( - "DELETE " - "FROM entries " - "WHERE id = :ix" - ) - try: - cur.execute(sql, (ix,)) - except: - print( - "ERROR DB deleting items from " - "table entries at index", ix - ) - async with DBLOCK: - limit = await get_settings_value(db_file, "archive") - await maintain_archive(cur, limit) + return items # TODO What is this function for? 2024-01-02 @@ -1253,7 +1209,7 @@ async def set_settings_value(db_file, key_value): async with DBLOCK: with create_connection(db_file) as conn: cur = conn.cursor() - await set_settings_value_default(cur, key) + # try: sql = ( "UPDATE settings " "SET value = :value " @@ -1263,48 +1219,10 @@ async def set_settings_value(db_file, key_value): "key": key, "value": value }) - - -async def set_settings_value_default(cur, key): - """ - Set default settings value, if no value found. - - Parameters - ---------- - cur : object - Cursor object. - key : str - Key: enabled, interval, master, quantum, random. - - Returns - ------- - val : str - Numeric value. - """ -# async def set_settings_value_default(cur): -# keys = ["enabled", "interval", "quantum"] -# for i in keys: -# sql = "SELECT id FROM settings WHERE key = ?" -# cur.execute(sql, (i,)) -# if not cur.fetchone(): -# val = settings.get_value_default(i) -# sql = "INSERT INTO settings(key,value) VALUES(?,?)" -# cur.execute(sql, (i, val)) - sql = ( - "SELECT id " - "FROM settings " - "WHERE key = ?" - ) - cur.execute(sql, (key,)) - if not cur.fetchone(): - value = config.get_value_default("settings", "Settings", key) - sql = ( - "INSERT " - "INTO settings(key,value) " - "VALUES(?,?)" - ) - cur.execute(sql, (key, value)) - return value + # except: + # logging.debug( + # "No specific value set for key {}.".format(key) + # ) async def get_settings_value(db_file, key): @@ -1324,31 +1242,20 @@ async def get_settings_value(db_file, key): val : str Numeric value. """ - # try: - # with create_connection(db_file) as conn: - # cur = conn.cursor() - # sql = "SELECT value FROM settings WHERE key = ?" - # cur.execute(sql, (key,)) - # result = cur.fetchone() - # except: - # result = settings.get_value_default(key) - # if not result: - # result = settings.get_value_default(key) - # return result with create_connection(db_file) as conn: + cur = conn.cursor() try: - cur = conn.cursor() sql = ( "SELECT value " "FROM settings " "WHERE key = ?" ) - val = cur.execute(sql, (key,)).fetchone()[0] + value = cur.execute(sql, (key,)).fetchone()[0] + return value except: - val = await set_settings_value_default(cur, key) - if not val: - val = await set_settings_value_default(cur, key) - return val + logging.debug( + "No specific value set for key {}.".format(key) + ) async def set_filters_value(db_file, key_value): @@ -1379,7 +1286,6 @@ async def set_filters_value(db_file, key_value): async with DBLOCK: with create_connection(db_file) as conn: cur = conn.cursor() - await set_filters_value_default(cur, key) sql = ( "UPDATE filters " "SET value = :value " @@ -1391,41 +1297,6 @@ async def set_filters_value(db_file, key_value): }) -async def set_filters_value_default(cur, key): - """ - Set default filters value, if no value found. - - Parameters - ---------- - cur : object - Cursor object. - key : str - Key: filter-allow, filter-deny, filter-replace. - - Returns - ------- - val : str - List of strings. - """ - sql = ( - "SELECT id " - "FROM filters " - "WHERE key = ?" - ) - cur.execute(sql, (key,)) - if not cur.fetchone(): - val = config.get_list("lists.yaml") - val = val[key] - val = ",".join(val) - sql = ( - "INSERT " - "INTO filters(key,value) " - "VALUES(?,?)" - ) - cur.execute(sql, (key, val)) - return val - - async def get_filters_value(db_file, key): """ Get filters value. @@ -1443,16 +1314,16 @@ async def get_filters_value(db_file, key): List of strings. """ with create_connection(db_file) as conn: + cur = conn.cursor() try: - cur = conn.cursor() sql = ( "SELECT value " "FROM filters " "WHERE key = ?" ) - val = cur.execute(sql, (key,)).fetchone()[0] + value = cur.execute(sql, (key,)).fetchone()[0] + return value except: - val = await set_filters_value_default(cur, key) - if not val: - val = await set_filters_value_default(cur, key) - return val + logging.debug( + "No specific value set for key {}.".format(key) + ) diff --git a/slixfeed/task.py b/slixfeed/task.py index 1604065..e03f9cd 100644 --- a/slixfeed/task.py +++ b/slixfeed/task.py @@ -116,14 +116,17 @@ async def start_tasks_xmpp(self, jid, tasks): async def clean_tasks_xmpp(jid, tasks): - logging.debug("Stopping tasks {} for JID {}".format(tasks, jid)) + logging.debug( + "Stopping tasks {} for JID {}".format(tasks, jid) + ) for task in tasks: # if task_manager[jid][task]: try: task_manager[jid][task].cancel() except: logging.debug( - "No task {} for JID {} (clean_tasks)".format(task, jid)) + "No task {} for JID {} (clean_tasks)".format(task, jid) + ) """ @@ -149,7 +152,13 @@ async def task_jid(self, jid): Jabber ID. """ db_file = get_pathname_to_database(jid) - enabled = await get_settings_value(db_file, "enabled") + enabled = ( + await get_settings_value( + db_file, "enabled") + ) or ( + get_value_default( + "settings", "Settings", "enabled") + ) if enabled: # NOTE Perhaps we want to utilize super with keyword # arguments in order to know what tasks to initiate. @@ -199,10 +208,22 @@ async def send_update(self, jid, num=None): """ logging.debug("Sending a news update to JID {}".format(jid)) db_file = get_pathname_to_database(jid) - enabled = await get_settings_value(db_file, "enabled") + enabled = ( + await get_settings_value( + db_file, "enabled") + ) or ( + get_value_default( + "settings", "Settings", "enabled") + ) if enabled: if not num: - num = await get_settings_value(db_file, "quantum") + num = ( + await get_settings_value( + db_file, "quantum") + ) or ( + get_value_default( + "settings", "Settings", "quantum") + ) else: num = int(num) news_digest = [] @@ -265,15 +286,23 @@ async def send_status(self, jid): jid : str Jabber ID. """ - logging.debug("Sending a status message to JID {}".format(jid)) - status_text="🤖️ Slixfeed RSS News Bot" + logging.debug( + "Sending a status message to JID {}".format(jid)) + status_text = "🤖️ Slixfeed RSS News Bot" db_file = get_pathname_to_database(jid) - enabled = await get_settings_value(db_file, "enabled") + enabled = ( + await get_settings_value( + db_file, "enabled") + ) or ( + get_value_default( + "settings", "Settings", "enabled") + ) if not enabled: status_mode = "xa" status_text = "📫️ Send \"Start\" to receive updates" else: - feeds = await get_number_of_items(db_file, "feeds") + feeds = await get_number_of_items( + db_file, "feeds") # print(await current_time(), jid, "has", feeds, "feeds") if not feeds: print(">>> not feeds:", feeds, "jid:", jid) @@ -335,7 +364,13 @@ async def refresh_task(self, jid, callback, key, val=None): ) if not val: db_file = get_pathname_to_database(jid) - val = await get_settings_value(db_file, key) + val = ( + await get_settings_value( + db_file, key) + ) or ( + get_value_default( + "settings", "Settings", key) + ) # if task_manager[jid][key]: if jid in task_manager: try: @@ -389,7 +424,8 @@ async def check_updates(jid): db_file = get_pathname_to_database(jid) urls = await get_feeds_url(db_file) await organize_items(db_file, urls) - val = get_value_default("settings", "Settings", "check") + val = get_value_default( + "settings", "Settings", "check") await asyncio.sleep(60 * float(val)) # Schedule to call this function again in 90 minutes # loop.call_at( diff --git a/slixfeed/url.py b/slixfeed/url.py index 7d82402..5549144 100644 --- a/slixfeed/url.py +++ b/slixfeed/url.py @@ -54,7 +54,7 @@ def replace_hostname(url, url_type): pathname = parted_url.path queries = parted_url.query fragment = parted_url.fragment - proxies = config.get_list("proxies.yaml") + proxies = config.get_list("proxies.yaml", "proxies") for proxy in proxies: proxy = proxies[proxy] if hostname in proxy["hostname"] and url_type in proxy["type"]: @@ -92,8 +92,7 @@ def remove_tracking_parameters(url): pathname = parted_url.path queries = parse_qs(parted_url.query) fragment = parted_url.fragment - trackers = config.get_list("queries.yaml") - trackers = trackers["trackers"] + trackers = config.get_list("queries.yaml", "trackers") for tracker in trackers: if tracker in queries: del queries[tracker] queries_new = urlencode(queries, doseq=True) diff --git a/slixfeed/xmpp/client.py b/slixfeed/xmpp/client.py index 68421fc..f15248b 100644 --- a/slixfeed/xmpp/client.py +++ b/slixfeed/xmpp/client.py @@ -16,10 +16,7 @@ TODO 2) Assure message delivery before calling a new task. See https://slixmpp.readthedocs.io/en/latest/event_index.html#term-marker_acknowledged -3) Do not send updates when busy or away. - See https://slixmpp.readthedocs.io/en/latest/event_index.html#term-changed_status - -4) XHTTML-IM +3) XHTTML-IM case _ if message_lowercase.startswith("html"): message['html']=" Parse me! diff --git a/slixfeed/xmpp/process.py b/slixfeed/xmpp/process.py index 7df17c1..fa3a109 100644 --- a/slixfeed/xmpp/process.py +++ b/slixfeed/xmpp/process.py @@ -23,6 +23,7 @@ import slixfeed.action as action from slixfeed.config import ( add_to_list, get_default_dbdir, + get_value_default, get_value, get_pathname_to_database, remove_from_list) @@ -224,13 +225,20 @@ async def message(self, message): if not exist: await sqlite.insert_feed(db_file, url, title) await action.organize_items(db_file, [url]) - old = await sqlite.get_settings_value(db_file, "old") + old = ( + await sqlite.get_settings_value(db_file, "old") + ) or ( + get_value_default("settings", "Settings", "old") + ) if old: - await task.clean_tasks_xmpp(jid, ["status"]) + await task.clean_tasks_xmpp( + jid, ["status"]) # await send_status(jid) - await task.start_tasks_xmpp(self, jid, ["status"]) + await task.start_tasks_xmpp( + self, jid, ["status"]) else: - await sqlite.mark_source_as_read(db_file, url) + await sqlite.mark_source_as_read( + db_file, url) response = ( "> {}\nNews source has been " "added to subscription list."