diff --git a/accounts.ini b/accounts.ini new file mode 100644 index 0000000..98493c7 --- /dev/null +++ b/accounts.ini @@ -0,0 +1,74 @@ +# Settings to tell the bot to which accounts to connect +# and also from which accounts it receives instructions. + +[XMPP] +nickname = +username = +password = +# port = 5347 +operator = + +[ActivityPub] +# Not yet implemented +username = +password = +operator = + +[Email] +# Not yet implemented +recipient_emails = +sender_emails = + +[IMAP] +# Not yet implemented +username = +password = +# port = 993 + +[SMTP] +# Not yet implemented +host = +# port = 465 + +[IRC] +# Not yet implemented +username = +password = +# port = 6667 +operator = + +[LXMF] +# Not yet implemented +username = +password = +operator = + +[Matrix] +# Not yet implemented +username = +password = +operator = + +[Nostr] +# Not yet implemented +username = +password = +operator = + +[Session] +# Not yet implemented +username = +password = +operator = + +[SIP] +# Not yet implemented +username = +password = +operator = + +[TOX] +# Not yet implemented +username = +password = +operator = diff --git a/lists.yaml b/lists.yaml new file mode 100644 index 0000000..da77bdf --- /dev/null +++ b/lists.yaml @@ -0,0 +1,434 @@ +# On occasion when given web page has no auto-discovery +# setup, the following list is engaged into action to +# scan for possible paths (i.e. pathname) at which web +# feed might be discovered. 
+ +pathnames: + - .atom +# - .json + - .rss + - .xml + # wordpress + - /?feed=atom + - /?feed=rdf + - /?feed=rss + - /?feed=xml + # phpbb + - /?format=atom + - /?format=rdf + - /?format=rss + - /?format=xml + - /app.php/feed + - /atom + - /atom.php + - /atom.xml + - /blog/feed/ + - /content-feeds/ + - /external.php?type=RSS2 + - /en/feed/ + # good practice + - /feed + - /feed.atom +# - /feed.json + - /feed.php + - /feed.rdf + - /feed.rss + - /feed.xml + - /feed/atom/ + - /feeds/news_feed + - /feeds/posts/default + - /feeds/posts/default?alt=atom + - /feeds/posts/default?alt=rss + - /feeds/rss/news.xml.php + - /forum_rss.php + - /index.atom + - /index.php/feed + # simple machine forum + - /index.php?type=atom;action=.xml + - /index.php?type=rss;action=.xml + - /index.rss + - /jekyll/feed.xml + # discourse + - /latest.rss + - /news + - /news.xml + - /news.xml.php + - /news/feed + # discourse + - /posts.rss + - /rdf + - /rdf.php + - /rdf.xml + - /rss +# - /rss.json + - /rss.php + - /rss.xml + # mybb + - /syndication.php?type=atom1.0 + - /syndication.php?type=rss2.0 + # statusnet + - /timeline.rss + # peertube + - /videos.atom +# - /videos.json + - /videos.xml + - /xml/feed.atom + - /xml/feed.rdf + - /xml/feed.rss + +# Filters can be used to protect or depress your life. +# +# Before you continue reading, please realize that the +# news has to be treated as a very annoying and bad +# salesperson who wants to guile and trick you into +# buying something that you do not need at all. +# Sometimes you just have to hang up the phone. +# +# "I always tell people: News is the most highly +# "developed form of fiction. The most difficult." +# Donald Pleasence as J.G. in "The News-Benders 1968". +# +# News is no longer an information service, if it ever +# was. News in most of the world is a mean of fear. +# +# The government regulated news agencies, private and +# public, are not in your favour. You must filter in +# order to avoid from subverting your mind. 
+#
+# Although it is recognized that the Palestinian land
+# confiscation issue is indeed a problem that affects
+# us all (i.e. confiscation of land without punishing).
+#
+# I have worked with far right wing Israeli and Zionist
+# news journalists and editors for over a decade, and it
+# occurred to me, time and again, that these entities
+# have published factual and fictional criticism against
+# themselves (the Israeli side), in order to distract
+# the Israeli citizenry from removing the government
+# because the Israelis wanted to better their life by
+# improving the life of the average citizen.
+#
+# Most of my Israeli brothers and sisters are either poor
+# or in debt, and the Israeli government uses the conflict
+# against Palestinians and Israelis as one.
+#
+# By observing our problems instead of your own; you are,
+# in fact, harming us and yourself.
+#
+# I have added keywords to deny entries that are related
+# to the Palestinians, because most of the articles about
+# the subject are meant to distract you too from issues
+# that are much closer and more important to you.
+#
+# If the Americans and the Europeans will concentrate on
+# Israel or Palestine, never mind which side they support,
+# instead of protecting their own freedom of speech and their
+# freedom to keep and bear arms, then tomorrow, without their
+# freedoms, they would not even be able to express any
+# opinion on any matter.
+#
+# If you want to harm yourself, your family and friends and
+# the Chinese, Israelis, Palestinians, Tibetans, then you are
+# free to remove the filters and concentrate on issues that
+# do not really matter.
+#
+# But if you really care for Chinese, Israelis, Palestinians,
+# Tibetans, then you must concentrate your efforts on your
+# local area, care for your own neighbours and your own race,
+# for uncompromising freedom and liberty for all, because
+# otherwise none of us will have any.
+#
+# It is "all of us for all of us" or we are on our own.
+# And it has to start locally. There is no other way. +# +# You are advised to look into the keywords and share +# yours with us, so people would get the best out of +# this news application. + +# Entries with the following keywords will not be filtered +filter-allow: + - earthing + - gaddafi + - gadhafi + - hitler + - jabber + - marijuana + - ossad + - qaddafi + - qadhafi + - risc + - sadam + - saddam + - war crim + - ware + - xmpp + +# Entries with the following keywords will be filtered +filter-deny: + # brands + # Almost every time you see a brand name in title or content, + # it is because someone, usually a marketing agency or a + # venture capital firm, has paid for it to be mentioned, not + # because an editor has found it useful or interesting. + - airbnb + - android + - at&t + - booking + - discord + - facebook + - gmail + - google + - gsoc + - instagram + - ipad + - iphone + - ipod + - microsoft + - mozilla + - myspace + - netflix + - openai + - pinterest + - robinhood + - snapchat + - spotify + - tumblr + - twitter + - verizon + - waze + - whatsapp + - wikimedia + - wikipedia + - wix + - yahoo + - youtube + # consume whore + - black friday + - coupon + - gambl + # death + - corona + - covid + - crime + - criminal + - dead + - death + - die + - disaster + - doomsday + - murder + - vaccine + - vax + - war + # degeneracy + - gay + - gender fluid + - gender identity + - homosex + - lesbian + - lgbt + - nude + - nudity + - porn + - pr0n + - prostitut + - queen + - queer + - tranny + - trans # transexual transgender transsexual + - whor + - xham + - xvid + # distraction figures + - el aviv + - el-aviv + - gaza + - gazza + - hamas + - hammas + - harari + - harary + - holocaust + - idf + - israel + - jerus + - lex jon + - lon mus + - netanya + - nfowar + - oxnew + - palestin + - trump + - west bank + # fiction + - astronaut + - meteor + - nasa + - space + # names + - boomer + - gen z + # substances + - 🚬 + - alcohol + - cigar + - drug + +# The following index 
was generated from machine learning +# scan of political and scientific forums gathered within +# a duration of 6 hours; this index is a set of value(s) +# (phrases and words) to be replaced by a value (key). +# +# While non reflects any of the opinions of the author +# of this program, you might find it amusing, entertaining, +# essential, important or all of the above or perhaps just +# a joke of "1984" or "They Live" to experience what it +# feels like. +# +# While this set of filters is intended merely for example +# purposes only to realize the power one might have when is +# given the liberty to control contents received, this +# machine-learning generated set is surprisingly effective +# and efficient. +# +# The filtering ability was primarily meant to solve an issue +# which my orthodox rabbi has asked me to solve for him. +# That set, which he and his community use, is not included. + +# Replace words and phrases +filter-replace: + "CIA-backed riot group": + # We are all using 4chan; yet you have got to wonder how + # is it still allowed online in a centralized DNS system. + # If 4chan was served only in I2P, IPFS, Nostr, Plebbit + # or Yggdrasil then I would not think so. 
+ - 4chan + - al qaeda + - al-qaeda + - antifa + - black lives matter + - da'ish + - daish + - isil + - isis + - ku klux klan + "CIA-controlled publishing agency": + - abc news + - cbn news + - cnbc news + - fox news + - foxnews + - msn news + - sky news + - skynews + "dystopian": + - orwellian + "Earth is Horizontal": + - earth is flat + - flatearth + - flat earth + "electro magnetic death antenna": + - 5g + "electro magnetic water shaker antenna": + - 2.4ghz + "electro magnetic air shaker antenna": + - 5ghz + - 5.0ghz + "Federal Government of USA": + - biden administration + - biden admin + - biden government + - bush administration + - bush admin + - bush government + - obama administration + - obama admin + - obama government + - trump administration + - trump admin + - trump government + "fictional weapon": + - atom bomb + - atomic bomb + - nukes + - nuclear bomb + "firmament": + - atmosphere + - ozone + # People who have been made to believe that earth, moon and sun are + # flying rocks traveling in an impossible vacuum of nothingness. + "globe believer": + - globtard + "Government Weather Change Scam": + - climate change + - global warming + "hazardous": + - gmo + "Human Trafficking Group": + - prostitution ring + "Human Trafficking Industry": + - porn industry + - pornographic industry + - pornography industry + - sex industry + "impossible war": + - atomic war + - nuclear war + # People who claim that earth is plane, and is enclosed + # by a firmament that separates earth from water, not + # space. 
+ "Horizontal Earth Activist": + - flatearther + - flat earther + - globe sceptic + "internationalism": + - globalism + "internationalist": + - globalist + # Better options might be Freenet, Mixnet + "Meshnet": + - darknet + - dark net + "MI5-controlled publishing agency": + - bbc news + - the guardian + "Misleading Digital Coin": + - oecd + - shit coin + "NSDAP": + - nazi + "online content publishing platform": + - facebook + - instagram + - mastodon + - myspace + - pinterest + - tumblr + - twitter + - wix + - wordpress + "petroleum": + - fossil fuel + - fossil oil + "plane": + - planet + "poison": + - flouride + - fluoride + "poisoned": + - flouridated + - fluoridated + "poisoning": + - flouridation + - fluoridation + "Scam Currency Trading": + - forex + "water above the firmament": + - outerspace + - outer space + "World": + - globe + "Worldwide": + - around the globe + - global + - globally diff --git a/settings.ini b/settings.ini new file mode 100644 index 0000000..259f8fe --- /dev/null +++ b/settings.ini @@ -0,0 +1,24 @@ +# This file lists default settings per database. +# See file /usr/share/slixfeed/defaults.ini +[Settings] + +# Maximum items to archive (0 - 500) +archive = 50 + +# Work status (Value 0 to disable) +enabled = 1 + +# Update interval (Minimum value) +interval = 300 + +# Maximum length of summary (Value 0 to disable) +length = 300 + +# Mark entries of newly added entries as unread +old = 0 + +# Amount of entries per update +quantum = 3 + +# Pick random item from database +random = 0 diff --git a/slixfeed/__main__.py b/slixfeed/__main__.py index b68cfd6..cc3a0ee 100644 --- a/slixfeed/__main__.py +++ b/slixfeed/__main__.py @@ -50,14 +50,16 @@ TODO https://github.com/michael-lazar/pygopherd https://github.com/gopherball/gb -13) Support ActivityPub @person@domain (see Tip Of The Day). +14) Support ActivityPub @person@domain (see Tip Of The Day). -12) Tip Of The Day. +15) Tip Of The Day. 
Did you know that you can follow you favorite Mastodon feeds by just sending the URL address? Supported fediverse websites are: Akkoma, HubZilla, Mastodon, Misskey, Pixelfed, Pleroma, Soapbox. +16) Brand: News Broker, Newsman, Newsdealer, Laura Harbinger + """ # vars and their meanings: @@ -65,8 +67,12 @@ TODO # res = response (HTTP) from argparse import ArgumentParser +import configparser +import filehandler +# from filehandler import get_default_confdir from getpass import getpass import logging +import os # from datetime import date # import time @@ -87,37 +93,86 @@ if __name__ == '__main__': # Output verbosity options. parser.add_argument( - "-q", "--quiet", help="set logging to ERROR", - action="store_const", dest="loglevel", - const=logging.ERROR, default=logging.INFO - ) + "-q", + "--quiet", + help="set logging to ERROR", + action="store_const", + dest="loglevel", + const=logging.ERROR, + default=logging.INFO + ) parser.add_argument( - "-d", "--debug", help="set logging to DEBUG", - action="store_const", dest="loglevel", - const=logging.DEBUG, default=logging.INFO - ) + "-d", + "--debug", + help="set logging to DEBUG", + action="store_const", + dest="loglevel", + const=logging.DEBUG, + default=logging.INFO + ) # JID and password options. - parser.add_argument("-j", "--jid", dest="jid", - help="JID to use") - parser.add_argument("-p", "--password", dest="password", - help="password to use") + parser.add_argument( + "-j", + "--jid", + dest="jid", + help="Jabber ID" + ) + parser.add_argument( + "-p", + "--password", + dest="password", + help="Password of JID" + ) + parser.add_argument( + "-n", + "--nickname", + dest="nickname", + help="Display name" + ) args = parser.parse_args() # Setup logging. 
- logging.basicConfig(level=args.loglevel, - format='%(levelname)-8s %(message)s') + logging.basicConfig( + level=args.loglevel, + format='%(levelname)-8s %(message)s' + ) - if args.jid is None: - args.jid = input("Username: ") - if args.password is None: - args.password = getpass("Password: ") + # Try configuration file + config = configparser.RawConfigParser() + config_dir = filehandler.get_default_confdir() + if not os.path.isdir(config_dir): + os.mkdir(config_dir) + # TODO Copy file from /etc/slixfeed/ or /usr/share/slixfeed/ + config_file = os.path.join(config_dir, r"accounts.ini") + config.read(config_file) + if config.has_section("XMPP"): + xmpp = config["XMPP"] + nickname = xmpp["nickname"] + username = xmpp["username"] + password = xmpp["password"] + + # Use arguments if were given + if args.jid: + username = args.jid + if args.password: + password = args.password + if args.nickname: + nickname = args.nickname + + # Prompt for credentials if none were given + if username is None: + username = input("Username: ") + if password is None: + password = getpass("Password: ") + if nickname is None: + nickname = input("Nickname: ") # Setup the Slixfeed and register plugins. Note that while plugins may # have interdependencies, the order in which you register them does # not matter. 
- xmpp = Slixfeed(args.jid, args.password) + xmpp = Slixfeed(username, password, nickname) xmpp.register_plugin('xep_0004') # Data Forms xmpp.register_plugin('xep_0030') # Service Discovery xmpp.register_plugin('xep_0045') # Multi-User Chat diff --git a/slixfeed/confighandler.py b/slixfeed/confighandler.py index 7ff0eb9..148129b 100644 --- a/slixfeed/confighandler.py +++ b/slixfeed/confighandler.py @@ -11,10 +11,12 @@ TODO """ +import configparser +# from filehandler import get_default_confdir +import filehandler import os -from filehandler import get_default_confdir from random import randrange - +import yaml async def get_value_default(key): """ @@ -23,144 +25,44 @@ async def get_value_default(key): Parameters ---------- key : str - Key: enabled, filter-allow, filter-deny, - interval, quantum, random. + Key: archive, enabled, allow, deny,interval, + length, old, quantum, random, replace. Returns ------- - result : int or str + result : str Value. """ - match key: - case "archive": - result = 50 - case "enabled": - result = 1 - case "filter-allow": - result = "hitler,sadam,saddam" - case "filter-deny": - result = "crim,dead,death,disaster,murder,war" - case "interval": - result = 300 - case "masters": - result = randrange(100000, 999999) - case "length": - result = 300 - case "old": - result = 0 - case "quantum": - result = 3 - case "random": - result = 0 - case "token": - result = "none" + config = configparser.RawConfigParser() + config_dir = filehandler.get_default_confdir() + if not os.path.isdir(config_dir): + config_dir = '/usr/share/slixfeed/' + config_file = os.path.join(config_dir, r"settings.ini") + config.read(config_file) + if config.has_section("Settings"): + result = config["Settings"][key] return result -def get_list(): +async def get_list(key): """ - Get dictionary file. + Get settings default value. + + Parameters + ---------- + key : str + Key: allow, deny, pathname, replace. Returns ------- - paths : list - Dictionary of pathnames. 
+ result : list + List of pathnames or keywords. """ - paths = [] - cfg_dir = get_default_confdir() - if not os.path.isdir(cfg_dir): - os.mkdir(cfg_dir) - cfg_file = os.path.join(cfg_dir, r"url_paths.txt") - if not os.path.isfile(cfg_file): - # confighandler.generate_dictionary() - list = get_default_list() - file = open(cfg_file, "w") - file.writelines("\n".join(list)) - file.close() - file = open(cfg_file, "r") - lines = file.readlines() - for line in lines: - paths.extend([line.strip()]) - return paths - - -# async def generate_dictionary(): -def get_default_list(): - """ - Generate a dictionary file. - - Returns - ------- - paths : list - Dictionary of pathnames. - """ - paths = [ - ".atom", - ".rss", - ".xml", - "/?feed=atom", - "/?feed=rdf", - "/?feed=rss", - "/?feed=xml", # wordpress - "/?format=atom", - "/?format=rdf", - "/?format=rss", - "/?format=xml", # phpbb - "/app.php/feed", - "/atom", - "/atom.php", - "/atom.xml", - "/blog/feed/", - "/content-feeds/", - "/external.php?type=RSS2", - "/en/feed/", - "/feed", # good practice - "/feed.atom", - # "/feed.json", - "/feed.php", - "/feed.rdf", - "/feed.rss", - "/feed.xml", - "/feed/atom/", - "/feeds/news_feed", - "/feeds/posts/default", - "/feeds/posts/default?alt=atom", - "/feeds/posts/default?alt=rss", - "/feeds/rss/news.xml.php", - "/forum_rss.php", - "/index.atom", - "/index.php/feed", - "/index.php?type=atom;action=.xml", #smf - "/index.php?type=rss;action=.xml", #smf - "/index.rss", - "/jekyll/feed.xml", - "/latest.rss", - "/news", - "/news.xml", - "/news.xml.php", - "/news/feed", - "/posts.rss", # discourse - "/rdf", - "/rdf.php", - "/rdf.xml", - "/rss", - # "/rss.json", - "/rss.php", - "/rss.xml", - "/syndication.php?type=atom1.0", #mybb - "/syndication.php?type=rss2.0", - "/timeline.rss", - "/videos.atom", - # "/videos.json", - "/videos.xml", - "/xml/feed.rss" - ] - return paths - # cfg_dir = get_default_confdir() - # if not os.path.isdir(cfg_dir): - # os.mkdir(cfg_dir) - # cfg_file = 
os.path.join(cfg_dir, r"url_paths.txt") - # if not os.path.isfile(cfg_file): - # file = open(cfg_file, "w") - # file.writelines("\n".join(paths)) - # file.close() + config_dir = filehandler.get_default_confdir() + if not os.path.isdir(config_dir): + config_dir = '/usr/share/slixfeed/' + config_file = os.path.join(config_dir, r"lists.yaml") + with open(config_file) as defaults: + default = yaml.safe_load(defaults) + result = default[key] + return result diff --git a/slixfeed/datahandler.py b/slixfeed/datahandler.py index cf2aba7..b8e208e 100644 --- a/slixfeed/datahandler.py +++ b/slixfeed/datahandler.py @@ -25,16 +25,7 @@ from lxml import html from datetimehandler import now, rfc2822_to_iso8601 from confighandler import get_list from listhandler import is_listed -from sqlitehandler import ( - add_entry_and_set_date, - insert_feed, - check_entry_exist, - check_feed_exist, - get_feeds_url, - remove_nonexistent_entries, - update_source_status, - update_source_validity - ) +import sqlitehandler as sqlite from urllib import error # from xml.etree.ElementTree import ElementTree, ParseError from urllib.parse import urljoin, urlsplit, urlunsplit @@ -54,7 +45,7 @@ async def download_updates(db_file, url=None): if url: urls = [url] # Valid [url] and [url,] and (url,) else: - urls = await get_feeds_url(db_file) + urls = await sqlite.get_feeds_url(db_file) for url in urls: # print(os.path.basename(db_file), url[0]) source = url[0] @@ -65,7 +56,7 @@ async def download_updates(db_file, url=None): # urls.next() # next(urls) continue - await update_source_status( + await sqlite.update_source_status( db_file, res[1], source @@ -83,7 +74,7 @@ async def download_updates(db_file, url=None): valid = 0 else: valid = 1 - await update_source_validity( + await sqlite.update_source_validity( db_file, source, valid) @@ -108,7 +99,7 @@ async def download_updates(db_file, url=None): entries = feed.entries # length = len(entries) # await remove_entry(db_file, source, length) - await 
remove_nonexistent_entries( + await sqlite.remove_nonexistent_entries( db_file, feed, source @@ -145,7 +136,7 @@ async def download_updates(db_file, url=None): eid = entry.id else: eid = link - exist = await check_entry_exist( + exist = await sqlite.check_entry_exist( db_file, source, eid=eid, @@ -212,7 +203,7 @@ async def download_updates(db_file, url=None): # breakpoint() print(source) print(date) - await add_entry_and_set_date( + await sqlite.add_entry_and_set_date( db_file, source, entry @@ -412,9 +403,9 @@ async def add_feed_no_check(db_file, data): url = data[0] title = data[1] url = await trim_url(url) - exist = await check_feed_exist(db_file, url) + exist = await sqlite.check_feed_exist(db_file, url) if not exist: - msg = await insert_feed(db_file, url, title) + msg = await sqlite.insert_feed(db_file, url, title) await download_updates(db_file, [url]) else: ix = exist[0] @@ -445,7 +436,7 @@ async def add_feed(db_file, url): """ msg = None url = await trim_url(url) - exist = await check_feed_exist(db_file, url) + exist = await sqlite.check_feed_exist(db_file, url) if not exist: res = await download_feed(url) if res[0]: @@ -459,7 +450,7 @@ async def add_feed(db_file, url): msg = await probe_page(add_feed, url, res[0], db_file=db_file) else: status = res[1] - msg = await insert_feed( + msg = await sqlite.insert_feed( db_file, url, title, @@ -777,7 +768,7 @@ async def feed_mode_request(url, tree): """ feeds = {} parted_url = urlsplit(url) - paths = get_list() + paths = await get_list("pathnames") for path in paths: address = urlunsplit([ parted_url.scheme, @@ -877,7 +868,7 @@ async def feed_mode_scan(url, tree): feeds = {} # paths = [] # TODO Test - paths = get_list() + paths = await get_list("pathnames") for path in paths: # xpath_query = "//*[@*[contains(.,'{}')]]".format(path) xpath_query = "//a[contains(@href,'{}')]".format(path) diff --git a/slixfeed/filehandler.py b/slixfeed/filehandler.py index 4d12c10..07c2386 100644 --- a/slixfeed/filehandler.py +++ 
b/slixfeed/filehandler.py @@ -72,8 +72,7 @@ def get_default_confdir(): config_home = os.path.join(os.environ.get('HOME'), '.config') return os.path.join(config_home, 'slixfeed') -# NOTE Perhaps this needs to be executed -# just once per program execution + async def initdb(jid, callback, message=None): """ Callback function to instantiate action on database. diff --git a/slixfeed/listhandler.py b/slixfeed/listhandler.py index 11dcab7..ca3227d 100644 --- a/slixfeed/listhandler.py +++ b/slixfeed/listhandler.py @@ -15,7 +15,7 @@ TODO """ -from sqlitehandler import get_settings_value +import sqlitehandler as sqlite async def add_to_list(newwords, keywords): @@ -34,10 +34,11 @@ async def add_to_list(newwords, keywords): val : str List of current keywords and new keywords. """ - try: - keywords = keywords.split(",") - except: - keywords = [] + if isinstance(keywords, str) or keywords is None: + try: + keywords = keywords.split(",") + except: + keywords = [] newwords = newwords.lower().split(",") for word in newwords: word = word.strip() @@ -64,10 +65,11 @@ async def remove_from_list(newwords, keywords): val : str List of new keywords. 
""" - try: - keywords = keywords.split(",") - except: - keywords = [] + if isinstance(keywords, str) or keywords is None: + try: + keywords = keywords.split(",") + except: + keywords = [] newwords = newwords.lower().split(",") for word in newwords: word = word.strip() @@ -98,7 +100,7 @@ async def is_listed(db_file, key, string): """ # async def reject(db_file, string): # async def is_blacklisted(db_file, string): - list = await get_settings_value( + list = await sqlite.get_filters_value( db_file, key ) @@ -118,37 +120,37 @@ async def is_listed(db_file, key, string): This code was tested at module datahandler - reject = 0 - blacklist = await get_settings_value( - db_file, - "filter-deny" - ) - # print(">>> blacklist:") - # print(blacklist) - # breakpoint() - if blacklist: - blacklist = blacklist.split(",") - # print(">>> blacklist.split") - # print(blacklist) - # breakpoint() - for i in blacklist: - # print(">>> length", len(i)) - # breakpoint() - # if len(i): - if not i or len(i) < 2: - print(">>> continue due to length", len(i)) - # breakpoint() - continue - # print(title) - # print(">>> blacklisted word:", i) - # breakpoint() - test = (title + " " + summary + " " + link) - if i in test.lower(): - reject = 1 - break - - if reject: - print("rejected:",title) - entry = (title, '', link, source, date, 1); +reject = 0 +blacklist = await get_settings_value( + db_file, + "filter-deny" + ) +# print(">>> blacklist:") +# print(blacklist) +# breakpoint() +if blacklist: + blacklist = blacklist.split(",") + # print(">>> blacklist.split") + # print(blacklist) + # breakpoint() + for i in blacklist: + # print(">>> length", len(i)) + # breakpoint() + # if len(i): + if not i or len(i) < 2: + print(">>> continue due to length", len(i)) + # breakpoint() + continue + # print(title) + # print(">>> blacklisted word:", i) + # breakpoint() + test = (title + " " + summary + " " + link) + if i in test.lower(): + reject = 1 + break + +if reject: + print("rejected:",title) + entry = (title, 
'', link, source, date, 1); """ \ No newline at end of file diff --git a/slixfeed/sqlitehandler.py b/slixfeed/sqlitehandler.py index c823ebf..fbceeff 100644 --- a/slixfeed/sqlitehandler.py +++ b/slixfeed/sqlitehandler.py @@ -19,7 +19,7 @@ from asyncio import Lock from bs4 import BeautifulSoup from datetime import date # from slixfeed.confighandler import get_value_default -import confighandler as confighandler +import confighandler as config # from slixfeed.datahandler import join_url import datahandler as datahandler from datetimehandler import current_time, rfc2822_to_iso8601 @@ -122,6 +122,13 @@ def create_tables(db_file): "value INTEGER" ");" ) + filters_table_sql = ( + "CREATE TABLE IF NOT EXISTS filters (" + "id INTEGER PRIMARY KEY," + "key TEXT NOT NULL," + "value TEXT" + ");" + ) cur = conn.cursor() # cur = get_cursor(db_file) cur.execute(feeds_table_sql) @@ -129,6 +136,7 @@ def create_tables(db_file): cur.execute(archive_table_sql) # cur.execute(statistics_table_sql) cur.execute(settings_table_sql) + cur.execute(filters_table_sql) def get_cursor(db_file): @@ -911,7 +919,13 @@ async def maintain_archive(cur, limit): "FROM archive" ) count = cur.execute(sql).fetchone()[0] - reduc = count - limit + # FIXME Upon first time joining to a groupchat + # and then adding a URL, variable "limit" + # becomes a string in one of the iterations. 
+ # if isinstance(limit,str): + # print("STOP") + # breakpoint() + reduc = count - int(limit) if reduc > 0: sql = ( "DELETE FROM archive " @@ -1046,7 +1060,7 @@ async def remove_nonexistent_entries(db_file, feed, source): cur.execute(sql, (ix,)) except: print( - "ERROR DB inset from entries " + "ERROR DB insert from entries " "into archive at index", ix ) sql = ( @@ -1456,12 +1470,11 @@ async def set_settings_value(db_file, key_value): "WHERE key = :key" ) cur.execute(sql, { - "key": key, + "key": key, "value": val }) -# TODO Place settings also in a file async def set_settings_value_default(cur, key): """ Set default settings value, if no value found. @@ -1494,7 +1507,7 @@ async def set_settings_value_default(cur, key): ) cur.execute(sql, (key,)) if not cur.fetchone(): - val = await confighandler.get_value_default(key) + val = await config.get_value_default(key) sql = ( "INSERT " "INTO settings(key,value) " @@ -1513,7 +1526,8 @@ async def get_settings_value(db_file, key): db_file : str Path to database file. key : str - Key: "enabled", "interval", "master", "quantum", "random". + Key: archive, enabled, filter-allow, filter-deny, + interval, length, old, quantum, random. Returns ------- @@ -1545,3 +1559,109 @@ async def get_settings_value(db_file, key): if not val: val = await set_settings_value_default(cur, key) return val + + +async def set_filters_value(db_file, key_value): + """ + Set settings value. + + Parameters + ---------- + db_file : str + Path to database file. + key_value : list + key : str + filter-allow, filter-deny, filter-replace. + value : int + Numeric value. 
+ """ + # if isinstance(key_value, list): + # key = key_value[0] + # val = key_value[1] + # elif key_value == "enable": + # key = "enabled" + # val = 1 + # else: + # key = "enabled" + # val = 0 + key = key_value[0] + val = key_value[1] + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + await set_filters_value_default(cur, key) + sql = ( + "UPDATE filters " + "SET value = :value " + "WHERE key = :key" + ) + cur.execute(sql, { + "key": key, + "value": val + }) + + +async def set_filters_value_default(cur, key): + """ + Set default filters value, if no value found. + + Parameters + ---------- + cur : object + Cursor object. + key : str + Key: filter-allow, filter-deny, filter-replace. + + Returns + ------- + val : str + List of strings. + """ + sql = ( + "SELECT id " + "FROM filters " + "WHERE key = ?" + ) + cur.execute(sql, (key,)) + if not cur.fetchone(): + val = await config.get_list(key) + val = ",".join(val) + sql = ( + "INSERT " + "INTO filters(key,value) " + "VALUES(?,?)" + ) + cur.execute(sql, (key, val)) + return val + + +async def get_filters_value(db_file, key): + """ + Get filters value. + + Parameters + ---------- + db_file : str + Path to database file. + key : str + Key: allow, deny. + + Returns + ------- + val : str + List of strings. + """ + with create_connection(db_file) as conn: + try: + cur = conn.cursor() + sql = ( + "SELECT value " + "FROM filters " + "WHERE key = ?" 
+ ) + val = cur.execute(sql, (key,)).fetchone()[0] + except: + val = await set_filters_value_default(cur, key) + if not val: + val = await set_filters_value_default(cur, key) + return val diff --git a/slixfeed/xmpphandler.py b/slixfeed/xmpphandler.py index c42f97e..c1b6a4a 100644 --- a/slixfeed/xmpphandler.py +++ b/slixfeed/xmpphandler.py @@ -57,35 +57,13 @@ import logging import slixmpp from slixmpp.exceptions import IqError from random import randrange -from datahandler import ( - add_feed, - add_feed_no_check, - check_xmpp_uri, - feed_to_http, - view_entry, - view_feed - ) +import datahandler as fetcher from datetimehandler import current_time from filehandler import initdb -from listhandler import add_to_list, remove_from_list -from sqlitehandler import ( - get_settings_value, - set_settings_value, - mark_source_as_read, - last_entries, - list_feeds, - remove_feed, - search_feeds, - statistics, - toggle_status - ) -from taskhandler import ( - clean_tasks_xmpp, - start_tasks_xmpp, - refresh_task, - send_status, - send_update - ) +import listhandler as lister +import sqlitehandler as sqlite +import taskhandler as tasker + from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound # from slixmpp.plugins.xep_0402 import BookmarkStorage, Conference from slixmpp.plugins.xep_0048.stanza import Bookmarks @@ -116,9 +94,11 @@ class Slixfeed(slixmpp.ClientXMPP): ------- News bot that sends updates from RSS feeds. """ - def __init__(self, jid, password, room=None, nick=None): + def __init__(self, jid, password, nick): slixmpp.ClientXMPP.__init__(self, jid, password) + self.nick = nick + # The session_start event will be triggered when # the bot establishes its connection with the server # and the XML streams are ready for use. 
We want to @@ -192,11 +172,11 @@ class Slixfeed(slixmpp.ClientXMPP): # print("def presence_available", presence["from"].bare) if presence["from"].bare not in self.boundjid.bare: jid = presence["from"].bare - await clean_tasks_xmpp( + await tasker.clean_tasks_xmpp( jid, ["interval", "status", "check"] ) - await start_tasks_xmpp( + await tasker.start_tasks_xmpp( self, jid, ["interval", "status", "check"] @@ -209,7 +189,7 @@ class Slixfeed(slixmpp.ClientXMPP): if not self.boundjid.bare: jid = presence["from"].bare print(">>> unavailable:", jid) - await clean_tasks_xmpp( + await tasker.clean_tasks_xmpp( jid, ["interval", "status", "check"] ) @@ -274,7 +254,7 @@ class Slixfeed(slixmpp.ClientXMPP): print(muc_jid) self.plugin['xep_0045'].join_muc( muc_jid, - "Slixfeed (RSS News Bot)", + self.nick, # If a room password is needed, use: # password=the_room_password, ) @@ -299,7 +279,7 @@ class Slixfeed(slixmpp.ClientXMPP): for muc in mucs: bookmarks.add_conference( muc, - "Slixfeed (RSS News Bot)", + self.nick, autojoin=True ) await self.plugin['xep_0048'].set_bookmarks(bookmarks) @@ -308,7 +288,7 @@ class Slixfeed(slixmpp.ClientXMPP): # print(await self.plugin['xep_0048'].get_bookmarks()) # bm = BookmarkStorage() - # bm.conferences.append(Conference(muc_jid, autojoin=True, nick="Slixfeed (RSS News Bot)")) + # bm.conferences.append(Conference(muc_jid, autojoin=True, nick=self.nick)) # await self['xep_0402'].publish(bm) @@ -333,13 +313,13 @@ class Slixfeed(slixmpp.ClientXMPP): for muc in mucs: bookmarks.add_conference( muc, - "Slixfeed (RSS News Bot)", + self.nick, autojoin=True ) await self.plugin['xep_0048'].set_bookmarks(bookmarks) self.plugin['xep_0045'].leave_muc( muc_jid, - "Slixfeed (RSS News Bot)", + self.nick, "Goodbye!", self.boundjid.bare ) @@ -355,7 +335,7 @@ class Slixfeed(slixmpp.ClientXMPP): print(muc) self.plugin['xep_0045'].join_muc( muc, - "Slixfeed (RSS News Bot)", + self.nick, # If a room password is needed, use: # password=the_room_password, ) @@ -414,10 
+394,10 @@ class Slixfeed(slixmpp.ClientXMPP): print(message) if message["type"] in ("chat", "normal"): jid = message["from"].bare - await refresh_task( + await tasker.refresh_task( self, jid, - send_status, + tasker.send_status, "status", 20 ) @@ -446,11 +426,11 @@ class Slixfeed(slixmpp.ClientXMPP): jid = presence["from"].bare if presence["show"] in ("away", "dnd", "xa"): print(">>> away, dnd, xa:", jid) - await clean_tasks_xmpp( + await tasker.clean_tasks_xmpp( jid, ["interval"] ) - await start_tasks_xmpp( + await tasker.start_tasks_xmpp( self, jid, ["status", "check"] @@ -539,7 +519,7 @@ class Slixfeed(slixmpp.ClientXMPP): self.send_presence_subscription( pto=jid, ptype="subscribe", - pnick="Slixfeed RSS News Bot" + pnick=self.nick ) self.update_roster( jid, @@ -551,23 +531,27 @@ class Slixfeed(slixmpp.ClientXMPP): pto=jid, pfrom=self.boundjid.bare, ptype="subscribe", - pnick="Slixfeed RSS News Bot" + pnick=self.nick ) self.send_message( mto=jid, # mtype="headline", msubject="RSS News Bot", - mbody="Accept subscription request to receive updates.", + mbody=( + "Accept subscription request to receive updates" + ), mfrom=self.boundjid.bare, - mnick="Slixfeed RSS News Bot" + mnick=self.nick ) self.send_presence( pto=jid, pfrom=self.boundjid.bare, # Accept symbol 🉑️ 👍️ ✍ - pstatus="✒️ Accept subscription request to receive updates", + pstatus=( + "✒️ Accept subscription request to receive updates" + ), # ptype="subscribe", - pnick="Slixfeed RSS News Bot" + pnick=self.nick ) @@ -582,7 +566,7 @@ class Slixfeed(slixmpp.ClientXMPP): pto=jid, pfrom=self.boundjid.bare, pstatus="🖋️ Subscribe to receive updates", - pnick="Slixfeed RSS News Bot" + pnick=self.nick ) self.send_message( mto=jid, @@ -617,7 +601,7 @@ class Slixfeed(slixmpp.ClientXMPP): # nick = msg["from"][msg["from"].index("/")+1:] nick = str(msg["from"]) nick = nick[nick.index("/")+1:] - if (msg['muc']['nick'] == "Slixfeed (RSS News Bot)" or + if (msg['muc']['nick'] == self.nick or not 
msg["body"].startswith("!")): return # token = await initdb( @@ -732,21 +716,21 @@ class Slixfeed(slixmpp.ClientXMPP): if url.startswith("http"): action = await initdb( jid, - add_feed_no_check, + fetcher.add_feed_no_check, [url, title] ) old = await initdb( jid, - get_settings_value, + sqlite.get_settings_value, "old" ) if old: - await clean_tasks_xmpp( + await tasker.clean_tasks_xmpp( jid, ["status"] ) # await send_status(jid) - await start_tasks_xmpp( + await tasker.start_tasks_xmpp( self, jid, ["status"] @@ -754,7 +738,7 @@ class Slixfeed(slixmpp.ClientXMPP): else: await initdb( jid, - mark_source_as_read, + sqlite.mark_source_as_read, url ) else: @@ -765,16 +749,16 @@ class Slixfeed(slixmpp.ClientXMPP): if val: keywords = await initdb( jid, - get_settings_value, + sqlite.get_filters_value, key ) - val = await add_to_list( + val = await lister.add_to_list( val, keywords ) await initdb( jid, - set_settings_value, + sqlite.set_filters_value, [key, val] ) action = ( @@ -789,16 +773,16 @@ class Slixfeed(slixmpp.ClientXMPP): if val: keywords = await initdb( jid, - get_settings_value, + sqlite.get_filters_value, key ) - val = await remove_from_list( + val = await lister.remove_from_list( val, keywords ) await initdb( jid, - set_settings_value, + sqlite.set_filters_value, [key, val] ) action = ( @@ -816,7 +800,7 @@ class Slixfeed(slixmpp.ClientXMPP): else: await initdb( jid, - set_settings_value, + sqlite.set_settings_value, [key, val] ) action = ( @@ -830,16 +814,16 @@ class Slixfeed(slixmpp.ClientXMPP): if val: keywords = await initdb( jid, - get_settings_value, + sqlite.get_filters_value, key ) - val = await add_to_list( + val = await lister.add_to_list( val, keywords ) await initdb( jid, - set_settings_value, + sqlite.set_filters_value, [key, val] ) action = ( @@ -854,16 +838,16 @@ class Slixfeed(slixmpp.ClientXMPP): if val: keywords = await initdb( jid, - get_settings_value, + sqlite.get_filters_value, key ) - val = await remove_from_list( + val = await 
lister.remove_from_list( val, keywords ) await initdb( jid, - set_settings_value, + sqlite.set_filters_value, [key, val] ) action = ( @@ -879,8 +863,8 @@ class Slixfeed(slixmpp.ClientXMPP): message_lowercase.startswith("feed:")): url = message if url.startswith("feed:"): - url = await feed_to_http(url) - await clean_tasks_xmpp( + url = await fetcher.feed_to_http(url) + await tasker.clean_tasks_xmpp( jid, ["status"] ) @@ -890,10 +874,10 @@ class Slixfeed(slixmpp.ClientXMPP): process_task_message(self, jid, task) action = await initdb( jid, - add_feed, + fetcher.add_feed, url ) - await start_tasks_xmpp( + await tasker.start_tasks_xmpp( self, jid, ["status"] @@ -910,16 +894,16 @@ class Slixfeed(slixmpp.ClientXMPP): # NOTE This would show the number of new unread entries old = await initdb( jid, - get_settings_value, + sqlite.get_settings_value, "old" ) if old: - await clean_tasks_xmpp( + await tasker.clean_tasks_xmpp( jid, ["status"] ) # await send_status(jid) - await start_tasks_xmpp( + await tasker.start_tasks_xmpp( self, jid, ["status"] @@ -927,7 +911,7 @@ class Slixfeed(slixmpp.ClientXMPP): else: await initdb( jid, - mark_source_as_read, + sqlite.mark_source_as_read, url ) case _ if message_lowercase.startswith("feeds"): @@ -936,7 +920,7 @@ class Slixfeed(slixmpp.ClientXMPP): if len(query) > 3: action = await initdb( jid, - search_feeds, + sqlite.search_feeds, query ) else: @@ -946,7 +930,7 @@ class Slixfeed(slixmpp.ClientXMPP): else: action = await initdb( jid, - list_feeds + sqlite.list_feeds ) case "goodbye": if msg["type"] == "groupchat": @@ -967,15 +951,15 @@ class Slixfeed(slixmpp.ClientXMPP): # ).format(action) await initdb( jid, - set_settings_value, + sqlite.set_settings_value, [key, val] ) # NOTE Perhaps this should be replaced # by functions clean and start - await refresh_task( + await tasker.refresh_task( self, jid, - send_update, + tasker.send_update, key, val ) @@ -985,7 +969,7 @@ class Slixfeed(slixmpp.ClientXMPP): else: action = "Missing value." 
case _ if message_lowercase.startswith("join"): - muc = await check_xmpp_uri(message[5:]) + muc = await fetcher.check_xmpp_uri(message[5:]) if muc: "TODO probe JID and confirm it's a groupchat" await self.join_muc(jid, muc) @@ -1002,7 +986,7 @@ class Slixfeed(slixmpp.ClientXMPP): if val: await initdb( jid, - set_settings_value, + sqlite.set_settings_value, [key, val] ) if val == 0: @@ -1043,7 +1027,7 @@ class Slixfeed(slixmpp.ClientXMPP): case "new": await initdb( jid, - set_settings_value, + sqlite.set_settings_value, ["old", 0] ) action = ( @@ -1051,11 +1035,11 @@ class Slixfeed(slixmpp.ClientXMPP): ) case _ if message_lowercase.startswith("next"): num = message[5:] - await clean_tasks_xmpp( + await tasker.clean_tasks_xmpp( jid, ["interval", "status"] ) - await start_tasks_xmpp( + await tasker.start_tasks_xmpp( self, jid, ["interval", "status"] @@ -1078,7 +1062,7 @@ class Slixfeed(slixmpp.ClientXMPP): case "old": await initdb( jid, - set_settings_value, + sqlite.set_settings_value, ["old", 1] ) action = ( @@ -1093,7 +1077,7 @@ class Slixfeed(slixmpp.ClientXMPP): # ).format(action) await initdb( jid, - set_settings_value, + sqlite.set_settings_value, [key, val] ) action = ( @@ -1111,22 +1095,22 @@ class Slixfeed(slixmpp.ClientXMPP): "📫️ Processing request to fetch data from {}" ).format(url) process_task_message(self, jid, task) - await clean_tasks_xmpp( + await tasker.clean_tasks_xmpp( jid, ["status"] ) if url.startswith("feed:"): - url = await feed_to_http(url) + url = await fetcher.feed_to_http(url) match len(data): case 1: if url.startswith("http"): - action = await view_feed(url) + action = await fetcher.view_feed(url) else: action = "Missing URL." case 2: num = data[1] if url.startswith("http"): - action = await view_entry(url, num) + action = await fetcher.view_entry(url, num) else: action = "Missing URL." case _: @@ -1135,7 +1119,7 @@ class Slixfeed(slixmpp.ClientXMPP): "`read URL` or `read URL NUMBER`\n" "URL must not contain white space." 
) - await start_tasks_xmpp( + await tasker.start_tasks_xmpp( self, jid, ["status"] @@ -1145,7 +1129,7 @@ class Slixfeed(slixmpp.ClientXMPP): if num: action = await initdb( jid, - last_entries, + sqlite.last_entries, num ) else: @@ -1155,7 +1139,7 @@ class Slixfeed(slixmpp.ClientXMPP): if ix: action = await initdb( jid, - remove_feed, + sqlite.remove_feed, ix ) # await refresh_task( @@ -1165,11 +1149,11 @@ class Slixfeed(slixmpp.ClientXMPP): # "status", # 20 # ) - await clean_tasks_xmpp( + await tasker.clean_tasks_xmpp( jid, ["status"] ) - await start_tasks_xmpp( + await tasker.start_tasks_xmpp( self, jid, ["status"] @@ -1197,11 +1181,11 @@ class Slixfeed(slixmpp.ClientXMPP): val = 1 await initdb( jid, - set_settings_value, + sqlite.set_settings_value, [key, val] ) # asyncio.create_task(task_jid(self, jid)) - await start_tasks_xmpp( + await tasker.start_tasks_xmpp( self, jid, ["interval", "status", "check"] @@ -1212,13 +1196,13 @@ class Slixfeed(slixmpp.ClientXMPP): case "stats": action = await initdb( jid, - statistics + sqlite.statistics ) case _ if message_lowercase.startswith("status "): ix = message[7:] action = await initdb( jid, - toggle_status, + sqlite.toggle_status, ix ) case "stop": @@ -1247,10 +1231,10 @@ class Slixfeed(slixmpp.ClientXMPP): val = 0 await initdb( jid, - set_settings_value, + sqlite.set_settings_value, [key, val] ) - await clean_tasks_xmpp( + await tasker.clean_tasks_xmpp( jid, ["interval", "status"] ) @@ -1264,7 +1248,7 @@ class Slixfeed(slixmpp.ClientXMPP): # TODO Send an invitation. action = "Join xmpp:slixmpp@muc.poez.io?join" case _ if message_lowercase.startswith("xmpp:"): - muc = await check_xmpp_uri(message) + muc = await fetcher.check_xmpp_uri(message) if muc: "TODO probe JID and confirm it's a groupchat" await self.join_muc(jid, muc)