#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ TODO 1) Function scan at "for entry in entries" Suppress directly calling function "add_entry" (accept db_file) Pass a list of valid entries to a new function "add_entries" (accept db_file) which would call function "add_entry" (accept cur). * accelerate adding of large set of entries at once. * prevent (or mitigate halt of consequent actions). * reduce I/O. 2) Call sqlite function from function statistics. Returning a list of values doesn't' seem to be a good practice. 3) Special statistics for operator: * Size of database(s); * Amount of JIDs subscribed; * Amount of feeds of all JIDs; * Amount of entries of all JIDs. 4) Consider to append text to remind to share presence '✒️ Share online status to receive updates' 5) Request for subscription if (await XmppUtilities.get_chat_type(self, jid_bare) == 'chat' and not self.client_roster[jid_bare]['to']): XmppPresence.subscription(self, jid_bare, 'subscribe') await XmppRoster.add(self, jid_bare) status_message = '✒️ Share online status to receive updates' XmppPresence.send(self, jid_bare, status_message) message_subject = 'RSS News Bot' message_body = 'Share online status to receive updates.' XmppMessage.send_headline(self, jid_bare, message_subject, message_body, 'chat') """ import hashlib import slixfeed.config as config from slixfeed.config import Config from lxml import etree, html import slixfeed.dt as dt import slixfeed.fetch as fetch from slixfeed.log import Logger import slixfeed.sqlite as sqlite from slixfeed.url import join_url, complete_url import sys try: import tomllib except: import tomli as tomllib logger = Logger(__name__) class Documentation: def manual(filename, section=None, command=None): function_name = sys._getframe().f_code.co_name logger.debug('{}: filename: {}'.format(function_name, filename)) config_dir = config.get_default_config_directory() with open(config_dir + '/' + filename, mode="rb") as commands: cmds = tomllib.load(commands) if section == 'all': cmd_list = '' for cmd in cmds: for i in cmds[cmd]: cmd_list += cmds[cmd][i] + '\n' elif command and section: try: cmd_list = cmds[section][command] except KeyError as e: logger.error(e) cmd_list = None elif section: try: cmd_list = [] for cmd in cmds[section]: cmd_list.extend([cmd]) except KeyError as e: logger.error('KeyError:' + str(e)) cmd_list = None else: cmd_list = [] for cmd in cmds: cmd_list.extend([cmd]) return cmd_list class Html: async def extract_image_from_html(url): function_name = sys._getframe().f_code.co_name logger.debug('{}: url: {}'.format(function_name, url)) result = await fetch.http(url) if not result['error']: data = result['content'] tree = html.fromstring(data) # TODO Exclude banners, class="share" links etc. images = tree.xpath( '//img[not(' 'contains(@src, "avatar") or ' 'contains(@src, "cc-by-sa") or ' 'contains(@src, "emoji") or ' 'contains(@src, "icon") or ' 'contains(@src, "logo") or ' 'contains(@src, "letture") or ' 'contains(@src, "poweredby_mediawi") or ' 'contains(@src, "search") or ' 'contains(@src, "share") or ' 'contains(@src, "smiley")' ')]/@src') if len(images): image = images[0] image = str(image) image_url = complete_url(url, image) return image_url def remove_html_tags(data): function_name = sys._getframe().f_code.co_name logger.debug('{}'.format(function_name)) parser = etree.HTMLParser() tree = etree.fromstring(data, parser) data = etree.tostring(tree, encoding='unicode', method='text') data = data.replace("\n\n", "\n") return data # /questions/9662346/python-code-to-remove-html-tags-from-a-string def _remove_html_tags(text): import xml.etree.ElementTree return ''.join(xml.etree.ElementTree.fromstring(text).itertext()) def __remove_html_tags(data): from bs4 import BeautifulSoup function_name = sys._getframe().f_code.co_name logger.debug('{}'.format(function_name)) data = BeautifulSoup(data, "lxml").text data = data.replace("\n\n", "\n") return data class MD: def export_to_markdown(jid, filename, results): function_name = sys._getframe().f_code.co_name logger.debug('{}: jid: {} filename: {}' .format(function_name, jid, filename)) with open(filename, 'w') as file: file.write('# Subscriptions for {}\n'.format(jid)) file.write('## Set of feeds exported with Slixfeed\n') for result in results: file.write('- [{}]({})\n'.format(result[1], result[2])) file.write('\n\n* * *\n\nThis list was saved on {} from xmpp:{} using ' '[Slixfeed](https://slixfeed.woodpeckersnest.space/)\n' .format(dt.current_date(), jid)) def log_to_markdown(timestamp, filename, jid, message): """ Log message to a markdown file. Parameters ---------- timestamp : str Time stamp. filename : str Jabber ID as name of file. jid : str Jabber ID. message : str Message content. Returns ------- None. """ function_name = sys._getframe().f_code.co_name logger.debug('{}: timestamp: {} filename: {} jid: {} message: {}'.format(function_name, timestamp, filename, jid, message)) with open(filename + '.md', 'a') as file: # entry = "{} {}:\n{}\n\n".format(timestamp, jid, message) entry = '## {}\n### {}\n\n{}\n\n'.format(jid, timestamp, message) file.write(entry) class SQLiteMaintain: # TODO # (1) Check for duplications # (2) append all duplications to a list # (3) Send the list to a function in module sqlite. async def remove_nonexistent_entries(self, jid_bare, db_file, url, feed): """ Remove entries that don't exist in a given parsed feed. Check the entries returned from feed and delete read non existing entries, otherwise move to table archive, if unread. Parameters ---------- db_file : str Path to database file. url : str Feed URL. feed : list Parsed feed document. """ function_name = sys._getframe().f_code.co_name logger.debug('{}: db_file: {} url: {}' .format(function_name, db_file, url)) feed_id = sqlite.get_feed_id(db_file, url) feed_id = feed_id[0] items = sqlite.get_entries_of_feed(db_file, feed_id) entries = feed.entries limit = Config.get_setting_value(self.settings, jid_bare, 'archive') print(limit) for item in items: ix, entry_title, entry_link, entry_id, timestamp = item read_status = sqlite.is_entry_read(db_file, ix) read_status = read_status[0] valid = False for entry in entries: title = None link = None time = None # valid = False # TODO better check and don't repeat code if entry.has_key("id") and entry_id: if entry.id == entry_id: print("compare entry.id == entry_id:", entry.id) print("compare entry.id == entry_id:", entry_id) print("============") valid = True break else: if entry.has_key("title"): title = entry.title else: title = feed["feed"]["title"] if entry.has_key("link"): link = join_url(url, entry.link) else: link = url if entry.has_key("published") and timestamp: print("compare published:", title, link, time) print("compare published:", entry_title, entry_link, timestamp) print("============") time = dt.rfc2822_to_iso8601(entry.published) if (entry_title == title and entry_link == link and timestamp == time): valid = True break else: if (entry_title == title and entry_link == link): print("compare entry_link == link:", title, link) print("compare entry_title == title:", entry_title, entry_link) print("============") valid = True break # TODO better check and don't repeat code if not valid: # print("id: ", ix) # if title: # print("title: ", title) # print("entry_title: ", entry_title) # if link: # print("link: ", link) # print("entry_link: ", entry_link) # if entry.id: # print("last_entry:", entry.id) # print("entry_id: ", entry_id) # if time: # print("time: ", time) # print("timestamp: ", timestamp) # print("read: ", read_status) # breakpoint() # TODO Send to table archive # TODO Also make a regular/routine check for sources that # have been changed (though that can only happen when # manually editing) # ix = item[0] # print(">>> SOURCE: ", source) # print(">>> INVALID:", entry_title) # print("title:", entry_title) # print("link :", entry_link) # print("id :", entry_id) if read_status == 1: await sqlite.delete_entry_by_id(db_file, ix) # print(">>> DELETING:", entry_title) else: # print(">>> ARCHIVING:", entry_title) await sqlite.archive_entry(db_file, ix) await sqlite.maintain_archive(db_file, limit) class Task: def start(self, jid_bare, callback): callback(self, jid_bare) def stop(self, jid_bare, task): if (jid_bare in self.task_manager and task in self.task_manager[jid_bare]): self.task_manager[jid_bare][task].cancel() else: logger.debug('No task {} for JID {} (Task.stop)' .format(task, jid_bare)) class Utilities: # NOTE Warning: Entry might not have a link # TODO Handle situation error def hash_url_to_md5(url): url_encoded = url.encode() url_hashed = hashlib.md5(url_encoded) url_digest = url_hashed.hexdigest() return url_digest def pick_a_feed(lang=None): function_name = sys._getframe().f_code.co_name logger.debug('{}: lang: {}' .format(function_name, lang)) config_dir = config.get_default_config_directory() with open(config_dir + '/' + 'feeds.toml', mode="rb") as feeds: urls = tomllib.load(feeds) import random url = random.choice(urls['feeds']) return url