From 64727d207fbefd6a7bc5f94b241551007d9fc41b Mon Sep 17 00:00:00 2001 From: "Schimon Jehudah, Adv." Date: Thu, 13 Jun 2024 18:53:53 +0300 Subject: [PATCH] Restructure code; Add more classes and modules; Restore database maintenance; Remove JSON support; Remove Beautiful Soup. --- pyproject.toml | 3 +- slixfeed/__main__.py | 22 +- slixfeed/action.py | 1858 ------------------------------ slixfeed/assets/information.toml | 13 +- slixfeed/assets/settings.toml | 24 +- slixfeed/bittorrent.py | 39 + slixfeed/config.py | 20 +- slixfeed/fetch.py | 31 +- slixfeed/log.py | 4 +- slixfeed/opml.py | 65 -- slixfeed/sqlite.py | 177 +-- slixfeed/syndication.py | 958 +++++++++++++++ slixfeed/task.py | 313 +---- slixfeed/url.py | 12 +- slixfeed/utilities.py | 347 ++++++ slixfeed/version.py | 4 +- slixfeed/xmpp/chat.py | 359 ++++-- slixfeed/xmpp/client.py | 205 ++-- slixfeed/xmpp/commands.py | 127 +- slixfeed/xmpp/connect.py | 28 +- slixfeed/xmpp/groupchat.py | 54 + slixfeed/xmpp/ipc.py | 40 +- slixfeed/xmpp/iq.py | 12 +- slixfeed/xmpp/message.py | 19 +- slixfeed/xmpp/muc.py | 97 -- slixfeed/xmpp/profile.py | 29 +- slixfeed/xmpp/publish.py | 152 ++- slixfeed/xmpp/status.py | 94 ++ slixfeed/xmpp/upload.py | 30 +- slixfeed/xmpp/utilities.py | 60 + slixfeed/xmpp/utility.py | 56 - 31 files changed, 2401 insertions(+), 2851 deletions(-) delete mode 100644 slixfeed/action.py create mode 100644 slixfeed/bittorrent.py delete mode 100644 slixfeed/opml.py create mode 100644 slixfeed/syndication.py create mode 100644 slixfeed/utilities.py create mode 100644 slixfeed/xmpp/groupchat.py delete mode 100644 slixfeed/xmpp/muc.py create mode 100644 slixfeed/xmpp/status.py create mode 100644 slixfeed/xmpp/utilities.py delete mode 100644 slixfeed/xmpp/utility.py diff --git a/pyproject.toml b/pyproject.toml index 2ace4c0..8c92bb8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,12 +40,11 @@ keywords = [ # urls = {Homepage = "https://gitgud.io/sjehuda/slixfeed"} dependencies = [ "aiohttp", - "bs4", + # "daemonize", "feedparser", "lxml", # "pysocks", "python-dateutil", - "pyyaml", "requests", "slixmpp", "tomli", # Python 3.10 diff --git a/slixfeed/__main__.py b/slixfeed/__main__.py index 1ea1703..44d8897 100644 --- a/slixfeed/__main__.py +++ b/slixfeed/__main__.py @@ -37,7 +37,7 @@ TODO 13) Tip Of The Day. Did you know that you can follow you favorite Mastodon feeds by just sending the URL address? - Supported fediverse websites are: + Supported ActivityPub (i.e. fediverse) instances are: Akkoma, Firefish (Calckey), Friendica, HubZilla, Mastodon, Misskey, Pixelfed, Pleroma, Socialhome, Soapbox. 
@@ -57,30 +57,20 @@ TODO # jid = Jabber ID (XMPP) # res = response (HTTP) from argparse import ArgumentParser -from getpass import getpass -import sys -import configparser -# import filehandler -# from slixfeed.file import get_default_confdir -from getpass import getpass import logging -import os - -# from datetime import date -# import time +import sys # from eliot import start_action, to_file # # to_file(open('slixfeed.log', 'w')) # # with start_action(action_type='set_date()', jid=jid): # # with start_action(action_type='message()', msg=msg): -#import slixfeed.smtp -#import slixfeed.irc -#import slixfeed.matrix - import slixfeed.config as config +from slixfeed.log import Logger from slixfeed.version import __version__ +logger = Logger(__name__) + # import socks # import socket @@ -89,7 +79,7 @@ from slixfeed.version import __version__ def main(): config_dir = config.get_default_config_directory() - logging.info('Reading configuration from {}'.format(config_dir)) + logger.info('Reading configuration from {}'.format(config_dir)) print('Reading configuration from {}'.format(config_dir)) network_settings = config.get_values('settings.toml', 'network') print('User agent:', network_settings['user_agent'] or 'Slixfeed/0.1') diff --git a/slixfeed/action.py b/slixfeed/action.py deleted file mode 100644 index d34bfab..0000000 --- a/slixfeed/action.py +++ /dev/null @@ -1,1858 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" - -TODO - -1) Function scan at "for entry in entries" - Suppress directly calling function "add_entry" (accept db_file) - Pass a list of valid entries to a new function "add_entries" - (accept db_file) which would call function "add_entry" (accept cur). - * accelerate adding of large set of entries at once. - * prevent (or mitigate halt of consequent actions). - * reduce I/O. - -2) Call sqlite function from function statistics. - Returning a list of values doesn't' seem to be a good practice. - -3) Special statistics for operator: - * Size of database(s); - * Amount of JIDs subscribed; - * Amount of feeds of all JIDs; - * Amount of entries of all JIDs. 
- -""" - -# import asyncio -from asyncio.exceptions import IncompleteReadError -from bs4 import BeautifulSoup -from feedparser import parse -import hashlib -from http.client import IncompleteRead -import json -from slixfeed.log import Logger -from lxml import html -import os -import slixfeed.config as config -from slixfeed.config import Config -import slixfeed.crawl as crawl -import slixfeed.dt as dt -import slixfeed.fetch as fetch -from slixfeed.opml import Opml -import slixfeed.sqlite as sqlite -import slixfeed.task as task -import slixfeed.url as uri -from slixfeed.url import ( - complete_url, - join_url, - remove_tracking_parameters, - replace_hostname, - trim_url - ) -from slixfeed.xmpp.bookmark import XmppBookmark -from slixfeed.xmpp.muc import XmppGroupchat -from slixfeed.xmpp.iq import XmppIQ -from slixfeed.xmpp.message import XmppMessage -from slixfeed.xmpp.presence import XmppPresence -from slixfeed.xmpp.publish import XmppPubsub -from slixfeed.xmpp.upload import XmppUpload -from slixfeed.xmpp.utility import get_chat_type -from slixmpp.xmlstream import ET -import sys -from urllib import error -from urllib.parse import parse_qs, urlsplit -import xml.etree.ElementTree as ETR - -try: - import tomllib -except: - import tomli as tomllib - -logger = Logger(__name__) - - -# NOTE Consider removal of MD (and any other option HTML and XBEL) -def export_feeds(self, jid_bare, ext): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: jid_bare: {}: ext: {}'.format(function_name, jid_bare, ext)) - cache_dir = config.get_default_cache_directory() - if not os.path.isdir(cache_dir): - os.mkdir(cache_dir) - if not os.path.isdir(cache_dir + '/' + ext): - os.mkdir(cache_dir + '/' + ext) - filename = os.path.join( - cache_dir, ext, 'slixfeed_' + dt.timestamp() + '.' + ext) - db_file = config.get_pathname_to_database(jid_bare) - results = sqlite.get_feeds(db_file) - match ext: - # case 'html': - # response = 'Not yet implemented.' - case 'md': - export_to_markdown(jid_bare, filename, results) - case 'opml': - Opml.export_to_file(jid_bare, filename, results) - # case 'xbel': - # response = 'Not yet implemented.' - return filename - - -async def xmpp_muc_autojoin(self, bookmarks): - for bookmark in bookmarks: - if bookmark["jid"] and bookmark["autojoin"]: - if not bookmark["nick"]: - bookmark["nick"] = self.alias - logger.error('Alias (i.e. 
Nicknname) is missing for ' - 'bookmark {}'.format(bookmark['name'])) - alias = bookmark["nick"] - muc_jid = bookmark["jid"] - result = await XmppGroupchat.join(self, muc_jid, alias) - print(result) - if result == 'ban': - await XmppBookmark.remove(self, muc_jid) - logger.warning('{} is banned from {}'.format(self.alias, muc_jid)) - logger.warning('Groupchat {} has been removed from bookmarks' - .format(muc_jid)) - else: - logger.info('Autojoin groupchat\n' - 'Name : {}\n' - 'JID : {}\n' - 'Alias : {}\n' - .format(bookmark["name"], - bookmark["jid"], - bookmark["nick"])) - elif not bookmark["jid"]: - logger.error('JID is missing for bookmark {}' - .format(bookmark['name'])) - - -""" -TODO - -Consider to append text to remind to share presence -'✒️ Share online status to receive updates' - -# TODO Request for subscription -if (await get_chat_type(self, jid_bare) == 'chat' and - not self.client_roster[jid_bare]['to']): - XmppPresence.subscription(self, jid_bare, 'subscribe') - await XmppRoster.add(self, jid_bare) - status_message = '✒️ Share online status to receive updates' - XmppPresence.send(self, jid_bare, status_message) - message_subject = 'RSS News Bot' - message_body = 'Share online status to receive updates.' - XmppMessage.send_headline(self, jid_bare, message_subject, - message_body, 'chat') - -""" - -async def xmpp_send_status_message(self, jid_bare): - """ - Send status message. - - Parameters - ---------- - jid : str - Jabber ID. - """ - function_name = sys._getframe().f_code.co_name - logger.debug('{}: jid: {}'.format(function_name, jid_bare)) - status_text = '📜️ Slixfeed RSS News Bot' - db_file = config.get_pathname_to_database(jid_bare) - enabled = Config.get_setting_value(self.settings, jid_bare, 'enabled') - if enabled: - jid_task = self.pending_tasks[jid_bare] - if len(jid_task): - status_mode = 'dnd' - status_text = jid_task[list(jid_task.keys())[0]] - else: - feeds = sqlite.get_number_of_items(db_file, 'feeds_properties') - # print(await current_time(), jid, "has", feeds, "feeds") - if not feeds: - status_mode = 'available' - status_text = '📪️ Send a URL from a blog or a news website' - else: - unread = sqlite.get_number_of_entries_unread(db_file) - if unread: - status_mode = 'chat' - status_text = '📬️ There are {} news items'.format(str(unread)) - # status_text = ( - # "📰 News items: {}" - # ).format(str(unread)) - # status_text = ( - # "📰 You have {} news items" - # ).format(str(unread)) - else: - status_mode = 'available' - status_text = '📭️ No news' - else: - status_mode = 'xa' - status_text = '📪️ Send "Start" to receive updates' - # breakpoint() - # print(await current_time(), status_text, "for", jid) - XmppPresence.send(self, jid_bare, status_text, status_type=status_mode) - # await asyncio.sleep(60 * 20) - # await refresh_task(self, jid, send_status, 'status', '90') - # loop.call_at( - # loop.time() + 60 * 20, - # loop.create_task, - # send_status(jid) - # ) - - -async def xmpp_pubsub_send_selected_entry(self, jid_bare, jid_file, node_id, entry_id): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: jid_bare: {} jid_file: {}'.format(function_name, jid_bare, jid_file)) - db_file = config.get_pathname_to_database(jid_file) - report = {} - if jid_bare == self.boundjid.bare: - node_id = 'urn:xmpp:microblog:0' - node_subtitle = None - node_title = None - else: - feed_id = sqlite.get_feed_id_by_entry_index(db_file, entry_id) - feed_id = feed_id[0] - node_id, node_title, node_subtitle = sqlite.get_feed_properties(db_file, feed_id) - print('THIS IS A 
TEST') - print(node_id) - print(node_title) - print(node_subtitle) - print('THIS IS A TEST') - xep = None - iq_create_node = XmppPubsub.create_node( - self, jid_bare, node_id, xep, node_title, node_subtitle) - await XmppIQ.send(self, iq_create_node) - entry = sqlite.get_entry_properties(db_file, entry_id) - print('xmpp_pubsub_send_selected_entry',jid_bare) - print(node_id) - entry_dict = pack_entry_into_dict(db_file, entry) - node_item = create_rfc4287_entry(entry_dict) - entry_url = entry_dict['link'] - item_id = hash_url_to_md5(entry_url) - iq_create_entry = XmppPubsub.create_entry( - self, jid_bare, node_id, item_id, node_item) - await XmppIQ.send(self, iq_create_entry) - await sqlite.mark_as_read(db_file, entry_id) - report = entry_url - return report - - -async def xmpp_pubsub_send_unread_items(self, jid_bare): - """ - - Parameters - ---------- - jid_bare : TYPE - Bare Jabber ID. - - Returns - ------- - report : dict - URL and Number of processed entries. - - """ - function_name = sys._getframe().f_code.co_name - logger.debug('{}: jid_bare: {}'.format(function_name, jid_bare)) - db_file = config.get_pathname_to_database(jid_bare) - report = {} - subscriptions = sqlite.get_active_feeds_url(db_file) - for url in subscriptions: - url = url[0] - # feed_id = sqlite.get_feed_id(db_file, url) - # feed_id = feed_id[0] - # feed_properties = sqlite.get_feed_properties(db_file, feed_id) - feed_id = sqlite.get_feed_id(db_file, url) - feed_id = feed_id[0] - - # Publish to node 'urn:xmpp:microblog:0' for own JID - # Publish to node based on feed identifier for PubSub service. - - if jid_bare == self.boundjid.bare: - node_id = 'urn:xmpp:microblog:0' - node_subtitle = None - node_title = None - else: - # node_id = feed_properties[2] - # node_title = feed_properties[3] - # node_subtitle = feed_properties[5] - node_id = sqlite.get_feed_identifier(db_file, feed_id) - node_id = node_id[0] - if not node_id: - counter = 0 - hostname = uri.get_hostname(url) - hostname = hostname.replace('.','-') - identifier = hostname + ':' + str(counter) - while True: - if sqlite.check_identifier_exist(db_file, identifier): - counter += 1 - identifier = hostname + ':' + str(counter) - else: - break - await sqlite.update_feed_identifier(db_file, feed_id, identifier) - node_id = sqlite.get_feed_identifier(db_file, feed_id) - node_id = node_id[0] - node_title = sqlite.get_feed_title(db_file, feed_id) - node_title = node_title[0] - node_subtitle = sqlite.get_feed_subtitle(db_file, feed_id) - node_subtitle = node_subtitle[0] - xep = None - node_exist = await XmppPubsub.get_node_configuration(self, jid_bare, node_id) - if not node_exist: - iq_create_node = XmppPubsub.create_node( - self, jid_bare, node_id, xep, node_title, node_subtitle) - await XmppIQ.send(self, iq_create_node) - entries = sqlite.get_unread_entries_of_feed(db_file, feed_id) - report[url] = len(entries) - for entry in entries: - feed_entry = pack_entry_into_dict(db_file, entry) - node_entry = create_rfc4287_entry(feed_entry) - entry_url = feed_entry['link'] - item_id = hash_url_to_md5(entry_url) - print('PubSub node item was sent to', jid_bare, node_id) - print(entry_url) - print(item_id) - iq_create_entry = XmppPubsub.create_entry( - self, jid_bare, node_id, item_id, node_entry) - await XmppIQ.send(self, iq_create_entry) - ix = entry[0] - await sqlite.mark_as_read(db_file, ix) - return report - - -def pack_entry_into_dict(db_file, entry): - entry_id = entry[0] - authors = sqlite.get_authors_by_entry_id(db_file, entry_id) - entry_authors = [] - for author in 
authors: - entry_author = { - 'name': author[2], - 'email': author[3], - 'url': author[4]} - entry_authors.extend([entry_author]) - - contributors = sqlite.get_contributors_by_entry_id(db_file, entry_id) - entry_contributors = [] - for contributor in contributors: - entry_contributor = { - 'name': contributor[2], - 'email': contributor[3], - 'url': contributor[4]} - entry_contributors.extend([entry_contributor]) - - links = sqlite.get_links_by_entry_id(db_file, entry_id) - entry_links = [] - for link in links: - entry_link = { - 'url': link[2], - 'type': link[3], - 'rel': link[4], - 'size': link[5]} - entry_links.extend([entry_link]) - - - tags = sqlite.get_tags_by_entry_id(db_file, entry_id) - entry_tags = [] - for tag in tags: - entry_tag = { - 'term': tag[2], - 'scheme': tag[3], - 'label': tag[4]} - entry_tags.extend([entry_tag]) - - contents = sqlite.get_contents_by_entry_id(db_file, entry_id) - entry_contents = [] - for content in contents: - entry_content = { - 'text': content[2], - 'type': content[3], - 'base': content[4], - 'lang': content[5]} - entry_contents.extend([entry_content]) - - feed_entry = { - 'authors' : entry_authors, - 'category' : entry[10], - 'comments' : entry[12], - 'contents' : entry_contents, - 'contributors' : entry_contributors, - 'summary_base' : entry[9], - 'summary_lang' : entry[7], - 'summary_text' : entry[6], - 'summary_type' : entry[8], - 'enclosures' : entry[13], - 'href' : entry[11], - 'link' : entry[3], - 'links' : entry_links, - 'published' : entry[14], - 'rating' : entry[13], - 'tags' : entry_tags, - 'title' : entry[4], - 'title_type' : entry[3], - 'updated' : entry[15]} - return feed_entry - - -# NOTE Warning: Entry might not have a link -# TODO Handle situation error -def hash_url_to_md5(url): - url_encoded = url.encode() - url_hashed = hashlib.md5(url_encoded) - url_digest = url_hashed.hexdigest() - return url_digest - - -def create_rfc4287_entry(feed_entry): - node_entry = ET.Element('entry') - node_entry.set('xmlns', 'http://www.w3.org/2005/Atom') - - # Title - title = ET.SubElement(node_entry, 'title') - if feed_entry['title']: - if feed_entry['title_type']: title.set('type', feed_entry['title_type']) - title.text = feed_entry['title'] - elif feed_entry['summary_text']: - if feed_entry['summary_type']: title.set('type', feed_entry['summary_type']) - title.text = feed_entry['summary_text'] - # if feed_entry['summary_base']: title.set('base', feed_entry['summary_base']) - # if feed_entry['summary_lang']: title.set('lang', feed_entry['summary_lang']) - else: - title.text = feed_entry['published'] - - # Some feeds have identical content for contents and summary - # So if content is present, do not add summary - if feed_entry['contents']: - # Content - for feed_entry_content in feed_entry['contents']: - content = ET.SubElement(node_entry, 'content') - # if feed_entry_content['base']: content.set('base', feed_entry_content['base']) - if feed_entry_content['lang']: content.set('lang', feed_entry_content['lang']) - if feed_entry_content['type']: content.set('type', feed_entry_content['type']) - content.text = feed_entry_content['text'] - else: - # Summary - summary = ET.SubElement(node_entry, 'summary') # TODO Try 'content' - # if feed_entry['summary_base']: summary.set('base', feed_entry['summary_base']) - # TODO Check realization of "lang" - if feed_entry['summary_type']: summary.set('type', feed_entry['summary_type']) - if feed_entry['summary_lang']: summary.set('lang', feed_entry['summary_lang']) - summary.text = feed_entry['summary_text'] - - # 
Authors - for feed_entry_author in feed_entry['authors']: - author = ET.SubElement(node_entry, 'author') - name = ET.SubElement(author, 'name') - name.text = feed_entry_author['name'] - if feed_entry_author['url']: - uri = ET.SubElement(author, 'uri') - uri.text = feed_entry_author['url'] - if feed_entry_author['email']: - email = ET.SubElement(author, 'email') - email.text = feed_entry_author['email'] - - # Contributors - for feed_entry_contributor in feed_entry['contributors']: - contributor = ET.SubElement(node_entry, 'author') - name = ET.SubElement(contributor, 'name') - name.text = feed_entry_contributor['name'] - if feed_entry_contributor['url']: - uri = ET.SubElement(contributor, 'uri') - uri.text = feed_entry_contributor['url'] - if feed_entry_contributor['email']: - email = ET.SubElement(contributor, 'email') - email.text = feed_entry_contributor['email'] - - # Category - category = ET.SubElement(node_entry, "category") - category.set('category', feed_entry['category']) - - # Tags - for feed_entry_tag in feed_entry['tags']: - tag = ET.SubElement(node_entry, 'category') - tag.set('term', feed_entry_tag['term']) - - # Link - link = ET.SubElement(node_entry, "link") - link.set('href', feed_entry['link']) - - # Links - for feed_entry_link in feed_entry['links']: - link = ET.SubElement(node_entry, "link") - link.set('href', feed_entry_link['url']) - link.set('type', feed_entry_link['type']) - link.set('rel', feed_entry_link['rel']) - - # Date updated - if feed_entry['updated']: - updated = ET.SubElement(node_entry, 'updated') - updated.text = feed_entry['updated'] - - # Date published - if feed_entry['published']: - published = ET.SubElement(node_entry, 'published') - published.text = feed_entry['published'] - - return node_entry - - -async def xmpp_chat_send_unread_items(self, jid_bare, num=None): - """ - Send news items as messages. - - Parameters - ---------- - jid : str - Jabber ID. - num : str, optional - Number. The default is None. 
- """ - function_name = sys._getframe().f_code.co_name - logger.debug('{}: jid: {} num: {}'.format(function_name, jid_bare, num)) - db_file = config.get_pathname_to_database(jid_bare) - show_media = Config.get_setting_value(self.settings, jid_bare, 'media') - if not num: - num = Config.get_setting_value(self.settings, jid_bare, 'quantum') - else: - num = int(num) - results = sqlite.get_unread_entries(db_file, num) - news_digest = '' - media = None - chat_type = await get_chat_type(self, jid_bare) - for result in results: - ix = result[0] - title_e = result[1] - url = result[2] - summary = result[3] - feed_id = result[4] - date = result[5] - enclosure = sqlite.get_enclosure_by_entry_id(db_file, ix) - if enclosure: enclosure = enclosure[0] - title_f = sqlite.get_feed_title(db_file, feed_id) - title_f = title_f[0] - news_digest += await list_unread_entries(self, result, title_f, jid_bare) - # print(db_file) - # print(result[0]) - # breakpoint() - await sqlite.mark_as_read(db_file, ix) - - # Find media - # if url.startswith("magnet:"): - # media = action.get_magnet(url) - # elif enclosure.startswith("magnet:"): - # media = action.get_magnet(enclosure) - # elif enclosure: - if show_media: - if enclosure: - media = enclosure - else: - media = await extract_image_from_html(url) - - if media and news_digest: - # Send textual message - XmppMessage.send(self, jid_bare, news_digest, chat_type) - news_digest = '' - # Send media - XmppMessage.send_oob(self, jid_bare, media, chat_type) - media = None - - if news_digest: - XmppMessage.send(self, jid_bare, news_digest, chat_type) - # TODO Add while loop to assure delivery. - # print(await current_time(), ">>> ACT send_message",jid) - # NOTE Do we need "if statement"? See NOTE at is_muc. - # if chat_type in ('chat', 'groupchat'): - # # TODO Provide a choice (with or without images) - # XmppMessage.send(self, jid, news_digest, chat_type) - # See XEP-0367 - # if media: - # # message = xmpp.Slixfeed.make_message( - # # self, mto=jid, mbody=new, mtype=chat_type) - # message = xmpp.Slixfeed.make_message( - # self, mto=jid, mbody=media, mtype=chat_type) - # message['oob']['url'] = media - # message.send() - - # TODO Do not refresh task before - # verifying that it was completed. 
- - # await start_tasks_xmpp_chat(self, jid, ['status']) - # await refresh_task(self, jid, send_update, 'interval') - - # interval = await initdb( - # jid, - # sqlite.is_setting_key, - # "interval" - # ) - # self.task_manager[jid]["interval"] = loop.call_at( - # loop.time() + 60 * interval, - # loop.create_task, - # send_update(jid) - # ) - - # print(await current_time(), "asyncio.get_event_loop().time()") - # print(await current_time(), asyncio.get_event_loop().time()) - # await asyncio.sleep(60 * interval) - - # loop.call_later( - # 60 * interval, - # loop.create_task, - # send_update(jid) - # ) - - # print - # await handle_event() - - -def manual(filename, section=None, command=None): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: filename: {}'.format(function_name, filename)) - config_dir = config.get_default_config_directory() - with open(config_dir + '/' + filename, mode="rb") as commands: - cmds = tomllib.load(commands) - if section == 'all': - cmd_list = '' - for cmd in cmds: - for i in cmds[cmd]: - cmd_list += cmds[cmd][i] + '\n' - elif command and section: - try: - cmd_list = cmds[section][command] - except KeyError as e: - logger.error(e) - cmd_list = None - elif section: - try: - cmd_list = [] - for cmd in cmds[section]: - cmd_list.extend([cmd]) - except KeyError as e: - logger.error('KeyError:' + str(e)) - cmd_list = None - else: - cmd_list = [] - for cmd in cmds: - cmd_list.extend([cmd]) - return cmd_list - - -def log_to_markdown(timestamp, filename, jid, message): - """ - Log message to file. - - Parameters - ---------- - timestamp : str - Time stamp. - filename : str - Jabber ID as name of file. - jid : str - Jabber ID. - message : str - Message content. - - Returns - ------- - None. - - """ - function_name = sys._getframe().f_code.co_name - logger.debug('{}: timestamp: {} filename: {} jid: {} message: {}'.format(function_name, timestamp, filename, jid, message)) - with open(filename + '.md', 'a') as file: - # entry = "{} {}:\n{}\n\n".format(timestamp, jid, message) - entry = ( - "## {}\n" - "### {}\n\n" - "{}\n\n").format(jid, timestamp, message) - file.write(entry) - - -def is_feed_json(document): - """ - - NOTE /kurtmckee/feedparser/issues/103 - - Determine whether document is json feed or not. - - Parameters - ---------- - feed : dict - Parsed feed. - - Returns - ------- - val : boolean - True or False. - """ - function_name = sys._getframe().f_code.co_name - logger.debug('{}'.format(function_name)) - value = False - try: - feed = json.loads(document) - if not feed['items']: - if "version" in feed.keys(): - if 'jsonfeed' in feed['version']: - value = True - else: # TODO Test - value = False - # elif 'title' in feed.keys(): - # value = True - else: - value = False - else: - value = True - except: - pass - return value - - -def is_feed(url, feed): - """ - Determine whether document is feed or not. - - Parameters - ---------- - feed : dict - Parsed feed. - - Returns - ------- - val : boolean - True or False. 
- """ - function_name = sys._getframe().f_code.co_name - logger.debug('{}'.format(function_name)) - value = False - # message = None - if not feed.entries: - if "version" in feed.keys(): - # feed["version"] - if feed.version: - value = True - # message = ( - # "Empty feed for {}" - # ).format(url) - elif "title" in feed["feed"].keys(): - value = True - # message = ( - # "Empty feed for {}" - # ).format(url) - else: - value = False - # message = ( - # "No entries nor title for {}" - # ).format(url) - elif feed.bozo: - # NOTE Consider valid even when is not-well-formed - value = True - logger.warning('Bozo detected for {}'.format(url)) - else: - value = True - # message = ( - # "Good feed for {}" - # ).format(url) - return value - - -async def list_unread_entries(self, result, feed_title, jid): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: feed_title: {} jid: {}' - .format(function_name, feed_title, jid)) - # TODO Add filtering - # TODO Do this when entry is added to list and mark it as read - # DONE! - # results = [] - # if sqlite.is_setting_key(db_file, "deny"): - # while len(results) < num: - # result = cur.execute(sql).fetchone() - # blacklist = sqlite.get_setting_value(db_file, "deny").split(",") - # for i in blacklist: - # if i in result[1]: - # continue - # print("rejected:", result[1]) - # print("accepted:", result[1]) - # results.extend([result]) - - # news_list = "You've got {} news items:\n".format(num) - # NOTE Why doesn't this work without list? - # i.e. for result in results - # for result in results.fetchall(): - ix = str(result[0]) - title = str(result[1]) - # # TODO Retrieve summary from feed - # # See fetch.view_entry - summary = result[3] - # Remove HTML tags - try: - title = BeautifulSoup(title, "lxml").text - summary = BeautifulSoup(summary, "lxml").text - except: - print('Found issue at action.py') - print(result[3]) - breakpoint() - # TODO Limit text length - # summary = summary.replace("\n\n\n", "\n\n") - summary = summary.replace('\n', ' ') - summary = summary.replace(' ', ' ') - summary = summary.replace(' ', ' ') - length = Config.get_setting_value(self.settings, jid, 'length') - length = int(length) - summary = summary[:length] + " […]" - # summary = summary.strip().split('\n') - # summary = ["> " + line for line in summary] - # summary = "\n".join(summary) - link = result[2] - link = remove_tracking_parameters(link) - link = await replace_hostname(link, "link") or link - # news_item = ("\n{}\n{}\n{} [{}]\n").format(str(title), str(link), - # str(feed_title), str(ix)) - formatting = Config.get_setting_value(self.settings, jid, 'formatting') - news_item = formatting.format(feed_title=feed_title, - title=title, - summary=summary, - link=link, - ix=ix) - # news_item = news_item.replace('\\n', '\n') - return news_item - - -def pick_a_feed(lang=None): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: lang: {}' - .format(function_name, lang)) - config_dir = config.get_default_config_directory() - with open(config_dir + '/' + 'feeds.toml', mode="rb") as feeds: - urls = tomllib.load(feeds) - import random - url = random.choice(urls['feeds']) - return url - - -def export_to_markdown(jid, filename, results): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: jid: {} filename: {}' - .format(function_name, jid, filename)) - with open(filename, 'w') as file: - file.write('# Subscriptions for {}\n'.format(jid)) - file.write('## Set of feeds exported with Slixfeed\n') - for result in results: - file.write('- 
[{}]({})\n'.format(result[1], result[2])) - file.write('\n\n* * *\n\nThis list was saved on {} from xmpp:{} using ' - '[Slixfeed](https://gitgud.io/sjehuda/slixfeed)\n' - .format(dt.current_date(), jid)) - - -async def add_feed(self, jid_bare, db_file, url, identifier): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: db_file: {} url: {}' - .format(function_name, db_file, url)) - while True: - feed_id = sqlite.get_feed_id(db_file, url) - if not feed_id: - exist_identifier = sqlite.check_identifier_exist(db_file, identifier) - if not exist_identifier: - result = await fetch.http(url) - message = result['message'] - status_code = result['status_code'] - if not result['error']: - await sqlite.update_feed_status(db_file, feed_id, status_code) - document = result['content'] - feed = parse(document) - # if document and status_code == 200: - if is_feed(url, feed): - if "title" in feed["feed"].keys(): - title = feed["feed"]["title"] - else: - title = urlsplit(url).netloc - if "language" in feed["feed"].keys(): - language = feed["feed"]["language"] - else: - language = '' - if "encoding" in feed.keys(): - encoding = feed["encoding"] - else: - encoding = '' - if "updated_parsed" in feed["feed"].keys(): - updated = feed["feed"]["updated_parsed"] - try: - updated = dt.convert_struct_time_to_iso8601(updated) - except: - updated = '' - else: - updated = '' - version = feed.version - entries_count = len(feed.entries) - await sqlite.insert_feed(db_file, - url, - title, - identifier, - entries=entries_count, - version=version, - encoding=encoding, - language=language, - status_code=status_code, - updated=updated) - feed_valid = 0 if feed.bozo else 1 - await sqlite.update_feed_validity(db_file, feed_id, feed_valid) - if feed.has_key('updated_parsed'): - feed_updated = feed.updated_parsed - try: - feed_updated = dt.convert_struct_time_to_iso8601(feed_updated) - except: - feed_updated = None - else: - feed_updated = None - feed_properties = get_properties_of_feed(db_file, - feed_id, feed) - await sqlite.update_feed_properties(db_file, feed_id, - feed_properties) - feed_id = sqlite.get_feed_id(db_file, url) - feed_id = feed_id[0] - new_entries = get_properties_of_entries( - jid_bare, db_file, url, feed_id, feed) - if new_entries: - await sqlite.add_entries_and_update_feed_state( - db_file, feed_id, new_entries) - old = Config.get_setting_value(self.settings, jid_bare, 'old') - if not old: await sqlite.mark_feed_as_read(db_file, feed_id) - result_final = {'link' : url, - 'index' : feed_id, - 'name' : title, - 'code' : status_code, - 'error' : False, - 'message': message, - 'exist' : False, - 'identifier' : None} - break - # NOTE This elif statement be unnecessary - # when feedparser be supporting json feed. 
- elif is_feed_json(document): - feed = json.loads(document) - if "title" in feed.keys(): - title = feed["title"] - else: - title = urlsplit(url).netloc - if "language" in feed.keys(): - language = feed["language"] - else: - language = '' - if "encoding" in feed.keys(): - encoding = feed["encoding"] - else: - encoding = '' - if "date_published" in feed.keys(): - updated = feed["date_published"] - try: - updated = dt.convert_struct_time_to_iso8601(updated) - except: - updated = '' - else: - updated = '' - version = 'json' + feed["version"].split('/').pop() - entries_count = len(feed["items"]) - await sqlite.insert_feed(db_file, - url, - title, - identifier, - entries=entries_count, - version=version, - encoding=encoding, - language=language, - status_code=status_code, - updated=updated) - await scan_json(self, jid_bare, db_file, url) - old = Config.get_setting_value(self.settings, jid_bare, 'old') - if not old: - feed_id = sqlite.get_feed_id(db_file, url) - feed_id = feed_id[0] - await sqlite.mark_feed_as_read(db_file, feed_id) - result_final = {'link' : url, - 'index' : feed_id, - 'name' : title, - 'code' : status_code, - 'error' : False, - 'message': message, - 'exist' : False, - 'identifier' : None} - break - else: - # NOTE Do not be tempted to return a compact dictionary. - # That is, dictionary within dictionary - # Return multiple dictionaries in a list or tuple. - result = await crawl.probe_page(url, document) - if not result: - # Get out of the loop with dict indicating error. - result_final = {'link' : url, - 'index' : None, - 'name' : None, - 'code' : status_code, - 'error' : True, - 'message': message, - 'exist' : False, - 'identifier' : None} - break - elif isinstance(result, list): - # Get out of the loop and deliver a list of dicts. - result_final = result - break - else: - # Go back up to the while loop and try again. - url = result['link'] - else: - await sqlite.update_feed_status(db_file, feed_id, status_code) - result_final = {'link' : url, - 'index' : None, - 'name' : None, - 'code' : status_code, - 'error' : True, - 'message': message, - 'exist' : False, - 'identifier' : None} - break - else: - ix = exist_identifier[1] - identifier = exist_identifier[2] - message = ('Identifier "{}" is already allocated.' - .format(identifier)) - result_final = {'link' : url, - 'index' : ix, - 'name' : None, - 'code' : None, - 'error' : False, - 'message': message, - 'exist' : False, - 'identifier' : identifier} - break - else: - feed_id = feed_id[0] - title = sqlite.get_feed_title(db_file, feed_id) - title = title[0] - message = 'URL already exist.' - result_final = {'link' : url, - 'index' : feed_id, - 'name' : title, - 'code' : None, - 'error' : False, - 'message': message, - 'exist' : True, - 'identifier' : None} - break - return result_final - - -async def scan_json(self, jid_bare, db_file, url): - """ - Check feeds for new entries. - - Parameters - ---------- - db_file : str - Path to database file. - url : str, optional - URL. The default is None. 
- """ - function_name = sys._getframe().f_code.co_name - logger.debug('{}: db_file: {} url: {}' - .format(function_name, db_file, url)) - if isinstance(url, tuple): url = url[0] - result = await fetch.http(url) - if not result['error']: - document = result['content'] - status = result['status_code'] - new_entries = [] - if document and status == 200: - feed = json.loads(document) - entries = feed["items"] - await remove_nonexistent_entries_json(self, jid_bare, db_file, url, feed) - try: - feed_id = sqlite.get_feed_id(db_file, url) - feed_id = feed_id[0] - # await sqlite.update_feed_validity( - # db_file, feed_id, valid) - if "date_published" in feed.keys(): - updated = feed["date_published"] - try: - updated = dt.convert_struct_time_to_iso8601(updated) - except: - updated = '' - else: - updated = '' - feed_id = sqlite.get_feed_id(db_file, url) - feed_id = feed_id[0] - await sqlite.update_feed_properties( - db_file, feed_id, len(feed["items"]), updated) - # await update_feed_status - except ( - IncompleteReadError, - IncompleteRead, - error.URLError - ) as e: - logger.error(e) - return - # new_entry = 0 - for entry in entries: - logger.debug('{}: entry: {}' - .format(function_name, entry["title"])) - if "date_published" in entry.keys(): - date = entry["date_published"] - date = dt.rfc2822_to_iso8601(date) - elif "date_modified" in entry.keys(): - date = entry["date_modified"] - date = dt.rfc2822_to_iso8601(date) - else: - date = dt.now() - if "url" in entry.keys(): - # link = complete_url(source, entry.link) - link = join_url(url, entry["url"]) - link = trim_url(link) - else: - link = url - # title = feed["feed"]["title"] - # title = "{}: *{}*".format(feed["feed"]["title"], entry.title) - title = entry["title"] if "title" in entry.keys() else date - entry_id = entry["id"] if "id" in entry.keys() else link - feed_id = sqlite.get_feed_id(db_file, url) - feed_id = feed_id[0] - exist = sqlite.check_entry_exist(db_file, feed_id, - entry_id=entry_id, - title=title, link=link, - date=date) - if not exist: - summary = entry["summary"] if "summary" in entry.keys() else '' - if not summary: - summary = (entry["content_html"] - if "content_html" in entry.keys() - else '') - if not summary: - summary = (entry["content_text"] - if "content_text" in entry.keys() - else '') - read_status = 0 - pathname = urlsplit(link).path - string = ( - "{} {} {}" - ).format( - title, summary, pathname) - if self.settings['default']['filter']: - print('Filter is now processing data.') - allow_list = config.is_include_keyword(db_file, - "allow", string) - if not allow_list: - reject_list = config.is_include_keyword(db_file, - "deny", - string) - if reject_list: - read_status = 1 - logger.debug('Rejected : {}' - '\n' - 'Keyword : {}' - .format(link, reject_list)) - if isinstance(date, int): - logger.error('Variable "date" is int: {}'.format(date)) - media_link = '' - if "attachments" in entry.keys(): - for e_link in entry["attachments"]: - try: - # if (link.rel == "enclosure" and - # (link.type.startswith("audio/") or - # link.type.startswith("image/") or - # link.type.startswith("video/")) - # ): - media_type = e_link["mime_type"][:e_link["mime_type"].index("/")] - if media_type in ("audio", "image", "video"): - media_link = e_link["url"] - media_link = join_url(url, e_link["url"]) - media_link = trim_url(media_link) - break - except: - logger.error('KeyError: "url"\n' - 'Missing "url" attribute for {}' - .format(url)) - logger.error('Continue scanning for next ' - 'potential enclosure of {}' - .format(link)) - entry = { 
- "title": title, - "link": link, - "enclosure": media_link, - "entry_id": entry_id, - "date": date, - "read_status": read_status - } - new_entries.extend([entry]) - # await sqlite.add_entry( - # db_file, title, link, entry_id, - # url, date, read_status) - # await sqlite.set_date(db_file, url) - if len(new_entries): - feed_id = sqlite.get_feed_id(db_file, url) - feed_id = feed_id[0] - await sqlite.add_entries_and_update_feed_state(db_file, feed_id, - new_entries) - - -def view_feed(url, feed): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: url: {}' - .format(function_name, url)) - if "title" in feed["feed"].keys(): - title = feed["feed"]["title"] - else: - title = urlsplit(url).netloc - entries = feed.entries - response = "Preview of {}:\n\n```\n".format(title) - counter = 0 - for entry in entries: - counter += 1 - if entry.has_key("title"): - title = entry.title - else: - title = "*** No title ***" - if entry.has_key("link"): - # link = complete_url(source, entry.link) - link = join_url(url, entry.link) - link = trim_url(link) - else: - link = "*** No link ***" - if entry.has_key("published"): - date = entry.published - date = dt.rfc2822_to_iso8601(date) - elif entry.has_key("updated"): - date = entry.updated - date = dt.rfc2822_to_iso8601(date) - else: - date = "*** No date ***" - response += ("Title : {}\n" - "Date : {}\n" - "Link : {}\n" - "Count : {}\n" - "\n" - .format(title, date, link, counter)) - if counter > 4: - break - response += ( - "```\nSource: {}" - ).format(url) - return response - - -def view_entry(url, feed, num): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: url: {} num: {}' - .format(function_name, url, num)) - if "title" in feed["feed"].keys(): - title = feed["feed"]["title"] - else: - title = urlsplit(url).netloc - entries = feed.entries - num = int(num) - 1 - entry = entries[num] - response = "Preview of {}:\n\n```\n".format(title) - if entry.has_key("title"): - title = entry.title - else: - title = "*** No title ***" - if entry.has_key("published"): - date = entry.published - date = dt.rfc2822_to_iso8601(date) - elif entry.has_key("updated"): - date = entry.updated - date = dt.rfc2822_to_iso8601(date) - else: - date = "*** No date ***" - if entry.has_key("summary"): - summary = entry.summary - # Remove HTML tags - summary = BeautifulSoup(summary, "lxml").text - # TODO Limit text length - summary = summary.replace("\n\n\n", "\n\n") - else: - summary = "*** No summary ***" - if entry.has_key("link"): - # link = complete_url(source, entry.link) - link = join_url(url, entry.link) - link = trim_url(link) - else: - link = "*** No link ***" - response = ("{}\n" - "\n" - # "> {}\n" - "{}\n" - "\n" - "{}\n" - "\n" - .format(title, summary, link)) - return response - - -async def download_feed(self, db_file, feed_url): - """ - Get feed content. - - Parameters - ---------- - db_file : str - Path to database file. - url : str, optional - URL. 
- """ - function_name = sys._getframe().f_code.co_name - logger.debug('{}: db_file: {} url: {}' - .format(function_name, db_file, feed_url)) - if isinstance(feed_url, tuple): feed_url = feed_url[0] - result = await fetch.http(feed_url) - feed_id = sqlite.get_feed_id(db_file, feed_url) - feed_id = feed_id[0] - status_code = result['status_code'] - await sqlite.update_feed_status(db_file, feed_id, status_code) - - -def get_properties_of_feed(db_file, feed_id, feed): - - if feed.has_key('updated_parsed'): - feed_updated = feed.updated_parsed - try: - feed_updated = dt.convert_struct_time_to_iso8601(feed_updated) - except: - feed_updated = '' - else: - feed_updated = '' - - entries_count = len(feed.entries) - - feed_version = feed.version if feed.has_key('version') else '' - feed_encoding = feed.encoding if feed.has_key('encoding') else '' - feed_language = feed.feed.language if feed.feed.has_key('language') else '' - feed_icon = feed.feed.icon if feed.feed.has_key('icon') else '' - feed_image = feed.feed.image.href if feed.feed.has_key('image') else '' - feed_logo = feed.feed.logo if feed.feed.has_key('logo') else '' - feed_ttl = feed.feed.ttl if feed.feed.has_key('ttl') else '' - - feed_properties = { - "version" : feed_version, - "encoding" : feed_encoding, - "language" : feed_language, - "rating" : '', - "entries_count" : entries_count, - "icon" : feed_icon, - "image" : feed_image, - "logo" : feed_logo, - "ttl" : feed_ttl, - "updated" : feed_updated, - } - - return feed_properties - -# TODO get all active feeds of active accounts and scan the feed with the earliest scanned time -# TODO Rename function name (idea: scan_and_populate) -def get_properties_of_entries(jid_bare, db_file, feed_url, feed_id, feed): - """ - Get new entries. - - Parameters - ---------- - db_file : str - Path to database file. - url : str, optional - URL. 
- """ - # print('MID', feed_url, jid_bare, 'get_properties_of_entries') - function_name = sys._getframe().f_code.co_name - logger.debug('{}: feed_id: {} url: {}' - .format(function_name, feed_id, feed_url)) - - new_entries = [] - for entry in feed.entries: - logger.debug('{}: entry: {}'.format(function_name, entry.link)) - if entry.has_key("published"): - entry_published = entry.published - entry_published = dt.rfc2822_to_iso8601(entry_published) - else: - entry_published = '' - if entry.has_key("updated"): - entry_updated = entry.updated - entry_updated = dt.rfc2822_to_iso8601(entry_updated) - else: - entry_updated = dt.now() - if entry.has_key("link"): - # link = complete_url(source, entry.link) - entry_link = join_url(feed_url, entry.link) - entry_link = trim_url(entry_link) - else: - entry_link = feed_url - # title = feed["feed"]["title"] - # title = "{}: *{}*".format(feed["feed"]["title"], entry.title) - entry_title = entry.title if entry.has_key("title") else entry_published - entry_id = entry.id if entry.has_key("id") else entry_link - exist = sqlite.check_entry_exist(db_file, feed_id, - identifier=entry_id, - title=entry_title, - link=entry_link, - published=entry_published) - if not exist: - read_status = 0 - # # Filter - # pathname = urlsplit(link).path - # string = ( - # "{} {} {}" - # ).format( - # title, summary, pathname) - # if self.settings['default']['filter']: - # print('Filter is now processing data.') - # allow_list = config.is_include_keyword(db_file, - # "allow", string) - # if not allow_list: - # reject_list = config.is_include_keyword(db_file, - # "deny", - # string) - # if reject_list: - # read_status = 1 - # logger.debug('Rejected : {}' - # '\n' - # 'Keyword : {}' - # .format(link, reject_list)) - if isinstance(entry_published, int): - logger.error('Variable "published" is int: {}'.format(entry_published)) - if isinstance(entry_updated, int): - logger.error('Variable "updated" is int: {}'.format(entry_updated)) - - # Authors - entry_authors =[] - if entry.has_key('authors'): - for author in entry.authors: - author_properties = { - 'name' : author.name if author.has_key('name') else '', - 'url' : author.href if author.has_key('href') else '', - 'email' : author.email if author.has_key('email') else '', - } - entry_authors.extend([author_properties]) - elif entry.has_key('author_detail'): - author_properties = { - 'name' : entry.author_detail.name if entry.author_detail.has_key('name') else '', - 'url' : entry.author_detail.href if entry.author_detail.has_key('href') else '', - 'email' : entry.author_detail.email if entry.author_detail.has_key('email') else '', - } - entry_authors.extend([author_properties]) - elif entry.has_key('author'): - author_properties = { - 'name' : entry.author, - 'url' : '', - 'email' : '', - } - entry_authors.extend([author_properties]) - - # Contributors - entry_contributors = [] - if entry.has_key('contributors'): - for contributor in entry.contributors: - contributor_properties = { - 'name' : contributor.name if contributor.has_key('name') else '', - 'url' : contributor.href if contributor.has_key('href') else '', - 'email' : contributor.email if contributor.has_key('email') else '', - } - entry_contributors.extend([contributor_properties]) - - # Tags - entry_tags = [] - if entry.has_key('tags'): - for tag in entry.tags: - tag_properties = { - 'term' : tag.term if tag.has_key('term') else '', - 'scheme' : tag.scheme if tag.has_key('scheme') else '', - 'label' : tag.label if tag.has_key('label') else '', - } - 
entry_tags.extend([tag_properties]) - - # Content - entry_contents = [] - if entry.has_key('content'): - for content in entry.content: - text = content.value if content.has_key('value') else '' - type = content.type if content.has_key('type') else '' - lang = content.lang if content.has_key('lang') else '' - base = content.base if content.has_key('base') else '' - entry_content = { - 'text' : text, - 'lang' : lang, - 'type' : type, - 'base' : base, - } - entry_contents.extend([entry_content]) - - # Links and Enclosures - entry_links = [] - if entry.has_key('links'): - for link in entry.links: - link_properties = { - 'url' : link.href if link.has_key('href') else '', - 'rel' : link.rel if link.has_key('rel') else '', - 'type' : link.type if link.has_key('type') else '', - 'length' : '', - } - entry_links.extend([link_properties]) - # Element media:content is utilized by Mastodon - if entry.has_key('media_content'): - for link in entry.media_content: - link_properties = { - 'url' : link['url'] if 'url' in link else '', - 'rel' : 'enclosure', - 'type' : link['type'] if 'type' in link else '', - # 'medium' : link['medium'] if 'medium' in link else '', - 'length' : link['filesize'] if 'filesize' in link else '', - } - entry_links.extend([link_properties]) - if entry.has_key('media_thumbnail'): - for link in entry.media_thumbnail: - link_properties = { - 'url' : link['url'] if 'url' in link else '', - 'rel' : 'enclosure', - 'type' : '', - # 'medium' : 'image', - 'length' : '', - } - entry_links.extend([link_properties]) - - # Category - entry_category = entry.category if entry.has_key('category') else '' - - # Comments - entry_comments = entry.comments if entry.has_key('comments') else '' - - # href - entry_href = entry.href if entry.has_key('href') else '' - - # Link: Same as entry.links[0].href in most if not all cases - entry_link = entry.link if entry.has_key('link') else '' - - # Rating - entry_rating = entry.rating if entry.has_key('rating') else '' - - # Summary - entry_summary_text = entry.summary if entry.has_key('summary') else '' - if entry.has_key('summary_detail'): - entry_summary_type = entry.summary_detail.type if entry.summary_detail.has_key('type') else '' - entry_summary_lang = entry.summary_detail.lang if entry.summary_detail.has_key('lang') else '' - entry_summary_base = entry.summary_detail.base if entry.summary_detail.has_key('base') else '' - else: - entry_summary_type = '' - entry_summary_lang = '' - entry_summary_base = '' - - # Title - entry_title = entry.title if entry.has_key('title') else '' - if entry.has_key('title_detail'): - entry_title_type = entry.title_detail.type if entry.title_detail.has_key('type') else '' - else: - entry_title_type = '' - - ########################################################### - - # media_type = e_link.type[:e_link.type.index("/")] - # if (e_link.rel == "enclosure" and - # media_type in ("audio", "image", "video")): - # media_link = e_link.href - # media_link = join_url(url, e_link.href) - # media_link = trim_url(media_link) - - ########################################################### - - entry_properties = { - "identifier": entry_id, - "link": entry_link, - "href": entry_href, - "title": entry_title, - "title_type": entry_title_type, - 'summary_text' : entry_summary_text, - 'summary_lang' : entry_summary_lang, - 'summary_type' : entry_summary_type, - 'summary_base' : entry_summary_base, - 'category' : entry_category, - "comments": entry_comments, - "rating": entry_rating, - "published": entry_published, - "updated": 
entry_updated, - "read_status": read_status - } - - new_entries.extend([{ - "entry_properties" : entry_properties, - "entry_authors" : entry_authors, - "entry_contributors" : entry_contributors, - "entry_contents" : entry_contents, - "entry_links" : entry_links, - "entry_tags" : entry_tags - }]) - # await sqlite.add_entry( - # db_file, title, link, entry_id, - # url, date, read_status) - # await sqlite.set_date(db_file, url) - return new_entries - - -async def extract_image_from_feed(db_file, feed_id, url): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: db_file: {} feed_id: {} url: {}' - .format(function_name, db_file, feed_id, url)) - feed_url = sqlite.get_feed_url(db_file, feed_id) - feed_url = feed_url[0] - result = await fetch.http(feed_url) - if not result['error']: - document = result['content'] - feed = parse(document) - for entry in feed.entries: - try: - if entry.link == url: - for link in entry.links: - if (link.rel == "enclosure" and - link.type.startswith("image/")): - image_url = link.href - return image_url - except: - logger.error(url) - logger.error('AttributeError: object has no attribute "link"') - - -async def extract_image_from_html(url): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: url: {}'.format(function_name, url)) - result = await fetch.http(url) - if not result['error']: - data = result['content'] - tree = html.fromstring(data) - # TODO Exclude banners, class="share" links etc. - images = tree.xpath( - '//img[not(' - 'contains(@src, "avatar") or ' - 'contains(@src, "emoji") or ' - 'contains(@src, "icon") or ' - 'contains(@src, "logo") or ' - 'contains(@src, "letture") or ' - 'contains(@src, "search") or ' - 'contains(@src, "share") or ' - 'contains(@src, "smiley")' - ')]/@src') - if len(images): - image = images[0] - image = str(image) - image_url = complete_url(url, image) - return image_url - - -# This works too -# ''.join(xml.etree.ElementTree.fromstring(text).itertext()) -def remove_html_tags(data): - function_name = sys._getframe().f_code.co_name - logger.debug('{}'.format(function_name)) - data = BeautifulSoup(data, "lxml").text - data = data.replace("\n\n", "\n") - return data - -# TODO Add support for eDonkey, Gnutella, Soulseek -async def get_magnet(link): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: {}'.format(function_name, link)) - parted_link = urlsplit(link) - queries = parse_qs(parted_link.query) - query_xt = queries["xt"][0] - if query_xt.startswith("urn:btih:"): - filename = queries["dn"][0] - checksum = query_xt[len("urn:btih:"):] - torrent = await fetch.magnet(link) - logger.debug('Attempting to retrieve {} ({})' - .format(filename, checksum)) - if not torrent: - logger.debug('Attempting to retrieve {} from HTTP caching service' - .format(filename)) - urls = [ - 'https://watercache.libertycorp.org/get/{}/{}', - 'https://itorrents.org/torrent/{}.torrent?title={}', - 'https://firecache.libertycorp.org/get/{}/{}', - 'http://fcache63sakpihd44kxdduy6kgpdhgejgp323wci435zwy6kiylcnfad.onion/get/{}/{}' - ] - for url in urls: - torrent = fetch.http(url.format(checksum, filename)) - if torrent: - break - return torrent - - -async def remove_nonexistent_entries(self, jid_bare, db_file, url, feed): - """ - Remove entries that don't exist in a given parsed feed. - Check the entries returned from feed and delete read non - existing entries, otherwise move to table archive, if unread. - - Parameters - ---------- - db_file : str - Path to database file. - url : str - Feed URL. 
- feed : list - Parsed feed document. - """ - function_name = sys._getframe().f_code.co_name - logger.debug('{}: db_file: {} url: {}' - .format(function_name, db_file, url)) - feed_id = sqlite.get_feed_id(db_file, url) - feed_id = feed_id[0] - items = sqlite.get_entries_of_feed(db_file, feed_id) - entries = feed.entries - limit = Config.get_setting_value(self.settings, jid_bare, 'archive') - for item in items: - ix = item[0] - entry_title = item[1] - entry_link = item[2] - entry_id = item[3] - timestamp = item[4] - read_status = item[5] - valid = False - for entry in entries: - title = None - link = None - time = None - # valid = False - # TODO better check and don't repeat code - if entry.has_key("id") and entry_id: - if entry.id == entry_id: - # print("compare1:", entry.id) - # print("compare2:", entry_id) - # print("============") - valid = True - break - else: - if entry.has_key("title"): - title = entry.title - else: - title = feed["feed"]["title"] - if entry.has_key("link"): - link = join_url(url, entry.link) - else: - link = url - if entry.has_key("published") and timestamp: - # print("compare11:", title, link, time) - # print("compare22:", entry_title, entry_link, timestamp) - # print("============") - time = dt.rfc2822_to_iso8601(entry.published) - if (entry_title == title and - entry_link == link and - timestamp == time): - valid = True - break - else: - if (entry_title == title and - entry_link == link): - # print("compare111:", title, link) - # print("compare222:", entry_title, entry_link) - # print("============") - valid = True - break - # TODO better check and don't repeat code - if not valid: - # print("id: ", ix) - # if title: - # print("title: ", title) - # print("entry_title: ", entry_title) - # if link: - # print("link: ", link) - # print("entry_link: ", entry_link) - # if entry.id: - # print("last_entry:", entry.id) - # print("entry_id: ", entry_id) - # if time: - # print("time: ", time) - # print("timestamp: ", timestamp) - # print("read: ", read_status) - # breakpoint() - - # TODO Send to table archive - # TODO Also make a regular/routine check for sources that - # have been changed (though that can only happen when - # manually editing) - # ix = item[0] - # print(">>> SOURCE: ", source) - # print(">>> INVALID:", entry_title) - # print("title:", entry_title) - # print("link :", entry_link) - # print("id :", entry_id) - if read_status == 1: - await sqlite.delete_entry_by_id(db_file, ix) - # print(">>> DELETING:", entry_title) - else: - # print(">>> ARCHIVING:", entry_title) - await sqlite.archive_entry(db_file, ix) - await sqlite.maintain_archive(db_file, limit) - - - -async def remove_nonexistent_entries_json(self, jid_bare, db_file, url, feed): - """ - Remove entries that don't exist in a given parsed feed. - Check the entries returned from feed and delete read non - existing entries, otherwise move to table archive, if unread. - - Parameters - ---------- - db_file : str - Path to database file. - url : str - Feed URL. - feed : list - Parsed feed document. 
-    """
-    function_name = sys._getframe().f_code.co_name
-    logger.debug('{}: db_file: {}: url: {}'
-                 .format(function_name, db_file, url))
-    feed_id = sqlite.get_feed_id(db_file, url)
-    feed_id = feed_id[0]
-    items = sqlite.get_entries_of_feed(db_file, feed_id)
-    entries = feed["items"]
-    limit = Config.get_setting_value(self.settings, jid_bare, 'archive')
-    for item in items:
-        ix = item[0]
-        entry_title = item[1]
-        entry_link = item[2]
-        entry_id = item[3]
-        timestamp = item[4]
-        read_status = item[5]
-        valid = False
-        for entry in entries:
-            title = None
-            link = None
-            time = None
-            # valid = False
-            # TODO better check and don't repeat code
-            if entry.has_key("id") and entry_id:
-                if entry["id"] == entry_id:
-                    # print("compare1:", entry.id)
-                    # print("compare2:", entry_id)
-                    # print("============")
-                    valid = True
-                    break
-            else:
-                if entry.has_key("title"):
-                    title = entry["title"]
-                else:
-                    title = feed["title"]
-                if entry.has_key("link"):
-                    link = join_url(url, entry["link"])
-                else:
-                    link = url
-                # "date_published" "date_modified"
-                if entry.has_key("date_published") and timestamp:
-                    time = dt.rfc2822_to_iso8601(entry["date_published"])
-                    if (entry_title == title and
-                        entry_link == link and
-                        timestamp == time):
-                        valid = True
-                        break
-                else:
-                    if (entry_title == title and
-                        entry_link == link):
-                        valid = True
-                        break
-        if not valid:
-            print("CHECK ENTRY OF JSON FEED IN ARCHIVE")
-            if read_status == 1:
-                await sqlite.delete_entry_by_id(db_file, ix)
-            else:
-                await sqlite.archive_entry(db_file, ix)
-    await sqlite.maintain_archive(db_file, limit)
diff --git a/slixfeed/assets/information.toml b/slixfeed/assets/information.toml
index 097affc..0311878 100644
--- a/slixfeed/assets/information.toml
+++ b/slixfeed/assets/information.toml
@@ -3,14 +3,14 @@ info = """
 Slixfeed is a news broker bot for syndicated news which aims to be \
 an easy to use and fully-featured news aggregating bot.
 
-Slixfeed provides a convenient access to Blogs, News websites and \
+Slixfeed provides convenient access to Blogs, News sites and \
 even Fediverse instances, along with filtering and other privacy \
 driven functionalities.
 
 Slixfeed is designed primarily for the XMPP communication network \
 (aka Jabber).
 
-https://gitgud.io/sjehuda/slixfeed
+https://git.xmpp-it.net/sch/Slixfeed
 """
 
 [note]
@@ -160,6 +160,15 @@ https://mov.im
 
 Poezio
 https://poez.io
+
+Profanity
+https://profanity-im.github.io
+
+Psi
+https://psi-im.org
+
+Psi+
+https://psi-plus.com
 """
 
 [services]
diff --git a/slixfeed/assets/settings.toml b/slixfeed/assets/settings.toml
index 58a59a6..0f32f53 100644
--- a/slixfeed/assets/settings.toml
+++ b/slixfeed/assets/settings.toml
@@ -6,6 +6,7 @@ archive = 50 # Maximum items to archive (0 - 500)
 check = 120 # Source check interval (recommended 90; minimum 10)
 enabled = 1 # Work status (Value 0 to disable)
 filter = 0 # Enable filters (Value 1 to enable)
+finished = 0 # Send an extra message indicating how long a finished task took (Value 1 to enable)
 interval = 300 # Update interval (Minimum value 10)
 length = 300 # Maximum length of summary (Value 0 to disable)
 media = 0 # Display media (audio, image, video) when available
@@ -20,13 +21,16 @@ random = 0 # Pick random item from database
 # * feed_title = Title of news source
 # * ix = Index of item
 formatting = """
-{title}
+{ix}. {title}
 > {summary}
 {link}
-{feed_title} [{ix}]
+{feed_title} [{feed_id}]
 """
 
+[ipc]
+bsd = 0 # POSIX (BSD/UDS) socket IPC (Value 1 to enable)
+
 # Utilized in case of missing protocol support.
# Utilized in case of missing protocol support.
[bridge]

gopher = ""
@@ -37,11 +41,11 @@ tor = ""
yggdrasil = ""
[network]
-http_proxy = ""
-user_agent = "Slixfeed/0.1"
-clearnet = 0 # Enable policed DNS system (not recommended)
-i2p = 1 # Enable I2P mixnet system (safer)
-ipfs = 1 # Enable IPFS DHT system (safer)
-loki = 1 # Enable Loki mixnet system (safer)
-tor = 1 # Enable Tor semi-mixnet system (semi-safer)
-yggdrasil = 1 # Enable Yggdrasil mixnet system (safer)
+http_proxy = "" # Example: http://127.0.0.1:8118
+user_agent = "Slixfeed/0.1" # Default Slixfeed/0.1
+clearnet = 0 # Enable policed DNS system (not recommended)
+i2p = 1 # Enable I2P mixnet system (safer)
+ipfs = 1 # Enable IPFS DHT system (safer)
+loki = 1 # Enable Loki mixnet system (safer)
+tor = 1 # Enable Tor semi-mixnet system (semi-safer)
+yggdrasil = 1 # Enable Yggdrasil mixnet system (safer)
diff --git a/slixfeed/bittorrent.py b/slixfeed/bittorrent.py
new file mode 100644
index 0000000..891b9a8
--- /dev/null
+++ b/slixfeed/bittorrent.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import slixfeed.fetch as fetch
+from slixfeed.log import Logger
+import sys
+from urllib.parse import parse_qs, urlsplit
+
+logger = Logger(__name__)
+
+class BitTorrent:
+
+ # TODO Add support for eDonkey, Gnutella, Soulseek
+ async def get_magnet(link):
+ function_name = sys._getframe().f_code.co_name
+ logger.debug('{}: {}'.format(function_name, link))
+ parted_link = urlsplit(link)
+ queries = parse_qs(parted_link.query)
+ query_xt = queries["xt"][0]
+ if query_xt.startswith("urn:btih:"):
+ filename = queries["dn"][0]
+ checksum = query_xt[len("urn:btih:"):]
+ torrent = await fetch.magnet(link)
+ logger.debug('Attempting to retrieve {} ({})'
+ .format(filename, checksum))
+ if not torrent:
+ logger.debug('Attempting to retrieve {} from HTTP caching service'
+ .format(filename))
+ urls = [
+ 'https://watercache.libertycorp.org/get/{}/{}',
+ 'https://itorrents.org/torrent/{}.torrent?title={}',
+ 'https://firecache.libertycorp.org/get/{}/{}',
+ 'http://fcache63sakpihd44kxdduy6kgpdhgejgp323wci435zwy6kiylcnfad.onion/get/{}/{}'
+ ]
+ for url in urls:
+ # fetch.http is a coroutine that returns a result dictionary;
+ # await it and use the payload of a non-error response.
+ result = await fetch.http(url.format(checksum, filename))
+ if result and not result['error']:
+ torrent = result['content']
+ break
+ return torrent
\ No newline at end of file
diff --git a/slixfeed/config.py b/slixfeed/config.py
index a8ed1b8..51cad6d 100644
--- a/slixfeed/config.py
+++ b/slixfeed/config.py
@@ -11,9 +11,9 @@ FIXME
TODO
-1) Website-specific filter (i.e. audiobookbay).
+1) Site-specific filter (e.g. audiobookbay).
-2) Exclude websites from being subjected to filtering (e.g. metapedia).
+2) Exclude sites from being subjected to filtering (e.g. metapedia).
3) Filter phrases: Refer to sqlitehandler.search_entries for implementation.
@@ -32,7 +32,7 @@ TODO
"""
import configparser
-import logging
+from slixfeed.log import Logger
import os
# from random import randrange
import slixfeed.sqlite as sqlite
@@ -43,6 +43,8 @@ try:
except:
import tomli as tomllib
+logger = Logger(__name__)
+
# TODO Consider a class ConfigDefault for default values to be initiated at most
# basic level possible and a class ConfigJID for each JID (i.e. db_file) to be
# also initiated at same level or at least at event call, then check whether
@@ -75,6 +77,8 @@ class Config:
else:
await sqlite.set_setting_value(db_file, key_val)
+ # TODO Segregate Jabber ID settings from Slixfeed-wide settings.
+ # self.settings, self.settings_xmpp, self.settings_irc etc.
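+ # A hypothetical shape (illustration only, not part of this change):
+ # keep Slixfeed-wide defaults in self.settings, and hold per-account
+ # overrides in transport-specific maps, e.g.
+ # self.settings_xmpp[jid_bare]['interval'] or
+ # self.settings_irc[channel]['interval'].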
def get_setting_value(settings, jid_bare, key):
if jid_bare in settings and key in settings[jid_bare]:
value = settings[jid_bare][key]
@@ -248,21 +252,21 @@ def get_value(filename, section, keys):
for key in keys:
if key in section_res:
value = section_res[key]
- logging.debug("Found value {} for key {}".format(value, key))
+ logger.debug("Found value {} for key {}".format(value, key))
else:
value = ''
- logging.debug("Missing key:", key)
+ logger.debug("Missing key: {}".format(key))
result.extend([value])
elif isinstance(keys, str):
key = keys
if key in section_res:
result = section_res[key]
- logging.debug("Found value {} for key {}".format(result, key))
+ logger.debug("Found value {} for key {}".format(result, key))
else:
result = ''
- # logging.error("Missing key:", key)
+ # logger.error("Missing key: {}".format(key))
if result == None:
- logging.error(
+ logger.error(
"Check configuration file {}.ini for "
"missing key(s) \"{}\" under section [{}].".format(
filename, keys, section)
diff --git a/slixfeed/fetch.py b/slixfeed/fetch.py
index 01cafd4..57aca50 100644
--- a/slixfeed/fetch.py
+++ b/slixfeed/fetch.py
@@ -39,37 +39,39 @@ NOTE
from aiohttp import ClientError, ClientSession, ClientTimeout
from asyncio import TimeoutError
# from asyncio.exceptions import IncompleteReadError
-# from bs4 import BeautifulSoup
# from http.client import IncompleteRead
-import logging
# from lxml import html
# from xml.etree.ElementTree import ElementTree, ParseError
import requests
import slixfeed.config as config
+from slixfeed.log import Logger
+
+logger = Logger(__name__)
+
try:
from magnet2torrent import Magnet2Torrent, FailedToFetchException
except:
- logging.info(
+ logger.info(
"Package magnet2torrent was not found.\n"
"BitTorrent is disabled.")
-# class FetchDat:
+# class Dat:
# async def dat():
-# class FetchFtp:
+# class Ftp:
# async def ftp():
-# class FetchGemini:
+# class Gemini:
# async def gemini():
-# class FetchGopher:
+# class Gopher:
# async def gopher():
-# class FetchHttp:
+# class Http:
# async def http():
-# class FetchIpfs:
+# class Ipfs:
# async def ipfs():
@@ -103,12 +105,13 @@ def http_response(url):
"User-Agent": user_agent
}
try:
- # Don't use HEAD request because quite a few websites may deny it
+ # Do not use HEAD request because it appears that too many sites would
+ # deny it.
# response = requests.head(url, headers=headers, allow_redirects=True) response = requests.get(url, headers=headers, allow_redirects=True) except Exception as e: - logging.warning('Error in HTTP response') - logging.error(e) + logger.warning('Error in HTTP response') + logger.error(e) response = None return response @@ -175,7 +178,7 @@ async def http(url): 'original_url': url, 'status_code': None} except Exception as e: - logging.error(e) + logger.error(e) result = {'error': True, 'message': 'Error:' + str(e) if e else 'Error', 'original_url': url, @@ -188,4 +191,4 @@ async def magnet(link): try: filename, torrent_data = await m2t.retrieve_torrent() except FailedToFetchException: - logging.debug("Failed") + logger.debug("Failed") diff --git a/slixfeed/log.py b/slixfeed/log.py index e884a2a..3696b6f 100644 --- a/slixfeed/log.py +++ b/slixfeed/log.py @@ -13,8 +13,10 @@ logger.debug('This is a debug message') import logging + class Logger: + def __init__(self, name): self.logger = logging.getLogger(name) self.logger.setLevel(logging.WARNING) # DEBUG @@ -44,4 +46,4 @@ class Logger: # def check_difference(function_name, difference): # if difference > 1: - # Logger.warning(message) + # Logger.warning(message) \ No newline at end of file diff --git a/slixfeed/opml.py b/slixfeed/opml.py deleted file mode 100644 index f639247..0000000 --- a/slixfeed/opml.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -from slixfeed.log import Logger -import slixfeed.dt as dt -import slixfeed.sqlite as sqlite -import sys -import xml.etree.ElementTree as ETR - -logger = Logger(__name__) - -class Opml: - - # TODO Consider adding element jid as a pointer of import - def export_to_file(jid, filename, results): - # print(jid, filename, results) - function_name = sys._getframe().f_code.co_name - logger.debug('{} jid: {} filename: {}' - .format(function_name, jid, filename)) - root = ETR.Element("opml") - root.set("version", "1.0") - head = ETR.SubElement(root, "head") - ETR.SubElement(head, "title").text = "{}".format(jid) - ETR.SubElement(head, "description").text = ( - "Set of subscriptions exported by Slixfeed") - ETR.SubElement(head, "generator").text = "Slixfeed" - ETR.SubElement(head, "urlPublic").text = ( - "https://gitgud.io/sjehuda/slixfeed") - time_stamp = dt.current_time() - ETR.SubElement(head, "dateCreated").text = time_stamp - ETR.SubElement(head, "dateModified").text = time_stamp - body = ETR.SubElement(root, "body") - for result in results: - outline = ETR.SubElement(body, "outline") - outline.set("text", result[1]) - outline.set("xmlUrl", result[2]) - # outline.set("type", result[2]) - tree = ETR.ElementTree(root) - tree.write(filename) - - - async def import_from_file(db_file, result): - function_name = sys._getframe().f_code.co_name - logger.debug('{}: db_file: {}' - .format(function_name, db_file)) - if not result['error']: - document = result['content'] - root = ETR.fromstring(document) - before = sqlite.get_number_of_items(db_file, 'feeds_properties') - feeds = [] - for child in root.findall(".//outline"): - url = child.get("xmlUrl") - title = child.get("text") - # feed = (url, title) - # feeds.extend([feed]) - feed = { - 'title' : title, - 'url' : url, - } - feeds.extend([feed]) - await sqlite.import_feeds(db_file, feeds) - await sqlite.add_metadata(db_file) - after = sqlite.get_number_of_items(db_file, 'feeds_properties') - difference = int(after) - int(before) - return difference diff --git a/slixfeed/sqlite.py b/slixfeed/sqlite.py index edee011..d0d2206 
100644
--- a/slixfeed/sqlite.py
+++ b/slixfeed/sqlite.py
@@ -1558,29 +1558,38 @@ def is_entry_archived(cur, ix):
result = cur.execute(sql, par).fetchone()
return result
-async def mark_entry_as_read(cur, ix):
+
+def is_entry_read(db_file, ix):
"""
- Set read status of entry as read.
+ Check whether a given entry is marked as read.
Parameters
----------
- cur : object
- Cursor object.
+ db_file : str
+ Path to database file.
ix : str
Index of entry.
+
+ Returns
+ -------
+ result : tuple
+ Read status of entry.
"""
function_name = sys._getframe().f_code.co_name
logger.debug('{}: ix: {}'
.format(function_name, ix))
- sql = (
- """
- UPDATE entries_state
- SET read = 1
- WHERE entry_id = ?
- """
- )
- par = (ix,)
- cur.execute(sql, par)
+ with create_connection(db_file) as conn:
+ cur = conn.cursor()
+ sql = (
+ """
+ SELECT read
+ FROM entries_state
+ WHERE entry_id = ?
+ """
+ )
+ par = (ix,)
+ result = cur.execute(sql, par).fetchone()
+ return result
def get_last_update_time_of_feed(db_file, feed_id):
@@ -1669,45 +1678,6 @@ def get_number_of_unread_entries_by_feed(db_file, feed_id):
return count
-async def mark_feed_as_read(db_file, feed_id):
- """
- Set read status of entries of given feed as read.
-
- Parameters
- ----------
- db_file : str
- Path to database file.
- feed_id : str
- Feed ID.
- """
- function_name = sys._getframe().f_code.co_name
- logger.debug('{}: db_file: {} feed_id: {}'
- .format(function_name, db_file, feed_id))
- async with DBLOCK:
- with create_connection(db_file) as conn:
- cur = conn.cursor()
- sql = (
- """
- SELECT id
- FROM entries_properties
- WHERE feed_id = ?
- """
- )
- par = (feed_id,)
- ixs = cur.execute(sql, par).fetchall()
- sql = (
- """
- UPDATE entries_state
- SET read = 1
- WHERE entry_id = ?
- """
- )
- for ix in ixs: cur.execute(sql, ix)
- # for ix in ixs:
- # par = ix # Variable ix is already of type tuple
- # cur.execute(sql, par)
-
-
async def delete_entry_by_id(db_file, ix):
"""
Delete entry by Id.
@@ -1923,26 +1893,6 @@ def get_feed_url(db_file, feed_id):
return url
-async def mark_as_read(db_file, ix):
- function_name = sys._getframe().f_code.co_name
- logger.debug('{}: db_file: {} ix: {}'
- .format(function_name, db_file, ix))
- async with DBLOCK:
- with create_connection(db_file) as conn:
- cur = conn.cursor()
- # TODO While `async with DBLOCK` does work well from
- # outside of functions, it would be better practice
- # to place it within the functions.
- # NOTE: We can use DBLOCK once for both
- # functions, because, due to exclusive
- # ID, only one can ever occur.
- if is_entry_archived(cur, ix):
- await delete_entry(cur, ix)
- else:
- await mark_entry_as_read(cur, ix)
-
-
async def mark_all_as_read(db_file):
"""
Set read status of all entries as read.
@@ -1985,6 +1935,89 @@ async def mark_all_as_read(db_file):
for ix in ixs: cur.execute(sql, ix)
+async def mark_feed_as_read(db_file, feed_id):
+ """
+ Set read status of entries of given feed as read.
+
+ Parameters
+ ----------
+ db_file : str
+ Path to database file.
+ feed_id : str
+ Feed ID.
+ """
+ function_name = sys._getframe().f_code.co_name
+ logger.debug('{}: db_file: {} feed_id: {}'
+ .format(function_name, db_file, feed_id))
+ async with DBLOCK:
+ with create_connection(db_file) as conn:
+ cur = conn.cursor()
+ sql = (
+ """
+ SELECT id
+ FROM entries_properties
+ WHERE feed_id = ?
+ """
+ )
+ par = (feed_id,)
+ ixs = cur.execute(sql, par).fetchall()
+ sql = (
+ """
+ UPDATE entries_state
+ SET read = 1
+ WHERE entry_id = ?
+ """ + ) + for ix in ixs: cur.execute(sql, ix) + # for ix in ixs: + # par = ix # Variable ix is already of type tuple + # cur.execute(sql, par) + + +async def mark_entry_as_read(cur, ix): + """ + Set read status of entry as read. + + Parameters + ---------- + cur : object + Cursor object. + ix : str + Index of entry. + """ + function_name = sys._getframe().f_code.co_name + logger.debug('{}: ix: {}' + .format(function_name, ix)) + sql = ( + """ + UPDATE entries_state + SET read = 1 + WHERE entry_id = ? + """ + ) + par = (ix,) + cur.execute(sql, par) + + +async def mark_as_read(db_file, ix): + function_name = sys._getframe().f_code.co_name + logger.debug('{}: db_file: {} ix: {}' + .format(function_name, db_file, ix)) + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + # TODO While `async with DBLOCK` does work well from + # outside of functions, it would be better practice + # to place it within the functions. + # NOTE: We can use DBLOCK once for both + # functions, because, due to exclusive + # ID, only one can ever occur. + if is_entry_archived(cur, ix): + await delete_entry(cur, ix) + else: + await mark_entry_as_read(cur, ix) + + async def delete_entry(cur, ix): """ Delete entry. diff --git a/slixfeed/syndication.py b/slixfeed/syndication.py new file mode 100644 index 0000000..235da03 --- /dev/null +++ b/slixfeed/syndication.py @@ -0,0 +1,958 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + +TODO + +1) Function scan at "for entry in entries" + Suppress directly calling function "add_entry" (accept db_file) + Pass a list of valid entries to a new function "add_entries" + (accept db_file) which would call function "add_entry" (accept cur). + * accelerate adding of large set of entries at once. + * prevent (or mitigate halt of consequent actions). + * reduce I/O. + +2) Call sqlite function from function statistics. + Returning a list of values doesn't' seem to be a good practice. + +3) Special statistics for operator: + * Size of database(s); + * Amount of JIDs subscribed; + * Amount of feeds of all JIDs; + * Amount of entries of all JIDs. + +""" + +import asyncio +from feedparser import parse +import os +import slixfeed.config as config +from slixfeed.config import Config +import slixfeed.crawl as crawl +import slixfeed.dt as dt +import slixfeed.fetch as fetch +from slixfeed.log import Logger +import slixfeed.sqlite as sqlite +from slixfeed.url import join_url, trim_url +from slixfeed.utilities import Html, MD, SQLiteMaintain +from slixmpp.xmlstream import ET +import sys +from urllib.parse import urlsplit +import xml.etree.ElementTree as ETR + +logger = Logger(__name__) + + +class Feed: + + # NOTE Consider removal of MD (and any other option HTML and XBEL) + def export_feeds(jid_bare, ext): + function_name = sys._getframe().f_code.co_name + logger.debug('{}: jid_bare: {}: ext: {}'.format(function_name, jid_bare, ext)) + cache_dir = config.get_default_cache_directory() + if not os.path.isdir(cache_dir): + os.mkdir(cache_dir) + if not os.path.isdir(cache_dir + '/' + ext): + os.mkdir(cache_dir + '/' + ext) + filename = os.path.join( + cache_dir, ext, 'slixfeed_' + dt.timestamp() + '.' + ext) + db_file = config.get_pathname_to_database(jid_bare) + results = sqlite.get_feeds(db_file) + match ext: + # case 'html': + # response = 'Not yet implemented.' + case 'md': + MD.export_to_markdown(jid_bare, filename, results) + case 'opml': + Opml.export_to_file(jid_bare, filename, results) + # case 'xbel': + # response = 'Not yet implemented.' 
+ return filename + + + def pack_entry_into_dict(db_file, entry): + entry_id = entry[0] + authors = sqlite.get_authors_by_entry_id(db_file, entry_id) + entry_authors = [] + for author in authors: + entry_author = { + 'name': author[2], + 'email': author[3], + 'url': author[4]} + entry_authors.extend([entry_author]) + contributors = sqlite.get_contributors_by_entry_id(db_file, entry_id) + entry_contributors = [] + for contributor in contributors: + entry_contributor = { + 'name': contributor[2], + 'email': contributor[3], + 'url': contributor[4]} + entry_contributors.extend([entry_contributor]) + links = sqlite.get_links_by_entry_id(db_file, entry_id) + entry_links = [] + for link in links: + entry_link = { + 'url': link[2], + 'type': link[3], + 'rel': link[4], + 'size': link[5]} + entry_links.extend([entry_link]) + tags = sqlite.get_tags_by_entry_id(db_file, entry_id) + entry_tags = [] + for tag in tags: + entry_tag = { + 'term': tag[2], + 'scheme': tag[3], + 'label': tag[4]} + entry_tags.extend([entry_tag]) + contents = sqlite.get_contents_by_entry_id(db_file, entry_id) + entry_contents = [] + for content in contents: + entry_content = { + 'text': content[2], + 'type': content[3], + 'base': content[4], + 'lang': content[5]} + entry_contents.extend([entry_content]) + feed_entry = { + 'authors' : entry_authors, + 'category' : entry[10], + 'comments' : entry[12], + 'contents' : entry_contents, + 'contributors' : entry_contributors, + 'summary_base' : entry[9], + 'summary_lang' : entry[7], + 'summary_text' : entry[6], + 'summary_type' : entry[8], + 'enclosures' : entry[13], + 'href' : entry[11], + 'link' : entry[3], + 'links' : entry_links, + 'published' : entry[14], + 'rating' : entry[13], + 'tags' : entry_tags, + 'title' : entry[4], + 'title_type' : entry[3], + 'updated' : entry[15]} + return feed_entry + + + def create_rfc4287_entry(feed_entry): + node_entry = ET.Element('entry') + node_entry.set('xmlns', 'http://www.w3.org/2005/Atom') + # Title + title = ET.SubElement(node_entry, 'title') + if feed_entry['title']: + if feed_entry['title_type']: title.set('type', feed_entry['title_type']) + title.text = feed_entry['title'] + elif feed_entry['summary_text']: + if feed_entry['summary_type']: title.set('type', feed_entry['summary_type']) + title.text = feed_entry['summary_text'] + # if feed_entry['summary_base']: title.set('base', feed_entry['summary_base']) + # if feed_entry['summary_lang']: title.set('lang', feed_entry['summary_lang']) + else: + title.text = feed_entry['published'] + # Some feeds have identical content for contents and summary + # So if content is present, do not add summary + if feed_entry['contents']: + # Content + for feed_entry_content in feed_entry['contents']: + content = ET.SubElement(node_entry, 'content') + # if feed_entry_content['base']: content.set('base', feed_entry_content['base']) + if feed_entry_content['lang']: content.set('lang', feed_entry_content['lang']) + if feed_entry_content['type']: content.set('type', feed_entry_content['type']) + content.text = feed_entry_content['text'] + else: + # Summary + summary = ET.SubElement(node_entry, 'summary') # TODO Try 'content' + # if feed_entry['summary_base']: summary.set('base', feed_entry['summary_base']) + # TODO Check realization of "lang" + if feed_entry['summary_type']: summary.set('type', feed_entry['summary_type']) + if feed_entry['summary_lang']: summary.set('lang', feed_entry['summary_lang']) + summary.text = feed_entry['summary_text'] + # Authors + for feed_entry_author in feed_entry['authors']: + 
author = ET.SubElement(node_entry, 'author')
+ name = ET.SubElement(author, 'name')
+ name.text = feed_entry_author['name']
+ if feed_entry_author['url']:
+ uri = ET.SubElement(author, 'uri')
+ uri.text = feed_entry_author['url']
+ if feed_entry_author['email']:
+ email = ET.SubElement(author, 'email')
+ email.text = feed_entry_author['email']
+ # Contributors
+ for feed_entry_contributor in feed_entry['contributors']:
+ # RFC 4287 provides a dedicated element for contributors
+ contributor = ET.SubElement(node_entry, 'contributor')
+ name = ET.SubElement(contributor, 'name')
+ name.text = feed_entry_contributor['name']
+ if feed_entry_contributor['url']:
+ uri = ET.SubElement(contributor, 'uri')
+ uri.text = feed_entry_contributor['url']
+ if feed_entry_contributor['email']:
+ email = ET.SubElement(contributor, 'email')
+ email.text = feed_entry_contributor['email']
+ # Category
+ category = ET.SubElement(node_entry, "category")
+ # The atom:category element carries its value in attribute "term"
+ category.set('term', feed_entry['category'])
+ # Tags
+ for feed_entry_tag in feed_entry['tags']:
+ tag = ET.SubElement(node_entry, 'category')
+ tag.set('term', feed_entry_tag['term'])
+ # Link
+ link = ET.SubElement(node_entry, "link")
+ link.set('href', feed_entry['link'])
+ # Links
+ for feed_entry_link in feed_entry['links']:
+ link = ET.SubElement(node_entry, "link")
+ link.set('href', feed_entry_link['url'])
+ link.set('type', feed_entry_link['type'])
+ link.set('rel', feed_entry_link['rel'])
+ # Date updated
+ if feed_entry['updated']:
+ updated = ET.SubElement(node_entry, 'updated')
+ updated.text = feed_entry['updated']
+ # Date published
+ if feed_entry['published']:
+ published = ET.SubElement(node_entry, 'published')
+ published.text = feed_entry['published']
+ return node_entry
+
+
+ def is_feed(url, feed):
+ """
+ Determine whether a document is a feed or not.
+
+ Parameters
+ ----------
+ url : str
+ URL of the document.
+ feed : dict
+ Parsed feed.
+
+ Returns
+ -------
+ value : boolean
+ True or False.
+ """ + function_name = sys._getframe().f_code.co_name + logger.debug('{}'.format(function_name)) + value = False + # message = None + if not feed.entries: + if "version" in feed.keys(): + # feed["version"] + if feed.version: + value = True + # message = ( + # "Empty feed for {}" + # ).format(url) + elif "title" in feed["feed"].keys(): + value = True + # message = ( + # "Empty feed for {}" + # ).format(url) + else: + value = False + # message = ( + # "No entries nor title for {}" + # ).format(url) + elif feed.bozo: + # NOTE Consider valid even when is not-well-formed + value = True + logger.warning('Bozo detected for {}'.format(url)) + else: + value = True + # message = ( + # "Good feed for {}" + # ).format(url) + return value + + + async def add_feed(self, jid_bare, db_file, url, identifier): + function_name = sys._getframe().f_code.co_name + logger.debug('{}: db_file: {} url: {}' + .format(function_name, db_file, url)) + while True: + feed_id = sqlite.get_feed_id(db_file, url) + if not feed_id: + exist_identifier = sqlite.check_identifier_exist(db_file, identifier) + if not exist_identifier: + result = await fetch.http(url) + message = result['message'] + status_code = result['status_code'] + if not result['error']: + await sqlite.update_feed_status(db_file, feed_id, status_code) + document = result['content'] + feed = parse(document) + # if document and status_code == 200: + if Feed.is_feed(url, feed): + if "title" in feed["feed"].keys(): + title = feed["feed"]["title"] + else: + title = urlsplit(url).netloc + if "language" in feed["feed"].keys(): + language = feed["feed"]["language"] + else: + language = '' + if "encoding" in feed.keys(): + encoding = feed["encoding"] + else: + encoding = '' + if "updated_parsed" in feed["feed"].keys(): + updated = feed["feed"]["updated_parsed"] + try: + updated = dt.convert_struct_time_to_iso8601(updated) + except Exception as e: + logger.error(str(e)) + updated = '' + else: + updated = '' + version = feed.version + entries_count = len(feed.entries) + await sqlite.insert_feed(db_file, + url, + title, + identifier, + entries=entries_count, + version=version, + encoding=encoding, + language=language, + status_code=status_code, + updated=updated) + feed_valid = 0 if feed.bozo else 1 + await sqlite.update_feed_validity( + db_file, feed_id, feed_valid) + if feed.has_key('updated_parsed'): + feed_updated = feed.updated_parsed + try: + feed_updated = dt.convert_struct_time_to_iso8601(feed_updated) + except Exception as e: + logger.error(str(e)) + feed_updated = None + else: + feed_updated = None + feed_properties = Feed.get_properties_of_feed( + db_file, feed_id, feed) + await sqlite.update_feed_properties( + db_file, feed_id, feed_properties) + feed_id = sqlite.get_feed_id(db_file, url) + feed_id = feed_id[0] + new_entries = Feed.get_properties_of_entries( + jid_bare, db_file, url, feed_id, feed) + if new_entries: + await sqlite.add_entries_and_update_feed_state( + db_file, feed_id, new_entries) + old = Config.get_setting_value(self.settings, jid_bare, 'old') + if not old: await sqlite.mark_feed_as_read(db_file, feed_id) + result_final = {'link' : url, + 'index' : feed_id, + 'name' : title, + 'code' : status_code, + 'error' : False, + 'message': message, + 'exist' : False, + 'identifier' : None} + break + else: + # NOTE Do not be tempted to return a compact dictionary. + # That is, dictionary within dictionary + # Return multiple dictionaries in a list or tuple. 
+ result = await crawl.probe_page(url, document)
+ if not result:
+ # Get out of the loop with dict indicating error.
+ result_final = {'link' : url,
+ 'index' : None,
+ 'name' : None,
+ 'code' : status_code,
+ 'error' : True,
+ 'message': message,
+ 'exist' : False,
+ 'identifier' : None}
+ break
+ elif isinstance(result, list):
+ # Get out of the loop and deliver a list of dicts.
+ result_final = result
+ break
+ else:
+ # Go back up to the while loop and try again.
+ url = result['link']
+ else:
+ result_final = {'link' : url,
+ 'index' : None,
+ 'name' : None,
+ 'code' : status_code,
+ 'error' : True,
+ 'message': message,
+ 'exist' : False,
+ 'identifier' : None}
+ break
+ else:
+ ix = exist_identifier[1]
+ identifier = exist_identifier[2]
+ message = ('Identifier "{}" is already allocated.'
+ .format(identifier))
+ result_final = {'link' : url,
+ 'index' : ix,
+ 'name' : None,
+ 'code' : None,
+ 'error' : False,
+ 'message': message,
+ 'exist' : False,
+ 'identifier' : identifier}
+ break
+ else:
+ feed_id = feed_id[0]
+ title = sqlite.get_feed_title(db_file, feed_id)
+ title = title[0]
+ message = 'URL already exists.'
+ result_final = {'link' : url,
+ 'index' : feed_id,
+ 'name' : title,
+ 'code' : None,
+ 'error' : False,
+ 'message': message,
+ 'exist' : True,
+ 'identifier' : None}
+ break
+ return result_final
+
+
+ def view_feed(url, feed):
+ function_name = sys._getframe().f_code.co_name
+ logger.debug('{}: url: {}'
+ .format(function_name, url))
+ if "title" in feed["feed"].keys():
+ title = feed["feed"]["title"]
+ else:
+ title = urlsplit(url).netloc
+ entries = feed.entries
+ response = "Preview of {}:\n\n```\n".format(title)
+ counter = 0
+ for entry in entries:
+ counter += 1
+ if entry.has_key("title"):
+ title = entry.title
+ else:
+ title = "*** No title ***"
+ if entry.has_key("link"):
+ # link = complete_url(source, entry.link)
+ link = join_url(url, entry.link)
+ link = trim_url(link)
+ else:
+ link = "*** No link ***"
+ if entry.has_key("published"):
+ date = entry.published
+ date = dt.rfc2822_to_iso8601(date)
+ elif entry.has_key("updated"):
+ date = entry.updated
+ date = dt.rfc2822_to_iso8601(date)
+ else:
+ date = "*** No date ***"
+ response += ("Title : {}\n"
+ "Date : {}\n"
+ "Link : {}\n"
+ "Count : {}\n"
+ "\n"
+ .format(title, date, link, counter))
+ if counter > 4:
+ break
+ response += (
+ "```\nSource: {}"
+ ).format(url)
+ return response
+
+
+ def view_entry(url, feed, num):
+ function_name = sys._getframe().f_code.co_name
+ logger.debug('{}: url: {} num: {}'
+ .format(function_name, url, num))
+ if "title" in feed["feed"].keys():
+ title = feed["feed"]["title"]
+ else:
+ title = urlsplit(url).netloc
+ entries = feed.entries
+ num = int(num) - 1
+ entry = entries[num]
+ response = "Preview of {}:\n\n```\n".format(title)
+ if entry.has_key("title"):
+ title = entry.title
+ else:
+ title = '*** No title ***'
+ if entry.has_key("published"):
+ date = entry.published
+ date = dt.rfc2822_to_iso8601(date)
+ elif entry.has_key("updated"):
+ date = entry.updated
+ date = dt.rfc2822_to_iso8601(date)
+ else:
+ date = '*** No date ***'
+ if entry.has_key("summary"):
+ summary = entry.summary
+ # Remove HTML tags
+ if summary:
+ summary = Html.remove_html_tags(summary)
+ # TODO Limit text length
+ summary = summary.replace("\n\n\n", "\n\n")
+ else:
+ summary = '*** No summary ***'
+ else:
+ summary = '*** No summary ***'
+ if entry.has_key("link"):
+ # link = complete_url(source, entry.link)
+
link = join_url(url, entry.link) + link = trim_url(link) + else: + link = '*** No link ***' + response = ("{}\n" + "\n" + # "> {}\n" + "{}\n" + "\n" + "{}\n" + "\n" + .format(title, summary, link)) + return response + + + # NOTE This function is not being utilized + async def download_feed(self, db_file, feed_url): + """ + Get feed content. + + Parameters + ---------- + db_file : str + Path to database file. + url : str, optional + URL. + """ + function_name = sys._getframe().f_code.co_name + logger.debug('{}: db_file: {} url: {}' + .format(function_name, db_file, feed_url)) + if isinstance(feed_url, tuple): feed_url = feed_url[0] + result = await fetch.http(feed_url) + feed_id = sqlite.get_feed_id(db_file, feed_url) + feed_id = feed_id[0] + status_code = result['status_code'] + await sqlite.update_feed_status(db_file, feed_id, status_code) + + + def get_properties_of_feed(db_file, feed_id, feed): + + if feed.has_key('updated_parsed'): + feed_updated = feed.updated_parsed + try: + feed_updated = dt.convert_struct_time_to_iso8601(feed_updated) + except: + feed_updated = '' + else: + feed_updated = '' + + entries_count = len(feed.entries) + + feed_version = feed.version if feed.has_key('version') else '' + feed_encoding = feed.encoding if feed.has_key('encoding') else '' + feed_language = feed.feed.language if feed.feed.has_key('language') else '' + feed_icon = feed.feed.icon if feed.feed.has_key('icon') else '' + feed_image = feed.feed.image.href if feed.feed.has_key('image') else '' + feed_logo = feed.feed.logo if feed.feed.has_key('logo') else '' + feed_ttl = feed.feed.ttl if feed.feed.has_key('ttl') else '' + + feed_properties = { + "version" : feed_version, + "encoding" : feed_encoding, + "language" : feed_language, + "rating" : '', + "entries_count" : entries_count, + "icon" : feed_icon, + "image" : feed_image, + "logo" : feed_logo, + "ttl" : feed_ttl, + "updated" : feed_updated, + } + + return feed_properties + + + # TODO get all active feeds of active accounts and scan the feed with the earliest scanned time + # TODO Rename function name (idea: scan_and_populate) + def get_properties_of_entries(jid_bare, db_file, feed_url, feed_id, feed): + """ + Get new entries. + + Parameters + ---------- + db_file : str + Path to database file. + url : str, optional + URL. 
+ """ + # print('MID', feed_url, jid_bare, 'get_properties_of_entries') + function_name = sys._getframe().f_code.co_name + logger.debug('{}: feed_id: {} url: {}' + .format(function_name, feed_id, feed_url)) + + new_entries = [] + for entry in feed.entries: + logger.debug('{}: entry: {}'.format(function_name, entry.link)) + if entry.has_key("published"): + entry_published = entry.published + entry_published = dt.rfc2822_to_iso8601(entry_published) + else: + entry_published = '' + if entry.has_key("updated"): + entry_updated = entry.updated + entry_updated = dt.rfc2822_to_iso8601(entry_updated) + else: + entry_updated = dt.now() + if entry.has_key("link"): + # link = complete_url(source, entry.link) + entry_link = join_url(feed_url, entry.link) + entry_link = trim_url(entry_link) + else: + entry_link = feed_url + # title = feed["feed"]["title"] + # title = "{}: *{}*".format(feed["feed"]["title"], entry.title) + entry_title = entry.title if entry.has_key("title") else entry_published + entry_id = entry.id if entry.has_key("id") else entry_link + exist = sqlite.check_entry_exist(db_file, feed_id, + identifier=entry_id, + title=entry_title, + link=entry_link, + published=entry_published) + if not exist: + read_status = 0 + # # Filter + # pathname = urlsplit(link).path + # string = ( + # "{} {} {}" + # ).format( + # title, summary, pathname) + # if self.settings['default']['filter']: + # print('Filter is now processing data.') + # allow_list = config.is_include_keyword(db_file, + # "allow", string) + # if not allow_list: + # reject_list = config.is_include_keyword(db_file, + # "deny", + # string) + # if reject_list: + # read_status = 1 + # logger.debug('Rejected : {}' + # '\n' + # 'Keyword : {}' + # .format(link, reject_list)) + if isinstance(entry_published, int): + logger.error('Variable "published" is int: {}'.format(entry_published)) + if isinstance(entry_updated, int): + logger.error('Variable "updated" is int: {}'.format(entry_updated)) + + # Authors + entry_authors =[] + if entry.has_key('authors'): + for author in entry.authors: + author_properties = { + 'name' : author.name if author.has_key('name') else '', + 'url' : author.href if author.has_key('href') else '', + 'email' : author.email if author.has_key('email') else '', + } + entry_authors.extend([author_properties]) + elif entry.has_key('author_detail'): + author_properties = { + 'name' : entry.author_detail.name if entry.author_detail.has_key('name') else '', + 'url' : entry.author_detail.href if entry.author_detail.has_key('href') else '', + 'email' : entry.author_detail.email if entry.author_detail.has_key('email') else '', + } + entry_authors.extend([author_properties]) + elif entry.has_key('author'): + author_properties = { + 'name' : entry.author, + 'url' : '', + 'email' : '', + } + entry_authors.extend([author_properties]) + + # Contributors + entry_contributors = [] + if entry.has_key('contributors'): + for contributor in entry.contributors: + contributor_properties = { + 'name' : contributor.name if contributor.has_key('name') else '', + 'url' : contributor.href if contributor.has_key('href') else '', + 'email' : contributor.email if contributor.has_key('email') else '', + } + entry_contributors.extend([contributor_properties]) + + # Tags + entry_tags = [] + if entry.has_key('tags'): + for tag in entry.tags: + tag_properties = { + 'term' : tag.term if tag.has_key('term') else '', + 'scheme' : tag.scheme if tag.has_key('scheme') else '', + 'label' : tag.label if tag.has_key('label') else '', + } + 
entry_tags.extend([tag_properties]) + + # Content + entry_contents = [] + if entry.has_key('content'): + for content in entry.content: + text = content.value if content.has_key('value') else '' + type = content.type if content.has_key('type') else '' + lang = content.lang if content.has_key('lang') else '' + base = content.base if content.has_key('base') else '' + entry_content = { + 'text' : text, + 'lang' : lang, + 'type' : type, + 'base' : base, + } + entry_contents.extend([entry_content]) + + # Links and Enclosures + entry_links = [] + if entry.has_key('links'): + for link in entry.links: + link_properties = { + 'url' : link.href if link.has_key('href') else '', + 'rel' : link.rel if link.has_key('rel') else '', + 'type' : link.type if link.has_key('type') else '', + 'length' : '', + } + entry_links.extend([link_properties]) + # Element media:content is utilized by Mastodon + if entry.has_key('media_content'): + for link in entry.media_content: + link_properties = { + 'url' : link['url'] if 'url' in link else '', + 'rel' : 'enclosure', + 'type' : link['type'] if 'type' in link else '', + # 'medium' : link['medium'] if 'medium' in link else '', + 'length' : link['filesize'] if 'filesize' in link else '', + } + entry_links.extend([link_properties]) + if entry.has_key('media_thumbnail'): + for link in entry.media_thumbnail: + link_properties = { + 'url' : link['url'] if 'url' in link else '', + 'rel' : 'enclosure', + 'type' : '', + # 'medium' : 'image', + 'length' : '', + } + entry_links.extend([link_properties]) + + # Category + entry_category = entry.category if entry.has_key('category') else '' + + # Comments + entry_comments = entry.comments if entry.has_key('comments') else '' + + # href + entry_href = entry.href if entry.has_key('href') else '' + + # Link: Same as entry.links[0].href in most if not all cases + entry_link = entry.link if entry.has_key('link') else '' + + # Rating + entry_rating = entry.rating if entry.has_key('rating') else '' + + # Summary + entry_summary_text = entry.summary if entry.has_key('summary') else '' + if entry.has_key('summary_detail'): + entry_summary_type = entry.summary_detail.type if entry.summary_detail.has_key('type') else '' + entry_summary_lang = entry.summary_detail.lang if entry.summary_detail.has_key('lang') else '' + entry_summary_base = entry.summary_detail.base if entry.summary_detail.has_key('base') else '' + else: + entry_summary_type = '' + entry_summary_lang = '' + entry_summary_base = '' + + # Title + entry_title = entry.title if entry.has_key('title') else '' + if entry.has_key('title_detail'): + entry_title_type = entry.title_detail.type if entry.title_detail.has_key('type') else '' + else: + entry_title_type = '' + + ########################################################### + + # media_type = e_link.type[:e_link.type.index("/")] + # if (e_link.rel == "enclosure" and + # media_type in ("audio", "image", "video")): + # media_link = e_link.href + # media_link = join_url(url, e_link.href) + # media_link = trim_url(media_link) + + ########################################################### + + entry_properties = { + "identifier": entry_id, + "link": entry_link, + "href": entry_href, + "title": entry_title, + "title_type": entry_title_type, + 'summary_text' : entry_summary_text, + 'summary_lang' : entry_summary_lang, + 'summary_type' : entry_summary_type, + 'summary_base' : entry_summary_base, + 'category' : entry_category, + "comments": entry_comments, + "rating": entry_rating, + "published": entry_published, + "updated": 
entry_updated, + "read_status": read_status + } + + new_entries.extend([{ + "entry_properties" : entry_properties, + "entry_authors" : entry_authors, + "entry_contributors" : entry_contributors, + "entry_contents" : entry_contents, + "entry_links" : entry_links, + "entry_tags" : entry_tags + }]) + # await sqlite.add_entry( + # db_file, title, link, entry_id, + # url, date, read_status) + # await sqlite.set_date(db_file, url) + return new_entries + + +class FeedTask: + + + # TODO Take this function out of + # + async def check_updates(self, jid_bare): + """ + Start calling for update check up. + + Parameters + ---------- + jid : str + Jabber ID. + """ + # print('Scanning for updates for JID {}'.format(jid_bare)) + logger.info('Scanning for updates for JID {}'.format(jid_bare)) + while True: + db_file = config.get_pathname_to_database(jid_bare) + urls = sqlite.get_active_feeds_url(db_file) + for url in urls: + url = url[0] + print('sta : ' + url) + # print('STA',url) + + # # Skip Reddit + # if 'reddit.com' in str(url).lower(): + # print('Reddit Atom Syndication feeds are not supported by Slixfeed.') + # print('Skipping URL:', url) + # continue + + result = await fetch.http(url) + status_code = result['status_code'] + feed_id = sqlite.get_feed_id(db_file, url) + feed_id = feed_id[0] + if not result['error']: + await sqlite.update_feed_status(db_file, feed_id, status_code) + document = result['content'] + feed = parse(document) + feed_valid = 0 if feed.bozo else 1 + await sqlite.update_feed_validity(db_file, feed_id, feed_valid) + feed_properties = Feed.get_properties_of_feed( + db_file, feed_id, feed) + await sqlite.update_feed_properties( + db_file, feed_id, feed_properties) + new_entries = Feed.get_properties_of_entries( + jid_bare, db_file, url, feed_id, feed) + if new_entries: + print('{}: {} new_entries: {} ({})'.format(jid_bare, len(new_entries), url, feed_id)) + await sqlite.add_entries_and_update_feed_state(db_file, feed_id, new_entries) + await SQLiteMaintain.remove_nonexistent_entries(self, jid_bare, db_file, url, feed) + # await SQLiteMaintain.remove_nonexistent_entries(self, jid_bare, db_file, url, feed) + print('end : ' + url) + # await asyncio.sleep(50) + val = Config.get_setting_value(self.settings, jid_bare, 'check') + await asyncio.sleep(60 * float(val)) + # Schedule to call this function again in 90 minutes + # loop.call_at( + # loop.time() + 60 * 90, + # loop.create_task, + # self.check_updates(jid) + # ) + + + def restart_task(self, jid_bare): + if jid_bare == self.boundjid.bare: + return + if jid_bare not in self.task_manager: + self.task_manager[jid_bare] = {} + logger.info('Creating new task manager for JID {}'.format(jid_bare)) + logger.info('Stopping task "check" for JID {}'.format(jid_bare)) + try: + self.task_manager[jid_bare]['check'].cancel() + except: + logger.info('No task "check" for JID {} (FeedTask.check_updates)' + .format(jid_bare)) + logger.info('Starting tasks "check" for JID {}'.format(jid_bare)) + self.task_manager[jid_bare]['check'] = asyncio.create_task( + FeedTask.check_updates(self, jid_bare)) + + +class Opml: + + + # TODO Consider adding element jid as a pointer of import + def export_to_file(jid, filename, results): + # print(jid, filename, results) + function_name = sys._getframe().f_code.co_name + logger.debug('{} jid: {} filename: {}' + .format(function_name, jid, filename)) + root = ETR.Element("opml") + root.set("version", "1.0") + head = ETR.SubElement(root, "head") + ETR.SubElement(head, "title").text = "{}".format(jid) + 
ETR.SubElement(head, "description").text = ( + "Set of subscriptions exported by Slixfeed") + ETR.SubElement(head, "generator").text = "Slixfeed" + ETR.SubElement(head, "urlPublic").text = ( + "https://slixfeed.woodpeckersnest.space/") + time_stamp = dt.current_time() + ETR.SubElement(head, "dateCreated").text = time_stamp + ETR.SubElement(head, "dateModified").text = time_stamp + body = ETR.SubElement(root, "body") + for result in results: + outline = ETR.SubElement(body, "outline") + outline.set("text", result[1]) + outline.set("xmlUrl", result[2]) + # outline.set("type", result[2]) + tree = ETR.ElementTree(root) + tree.write(filename) + + + async def import_from_file(db_file, result): + function_name = sys._getframe().f_code.co_name + logger.debug('{}: db_file: {}' + .format(function_name, db_file)) + if not result['error']: + document = result['content'] + root = ETR.fromstring(document) + before = sqlite.get_number_of_items(db_file, 'feeds_properties') + feeds = [] + for child in root.findall(".//outline"): + url = child.get("xmlUrl") + title = child.get("text") + # feed = (url, title) + # feeds.extend([feed]) + feed = { + 'title' : title, + 'url' : url, + } + feeds.extend([feed]) + await sqlite.import_feeds(db_file, feeds) + await sqlite.add_metadata(db_file) + after = sqlite.get_number_of_items(db_file, 'feeds_properties') + difference = int(after) - int(before) + return difference diff --git a/slixfeed/task.py b/slixfeed/task.py index 9f50eaa..4e9ef85 100644 --- a/slixfeed/task.py +++ b/slixfeed/task.py @@ -38,7 +38,7 @@ NOTE Apparently, it is possible to view self presence. This means that there is no need to store presences in order to switch or restore presence. check_readiness - 📂 Send a URL from a blog or a news website. + 📂 Send a URL from a blog or a news site. JID: self.boundjid.bare MUC: self.alias @@ -68,319 +68,27 @@ except Exception as exc: """ import asyncio -from feedparser import parse -import logging import os -import slixfeed.action as action import slixfeed.config as config -from slixfeed.config import Config -# from slixfeed.dt import current_time -import slixfeed.dt as dt -import slixfeed.fetch as fetch -import slixfeed.sqlite as sqlite -# from xmpp import Slixfeed -from slixfeed.xmpp.presence import XmppPresence -from slixfeed.xmpp.message import XmppMessage -from slixfeed.xmpp.connect import XmppConnect -from slixfeed.xmpp.utility import get_chat_type -import time +from slixfeed.log import Logger -# main_task = [] -# jid_tasker = {} -# task_manager = {} -loop = asyncio.get_event_loop() - - -# def init_tasks(self): -# global task_ping -# # if task_ping is None or task_ping.done(): -# # task_ping = asyncio.create_task(ping(self, jid=None)) -# try: -# task_ping.cancel() -# except: -# logging.info('No ping task to cancel') -# task_ping = asyncio.create_task(ping(self, jid=None)) +logger = Logger(__name__) class Task: - def start(self, jid_full, tasks=None): - asyncio.create_task() - def cancel(self, jid_full, tasks=None): - pass + def start(self, jid_bare, callback): + callback(self, jid_bare) - - -def task_ping(self): - # global task_ping_instance - try: - self.task_ping_instance.cancel() - except: - logging.info('No ping task to cancel.') - self.task_ping_instance = asyncio.create_task(XmppConnect.ping(self)) - - -""" -FIXME - -Tasks don't begin at the same time. - -This is noticeable when calling "check" before "status". 
- -await taskhandler.start_tasks( - self, - jid, - ["check", "status"] - ) - -""" -async def start_tasks_xmpp_pubsub(self, jid_bare, tasks=None): - try: - self.task_manager[jid_bare] - except KeyError as e: - self.task_manager[jid_bare] = {} - logging.debug('KeyError:', str(e)) - logging.info('Creating new task manager for JID {}'.format(jid_bare)) - if not tasks: - tasks = ['check', 'publish'] - logging.info('Stopping tasks {} for JID {}'.format(tasks, jid_bare)) - for task in tasks: - # if self.task_manager[jid][task]: - try: + def stop(self, jid_bare, task): + if (jid_bare in self.task_manager and + task in self.task_manager[jid_bare]): self.task_manager[jid_bare][task].cancel() - except: - logging.info('No task {} for JID {} (start_tasks_xmpp_chat)' + else: + logger.debug('No task {} for JID {} (Task.stop)' .format(task, jid_bare)) - logging.info('Starting tasks {} for JID {}'.format(tasks, jid_bare)) - for task in tasks: - match task: - case 'publish': - self.task_manager[jid_bare]['publish'] = asyncio.create_task( - task_publish(self, jid_bare)) - case 'check': - self.task_manager[jid_bare]['check'] = asyncio.create_task( - check_updates(self, jid_bare)) - - -async def task_publish(self, jid_bare): - db_file = config.get_pathname_to_database(jid_bare) - if jid_bare not in self.settings: - Config.add_settings_jid(self.settings, jid_bare, db_file) - while True: - await action.xmpp_pubsub_send_unread_items(self, jid_bare) - await asyncio.sleep(60 * 180) - - -async def start_tasks_xmpp_chat(self, jid_bare, tasks=None): - """ - NOTE - - For proper activation of tasks involving task 'interval', it is essential - to place task 'interval' as the last to start due to await asyncio.sleep() - which otherwise would postpone tasks that would be set after task 'interval' - """ - if jid_bare == self.boundjid.bare: - return - try: - self.task_manager[jid_bare] - except KeyError as e: - self.task_manager[jid_bare] = {} - logging.debug('KeyError:', str(e)) - logging.info('Creating new task manager for JID {}'.format(jid_bare)) - if not tasks: - tasks = ['status', 'check', 'interval'] - logging.info('Stopping tasks {} for JID {}'.format(tasks, jid_bare)) - for task in tasks: - # if self.task_manager[jid][task]: - try: - self.task_manager[jid_bare][task].cancel() - except: - logging.info('No task {} for JID {} (start_tasks_xmpp_chat)' - .format(task, jid_bare)) - logging.info('Starting tasks {} for JID {}'.format(tasks, jid_bare)) - for task in tasks: - # print("task:", task) - # print("tasks:") - # print(tasks) - # breakpoint() - match task: - case 'check': - self.task_manager[jid_bare]['check'] = asyncio.create_task( - check_updates(self, jid_bare)) - case 'status': - self.task_manager[jid_bare]['status'] = asyncio.create_task( - task_status_message(self, jid_bare)) - case 'interval': - self.task_manager[jid_bare]['interval'] = asyncio.create_task( - task_message(self, jid_bare)) - # for task in self.task_manager[jid].values(): - # print("task_manager[jid].values()") - # print(self.task_manager[jid].values()) - # print("task") - # print(task) - # print("jid") - # print(jid) - # breakpoint() - # await task - - -async def task_status_message(self, jid): - await action.xmpp_send_status_message(self, jid) - refresh_task(self, jid, task_status_message, 'status', '90') - - -async def task_message(self, jid_bare): - db_file = config.get_pathname_to_database(jid_bare) - if jid_bare not in self.settings: - Config.add_settings_jid(self.settings, jid_bare, db_file) - update_interval = 
Config.get_setting_value(self.settings, jid_bare, 'interval') - update_interval = 60 * int(update_interval) - last_update_time = sqlite.get_last_update_time(db_file) - if last_update_time: - last_update_time = float(last_update_time) - diff = time.time() - last_update_time - if diff < update_interval: - next_update_time = update_interval - diff - await asyncio.sleep(next_update_time) # FIXME! - - # print("jid :", jid, "\n" - # "time :", time.time(), "\n" - # "last_update_time :", last_update_time, "\n" - # "difference :", diff, "\n" - # "update interval :", update_interval, "\n" - # "next_update_time :", next_update_time, "\n" - # ) - - # elif diff > val: - # next_update_time = val - await sqlite.update_last_update_time(db_file) - else: - await sqlite.set_last_update_time(db_file) - await action.xmpp_chat_send_unread_items(self, jid_bare) - refresh_task(self, jid_bare, task_message, 'interval') - await start_tasks_xmpp_chat(self, jid_bare, ['status']) - - -def clean_tasks_xmpp_chat(self, jid, tasks=None): - if not tasks: - tasks = ['interval', 'status', 'check'] - logging.info('Stopping tasks {} for JID {}'.format(tasks, jid)) - for task in tasks: - # if self.task_manager[jid][task]: - try: - self.task_manager[jid][task].cancel() - except: - logging.debug('No task {} for JID {} (clean_tasks_xmpp)' - .format(task, jid)) - - -def refresh_task(self, jid_bare, callback, key, val=None): - """ - Apply new setting at runtime. - - Parameters - ---------- - jid : str - Jabber ID. - key : str - Key. - val : str, optional - Value. The default is None. - """ - logging.info('Refreshing task {} for JID {}'.format(callback, jid_bare)) - if not val: - db_file = config.get_pathname_to_database(jid_bare) - if jid_bare not in self.settings: - Config.add_settings_jid(self.settings, jid_bare, db_file) - val = Config.get_setting_value(self.settings, jid_bare, key) - # if self.task_manager[jid][key]: - if jid_bare in self.task_manager: - try: - self.task_manager[jid_bare][key].cancel() - except: - logging.info('No task of type {} to cancel for ' - 'JID {} (refresh_task)'.format(key, jid_bare)) - # self.task_manager[jid][key] = loop.call_at( - # loop.time() + 60 * float(val), - # loop.create_task, - # (callback(self, jid)) - # # send_update(jid) - # ) - self.task_manager[jid_bare][key] = loop.create_task( - wait_and_run(self, callback, jid_bare, val) - ) - # self.task_manager[jid][key] = loop.call_later( - # 60 * float(val), - # loop.create_task, - # send_update(jid) - # ) - # self.task_manager[jid][key] = send_update.loop.call_at( - # send_update.loop.time() + 60 * val, - # send_update.loop.create_task, - # send_update(jid) - # ) - - -async def wait_and_run(self, callback, jid_bare, val): - await asyncio.sleep(60 * float(val)) - await callback(self, jid_bare) - - -# TODO Take this function out of -# -async def check_updates(self, jid_bare): - """ - Start calling for update check up. - - Parameters - ---------- - jid : str - Jabber ID. 
- """ - # print('Scanning for updates for JID {}'.format(jid_bare)) - logging.info('Scanning for updates for JID {}'.format(jid_bare)) - while True: - db_file = config.get_pathname_to_database(jid_bare) - urls = sqlite.get_active_feeds_url(db_file) - for url in urls: - url = url[0] - # print('STA',url) - - # # Skip Reddit - # if 'reddit.com' in str(url).lower(): - # print('Reddit Atom Syndication feeds are not supported by Slixfeed.') - # print('Skipping URL:', url) - # continue - - result = await fetch.http(url) - status_code = result['status_code'] - feed_id = sqlite.get_feed_id(db_file, url) - feed_id = feed_id[0] - if not result['error']: - await sqlite.update_feed_status(db_file, feed_id, status_code) - document = result['content'] - feed = parse(document) - feed_valid = 0 if feed.bozo else 1 - await sqlite.update_feed_validity(db_file, feed_id, feed_valid) - feed_properties = action.get_properties_of_feed(db_file, - feed_id, feed) - await sqlite.update_feed_properties(db_file, feed_id, - feed_properties) - new_entries = action.get_properties_of_entries( - jid_bare, db_file, url, feed_id, feed) - if new_entries: await sqlite.add_entries_and_update_feed_state( - db_file, feed_id, new_entries) - await asyncio.sleep(50) - val = Config.get_setting_value(self.settings, jid_bare, 'check') - await asyncio.sleep(60 * float(val)) - # Schedule to call this function again in 90 minutes - # loop.call_at( - # loop.time() + 60 * 90, - # loop.create_task, - # self.check_updates(jid) - # ) """ @@ -393,6 +101,7 @@ async def select_file(self): """ Initiate actions by JID (Jabber ID). """ + main_task = [] while True: db_dir = config.get_default_data_directory() if not os.path.isdir(db_dir): diff --git a/slixfeed/url.py b/slixfeed/url.py index ef0d459..4fef810 100644 --- a/slixfeed/url.py +++ b/slixfeed/url.py @@ -19,11 +19,11 @@ TODO """ from email.utils import parseaddr -import logging import os import random import slixfeed.config as config import slixfeed.fetch as fetch +from slixfeed.log import Logger from urllib.parse import ( parse_qs, urlencode, @@ -33,6 +33,8 @@ from urllib.parse import ( urlunsplit ) +logger = Logger(__name__) + # NOTE # hostname and protocol are listed as one in file proxies.toml. @@ -113,12 +115,12 @@ async def replace_hostname(url, url_type): config.update_proxies(proxies_file, proxy_name, proxy_type, proxy_url) except ValueError as e: - logging.error([str(e), proxy_url]) + logger.error([str(e), proxy_url]) url_new = None else: - logging.warning( - "No proxy URLs for {}. Please update proxies.toml" - .format(proxy_name)) + logger.warning('No proxy URLs for {}. ' + 'Please update proxies.toml' + .format(proxy_name)) url_new = url break return url_new diff --git a/slixfeed/utilities.py b/slixfeed/utilities.py new file mode 100644 index 0000000..e06d8f6 --- /dev/null +++ b/slixfeed/utilities.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + +TODO + +1) Function scan at "for entry in entries" + Suppress directly calling function "add_entry" (accept db_file) + Pass a list of valid entries to a new function "add_entries" + (accept db_file) which would call function "add_entry" (accept cur). + * accelerate adding of large set of entries at once. + * prevent (or mitigate halt of consequent actions). + * reduce I/O. + +2) Call sqlite function from function statistics. + Returning a list of values doesn't' seem to be a good practice. 
+
+3) Special statistics for operator:
+ * Size of database(s);
+ * Amount of JIDs subscribed;
+ * Amount of feeds of all JIDs;
+ * Amount of entries of all JIDs.
+
+4) Consider appending text to remind users to share presence
+ '✒️ Share online status to receive updates'
+
+5) Request for subscription
+ if (await XmppUtilities.get_chat_type(self, jid_bare) == 'chat' and
+ not self.client_roster[jid_bare]['to']):
+ XmppPresence.subscription(self, jid_bare, 'subscribe')
+ await XmppRoster.add(self, jid_bare)
+ status_message = '✒️ Share online status to receive updates'
+ XmppPresence.send(self, jid_bare, status_message)
+ message_subject = 'RSS News Bot'
+ message_body = 'Share online status to receive updates.'
+ XmppMessage.send_headline(self, jid_bare, message_subject,
+ message_body, 'chat')
+
+"""
+
+import hashlib
+import slixfeed.config as config
+from slixfeed.config import Config
+from lxml import etree, html
+import slixfeed.dt as dt
+import slixfeed.fetch as fetch
+from slixfeed.log import Logger
+import slixfeed.sqlite as sqlite
+from slixfeed.url import join_url, complete_url
+import sys
+
+try:
+ import tomllib
+except:
+ import tomli as tomllib
+
+logger = Logger(__name__)
+
+
+class Documentation:
+
+
+ def manual(filename, section=None, command=None):
+ function_name = sys._getframe().f_code.co_name
+ logger.debug('{}: filename: {}'.format(function_name, filename))
+ config_dir = config.get_default_config_directory()
+ with open(config_dir + '/' + filename, mode="rb") as commands:
+ cmds = tomllib.load(commands)
+ if section == 'all':
+ cmd_list = ''
+ for cmd in cmds:
+ for i in cmds[cmd]:
+ cmd_list += cmds[cmd][i] + '\n'
+ elif command and section:
+ try:
+ cmd_list = cmds[section][command]
+ except KeyError as e:
+ logger.error(e)
+ cmd_list = None
+ elif section:
+ try:
+ cmd_list = []
+ for cmd in cmds[section]:
+ cmd_list.extend([cmd])
+ except KeyError as e:
+ logger.error('KeyError:' + str(e))
+ cmd_list = None
+ else:
+ cmd_list = []
+ for cmd in cmds:
+ cmd_list.extend([cmd])
+ return cmd_list
+
+
+class Html:
+
+
+ async def extract_image_from_html(url):
+ function_name = sys._getframe().f_code.co_name
+ logger.debug('{}: url: {}'.format(function_name, url))
+ result = await fetch.http(url)
+ if not result['error']:
+ data = result['content']
+ tree = html.fromstring(data)
+ # TODO Exclude banners, class="share" links etc.
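+ # (illustration only: excluding by class would need an extra predicate,
+ # e.g. '//img[not(ancestor::*[contains(@class, "share")])]')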
+ images = tree.xpath( + '//img[not(' + 'contains(@src, "avatar") or ' + 'contains(@src, "cc-by-sa") or ' + 'contains(@src, "emoji") or ' + 'contains(@src, "icon") or ' + 'contains(@src, "logo") or ' + 'contains(@src, "letture") or ' + 'contains(@src, "poweredby_mediawi") or ' + 'contains(@src, "search") or ' + 'contains(@src, "share") or ' + 'contains(@src, "smiley")' + ')]/@src') + if len(images): + image = images[0] + image = str(image) + image_url = complete_url(url, image) + return image_url + + + def remove_html_tags(data): + function_name = sys._getframe().f_code.co_name + logger.debug('{}'.format(function_name)) + parser = etree.HTMLParser() + tree = etree.fromstring(data, parser) + data = etree.tostring(tree, encoding='unicode', method='text') + data = data.replace("\n\n", "\n") + return data + + + # /questions/9662346/python-code-to-remove-html-tags-from-a-string + def _remove_html_tags(text): + import xml.etree.ElementTree + return ''.join(xml.etree.ElementTree.fromstring(text).itertext()) + + + def __remove_html_tags(data): + from bs4 import BeautifulSoup + function_name = sys._getframe().f_code.co_name + logger.debug('{}'.format(function_name)) + data = BeautifulSoup(data, "lxml").text + data = data.replace("\n\n", "\n") + return data + + +class MD: + + + def export_to_markdown(jid, filename, results): + function_name = sys._getframe().f_code.co_name + logger.debug('{}: jid: {} filename: {}' + .format(function_name, jid, filename)) + with open(filename, 'w') as file: + file.write('# Subscriptions for {}\n'.format(jid)) + file.write('## Set of feeds exported with Slixfeed\n') + for result in results: + file.write('- [{}]({})\n'.format(result[1], result[2])) + file.write('\n\n* * *\n\nThis list was saved on {} from xmpp:{} using ' + '[Slixfeed](https://slixfeed.woodpeckersnest.space/)\n' + .format(dt.current_date(), jid)) + + + def log_to_markdown(timestamp, filename, jid, message): + """ + Log message to a markdown file. + + Parameters + ---------- + timestamp : str + Time stamp. + filename : str + Jabber ID as name of file. + jid : str + Jabber ID. + message : str + Message content. + + Returns + ------- + None. + + """ + function_name = sys._getframe().f_code.co_name + logger.debug('{}: timestamp: {} filename: {} jid: {} message: {}'.format(function_name, timestamp, filename, jid, message)) + with open(filename + '.md', 'a') as file: + # entry = "{} {}:\n{}\n\n".format(timestamp, jid, message) + entry = '## {}\n### {}\n\n{}\n\n'.format(jid, timestamp, message) + file.write(entry) + + +class SQLiteMaintain: + + + # TODO + # (1) Check for duplications + # (2) append all duplications to a list + # (3) Send the list to a function in module sqlite. + async def remove_nonexistent_entries(self, jid_bare, db_file, url, feed): + """ + Remove entries that don't exist in a given parsed feed. + Check the entries returned from feed and delete read non + existing entries, otherwise move to table archive, if unread. + + Parameters + ---------- + db_file : str + Path to database file. + url : str + Feed URL. + feed : list + Parsed feed document. 
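+
+        Returns
+        -------
+        None.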
+ """ + function_name = sys._getframe().f_code.co_name + logger.debug('{}: db_file: {} url: {}' + .format(function_name, db_file, url)) + feed_id = sqlite.get_feed_id(db_file, url) + feed_id = feed_id[0] + items = sqlite.get_entries_of_feed(db_file, feed_id) + entries = feed.entries + limit = Config.get_setting_value(self.settings, jid_bare, 'archive') + print(limit) + for item in items: + ix, entry_title, entry_link, entry_id, timestamp = item + read_status = sqlite.is_entry_read(db_file, ix) + read_status = read_status[0] + valid = False + for entry in entries: + title = None + link = None + time = None + # valid = False + # TODO better check and don't repeat code + if entry.has_key("id") and entry_id: + if entry.id == entry_id: + print("compare entry.id == entry_id:", entry.id) + print("compare entry.id == entry_id:", entry_id) + print("============") + valid = True + break + else: + if entry.has_key("title"): + title = entry.title + else: + title = feed["feed"]["title"] + if entry.has_key("link"): + link = join_url(url, entry.link) + else: + link = url + if entry.has_key("published") and timestamp: + print("compare published:", title, link, time) + print("compare published:", entry_title, entry_link, timestamp) + print("============") + time = dt.rfc2822_to_iso8601(entry.published) + if (entry_title == title and + entry_link == link and + timestamp == time): + valid = True + break + else: + if (entry_title == title and + entry_link == link): + print("compare entry_link == link:", title, link) + print("compare entry_title == title:", entry_title, entry_link) + print("============") + valid = True + break + # TODO better check and don't repeat code + if not valid: + # print("id: ", ix) + # if title: + # print("title: ", title) + # print("entry_title: ", entry_title) + # if link: + # print("link: ", link) + # print("entry_link: ", entry_link) + # if entry.id: + # print("last_entry:", entry.id) + # print("entry_id: ", entry_id) + # if time: + # print("time: ", time) + # print("timestamp: ", timestamp) + # print("read: ", read_status) + # breakpoint() + + # TODO Send to table archive + # TODO Also make a regular/routine check for sources that + # have been changed (though that can only happen when + # manually editing) + # ix = item[0] + # print(">>> SOURCE: ", source) + # print(">>> INVALID:", entry_title) + # print("title:", entry_title) + # print("link :", entry_link) + # print("id :", entry_id) + if read_status == 1: + await sqlite.delete_entry_by_id(db_file, ix) + # print(">>> DELETING:", entry_title) + else: + # print(">>> ARCHIVING:", entry_title) + await sqlite.archive_entry(db_file, ix) + await sqlite.maintain_archive(db_file, limit) + + +class Task: + + + def start(self, jid_bare, callback): + callback(self, jid_bare) + + + def stop(self, jid_bare, task): + if (jid_bare in self.task_manager and + task in self.task_manager[jid_bare]): + self.task_manager[jid_bare][task].cancel() + else: + logger.debug('No task {} for JID {} (Task.stop)' + .format(task, jid_bare)) + + +class Utilities: + + + # NOTE Warning: Entry might not have a link + # TODO Handle situation error + def hash_url_to_md5(url): + url_encoded = url.encode() + url_hashed = hashlib.md5(url_encoded) + url_digest = url_hashed.hexdigest() + return url_digest + + + def pick_a_feed(lang=None): + function_name = sys._getframe().f_code.co_name + logger.debug('{}: lang: {}' + .format(function_name, lang)) + config_dir = config.get_default_config_directory() + with open(config_dir + '/' + 'feeds.toml', mode="rb") as feeds: + 
urls = tomllib.load(feeds) + import random + url = random.choice(urls['feeds']) + return url diff --git a/slixfeed/version.py b/slixfeed/version.py index 9d4c37b..dfa4b9b 100644 --- a/slixfeed/version.py +++ b/slixfeed/version.py @@ -1,2 +1,2 @@ -__version__ = '0.1.76' -__version_info__ = (0, 1, 76) +__version__ = '0.1.77' +__version_info__ = (0, 1, 77) diff --git a/slixfeed/xmpp/chat.py b/slixfeed/xmpp/chat.py index f2bc384..35a7a46 100644 --- a/slixfeed/xmpp/chat.py +++ b/slixfeed/xmpp/chat.py @@ -24,36 +24,29 @@ TODO """ import asyncio -from feedparser import parse -import logging -import os -import slixfeed.action as action +from random import randrange # pending_tasks: Use a list and read the first index (i.e. index 0). import slixfeed.config as config -import slixfeed.crawl as crawl from slixfeed.config import Config -import slixfeed.dt as dt -import slixfeed.fetch as fetch +from slixfeed.log import Logger import slixfeed.sqlite as sqlite -import slixfeed.task as task -import slixfeed.url as uri -from slixfeed.version import __version__ -from slixfeed.xmpp.bookmark import XmppBookmark +from slixfeed.url import ( + remove_tracking_parameters, + replace_hostname, + ) +from slixfeed.syndication import FeedTask +from slixfeed.utilities import Documentation, Html, MD, Task from slixfeed.xmpp.commands import XmppCommands -from slixfeed.xmpp.muc import XmppGroupchat from slixfeed.xmpp.message import XmppMessage from slixfeed.xmpp.presence import XmppPresence -from slixfeed.xmpp.publish import XmppPubsub +from slixfeed.xmpp.privilege import is_operator, is_moderator +from slixfeed.xmpp.status import XmppStatusTask from slixfeed.xmpp.upload import XmppUpload -from slixfeed.xmpp.privilege import is_moderator, is_operator, is_access -from slixfeed.xmpp.utility import get_chat_type +from slixfeed.xmpp.utilities import XmppUtilities +import sys import time -from random import randrange -try: - import tomllib -except: - import tomli as tomllib +logger = Logger(__name__) # for task in main_task: @@ -63,7 +56,9 @@ except: # if not main_task: # await select_file() -class Chat: + +class XmppChat: + async def process_message(self, message): """ @@ -154,7 +149,7 @@ class Chat: command = command[1:] command_lowercase = command.lower() - logging.debug([str(message['from']), ':', command]) + logger.debug([str(message['from']), ':', command]) # Support private message via groupchat # See https://codeberg.org/poezio/slixmpp/issues/3506 @@ -175,7 +170,7 @@ class Chat: 'Usage: `help `' .format(command_list)) case 'help all': - command_list = action.manual('commands.toml', section='all') + command_list = Documentation.manual('commands.toml', section='all') response = ('Complete list of commands:\n' '```\n{}\n```' .format(command_list)) @@ -185,9 +180,8 @@ class Chat: if len(command) == 2: command_root = command[0] command_name = command[1] - command_list = action.manual('commands.toml', - section=command_root, - command=command_name) + command_list = Documentation.manual( + 'commands.toml', section=command_root, command=command_name) if command_list: command_list = ''.join(command_list) response = (command_list) @@ -196,7 +190,7 @@ class Chat: .format(command_root, command_name)) elif len(command) == 1: command = command[0] - command_list = action.manual('commands.toml', command) + command_list = Documentation.manual('commands.toml', command) if command_list: command_list = ' '.join(command_list) response = ('Available command `{}` keys:\n' @@ -224,7 +218,7 @@ class Chat: 'I am an RSS News Bot.\n' 'Send "help" 
for further instructions.\n'
                             .format(self.alias))
-            case _ if command_lowercase.startswith('add '):
+            case _ if command_lowercase.startswith('add'):
                 command = command[4:]
                 url = command.split(' ')[0]
                 title = ' '.join(command.split(' ')[1:])
@@ -284,10 +278,10 @@ class Chat:
                 else:
                     response = ('This action is restricted. '
                                 'Type: viewing bookmarks.')
-            case _ if command_lowercase.startswith('clear '):
+            case _ if command_lowercase.startswith('clear'):
                 key = command[6:]
                 response = await XmppCommands.clear_filter(db_file, key)
-            case _ if command_lowercase.startswith('default '):
+            case _ if command_lowercase.startswith('default'):
                 key = command[8:]
                 response = await XmppCommands.restore_default(
                     self, jid_bare, key=None)
@@ -317,15 +311,14 @@ class Chat:
                     response = ('No action has been taken.'
                                 '\n'
                                 'Missing keywords.')
-            case _ if command_lowercase.startswith('disable '):
+            case _ if command_lowercase.startswith('disable'):
                 response = await XmppCommands.feed_disable(
                     self, db_file, jid_bare, command)
-                key_list = ['status']
-                await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+                XmppStatusTask.restart_task(self, jid_bare)
-            case _ if command_lowercase.startswith('enable '):
+            case _ if command_lowercase.startswith('enable'):
                 response = await XmppCommands.feed_enable(
                     self, db_file, command)
-            case _ if command_lowercase.startswith('export '):
+            case _ if command_lowercase.startswith('export'):
                 ext = command[7:]
                 if ext in ('md', 'opml'): # html xbel
                     status_type = 'dnd'
@@ -346,12 +339,11 @@ class Chat:
                     #     'Feeds exported successfully to {}.\n{}'
                     #     ).format(ex, url)
                     # XmppMessage.send_oob_reply_message(message, url, response)
-                    chat_type = await get_chat_type(self, jid_bare)
+                    chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
                     XmppMessage.send_oob(self, jid_bare, url, chat_type)
                     del self.pending_tasks[jid_bare][pending_tasks_num]
                     # del self.pending_tasks[jid_bare][self.pending_tasks_counter]
-                    key_list = ['status']
-                    await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+                    XmppStatusTask.restart_task(self, jid_bare)
                 else:
                     response = ('Unsupported filetype.\n'
                                 'Try: md or opml')
@@ -375,8 +367,7 @@ class Chat:
                 response = XmppCommands.fetch_gemini()
             case _ if (command_lowercase.startswith('http') and
                        command_lowercase.endswith('.opml')):
-                key_list = ['status']
-                task.clean_tasks_xmpp_chat(self, jid_bare, key_list)
+                Task.stop(self, jid_bare, 'status')
                 status_type = 'dnd'
                 status_message = '📥️ Processing request to import feeds...'
# pending_tasks_num = len(self.pending_tasks[jid_bare]) @@ -390,14 +381,13 @@ class Chat: self, db_file, jid_bare, command) del self.pending_tasks[jid_bare][pending_tasks_num] # del self.pending_tasks[jid_bare][self.pending_tasks_counter] - key_list = ['status'] - await task.start_tasks_xmpp_chat(self, jid_bare, key_list) - case _ if command_lowercase.startswith('pubsub list '): + XmppStatusTask.restart_task(self, jid_bare) + case _ if command_lowercase.startswith('pubsub list'): jid = command[12:] response = 'List of nodes for {}:\n```\n'.format(jid) response = await XmppCommands.pubsub_list(self, jid) response += '```' - case _ if command_lowercase.startswith('pubsub send '): + case _ if command_lowercase.startswith('pubsub send'): if is_operator(self, jid_bare): info = command[12:] info = info.split(' ') @@ -416,7 +406,6 @@ class Chat: command_lowercase.startswith('itpc:/') or command_lowercase.startswith('rss:/')): url = command - # task.clean_tasks_xmpp_chat(self, jid_bare, ['status']) status_type = 'dnd' status_message = ('📫️ Processing request to fetch data from {}' .format(url)) @@ -429,11 +418,9 @@ class Chat: status_type=status_type) response = await XmppCommands.fetch_http( self, command, db_file, jid_bare) - # task.clean_tasks_xmpp_chat(self, jid_bare, ['status']) del self.pending_tasks[jid_bare][pending_tasks_num] # del self.pending_tasks[jid_bare][self.pending_tasks_counter] - key_list = ['status'] - await task.start_tasks_xmpp_chat(self, jid_bare, key_list) + XmppStatusTask.restart_task(self, jid_bare) # except: # response = ( # '> {}\nNews source is in the process ' @@ -445,6 +432,7 @@ class Chat: if val: response = await XmppCommands.set_interval( self, db_file, jid_bare, val) + XmppChatTask.restart_task(self, jid_bare) else: response = 'Current value for interval: ' response += XmppCommands.get_interval(self, jid_bare) @@ -469,10 +457,10 @@ class Chat: response = await XmppCommands.set_old_off( self, jid_bare, db_file) case _ if command_lowercase.startswith('next'): - await XmppCommands.send_next_update(self, jid_bare, command) - key_list = ['status'] - await task.start_tasks_xmpp_chat(self, jid_bare, key_list) - case _ if command_lowercase.startswith('node delete '): + num = command[5:] + await XmppChatAction.send_unread_items(self, jid_bare, num) + XmppStatusTask.restart_task(self, jid_bare) + case _ if command_lowercase.startswith('node delete'): if is_operator(self, jid_bare): info = command[12:] info = info.split(' ') @@ -480,7 +468,7 @@ class Chat: else: response = ('This action is restricted. 
'
                                 'Type: sending news to PubSub.')
-            case _ if command_lowercase.startswith('node purge '):
+            case _ if command_lowercase.startswith('node purge'):
                 if is_operator(self, jid_bare):
                     info = command[11:]
                     info = info.split(' ')
@@ -505,13 +493,12 @@ class Chat:
                 response += XmppCommands.get_quantum(self, jid_bare)
             case 'random':
                 response = XmppCommands.set_random(self, jid_bare, db_file)
-            case _ if command_lowercase.startswith('read '):
+            case _ if command_lowercase.startswith('read'):
                 data = command[5:]
                 data = data.split()
                 url = data[0]
                 if url:
-                    key_list = ['status']
-                    task.clean_tasks_xmpp_chat(self, jid_bare, key_list)
+                    Task.stop(self, jid_bare, 'status')
                     status_type = 'dnd'
                     status_message = ('📫️ Processing request to fetch data '
                                       'from {}'.format(url))
@@ -520,12 +507,11 @@ class Chat:
                     response = await XmppCommands.feed_read(
                         self, jid_bare, data, url)
                     del self.pending_tasks[jid_bare][pending_tasks_num]
-                    key_list = ['status']
+                    XmppStatusTask.restart_task(self, jid_bare)
                 else:
                     response = ('No action has been taken.'
                                 '\n'
                                 'Missing URL.')
-                    await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
             case _ if command_lowercase.startswith('recent'):
                 num = command[7:]
                 if not num: num = 5
@@ -535,23 +521,19 @@ class Chat:
                     response += result + '```\n'
                 else:
                     response = result
-            case _ if command_lowercase.startswith('remove '):
+            case _ if command_lowercase.startswith('remove'):
                 ix_url = command[7:]
                 ix_url = ix_url.split(' ')
                 response = await XmppCommands.feed_remove(
                     self, jid_bare, db_file, ix_url)
-                # refresh_task(self, jid_bare, send_status, 'status', 20)
-                # task.clean_tasks_xmpp_chat(self, jid_bare, ['status'])
-                key_list = ['status']
-                await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+                XmppStatusTask.restart_task(self, jid_bare)
-            case _ if command_lowercase.startswith('rename '):
+            case _ if command_lowercase.startswith('rename'):
                 response = await XmppCommands.feed_rename(
                     self, db_file, jid_bare, command)
             case _ if command_lowercase.startswith('reset'):
                 ix_url = command[6:]
-                ix_url = ix_url.split(' ')
-                key_list = ['status']
-                task.clean_tasks_xmpp_chat(self, jid_bare, key_list)
+                if ix_url: ix_url = ix_url.split(' ')
+                Task.stop(self, jid_bare, 'status')
                 status_type = 'dnd'
                 status_message = '📫️ Marking entries as read...'
                 # pending_tasks_num = len(self.pending_tasks[jid_bare])
@@ -562,24 +544,22 @@ class Chat:
                 XmppPresence.send(self, jid_bare, status_message,
                                   status_type=status_type)
                 response = await XmppCommands.mark_as_read(
-                    self, jid_bare, db_file, ix_url)
+                    jid_bare, db_file, ix_url)
                 del self.pending_tasks[jid_bare][pending_tasks_num]
                 # del self.pending_tasks[jid_bare][self.pending_tasks_counter]
-                key_list = ['status']
-                await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+                XmppStatusTask.restart_task(self, jid_bare)
-            case _ if command_lowercase.startswith('search '):
+            case _ if command_lowercase.startswith('search'):
                 query = command[7:]
-                response = XmppCommands.search_items(self, db_file, query)
+                response = await XmppCommands.search_items(db_file, query)
             case 'start':
                 status_type = 'available'
                 status_message = '📫️ Welcome back!'
XmppPresence.send(self, jid_bare, status_message, status_type=status_type) await asyncio.sleep(5) - key_list = ['check', 'status', 'interval'] - await task.start_tasks_xmpp_chat(self, jid_bare, key_list) + tasks = (FeedTask, XmppChatTask, XmppStatusTask) response = await XmppCommands.scheduler_start( - self, db_file, jid_bare) + self, db_file, jid_bare, tasks) case 'stats': response = XmppCommands.print_statistics(db_file) case 'stop': @@ -617,10 +597,10 @@ class Chat: # os.mkdir(data_dir) # if not os.path.isdir(data_dir + '/logs/'): # os.mkdir(data_dir + '/logs/') - # action.log_to_markdown( + # MD.log_to_markdown( # dt.current_time(), os.path.join(data_dir, 'logs', jid_bare), # jid_bare, command) - # action.log_to_markdown( + # MD.log_to_markdown( # dt.current_time(), os.path.join(data_dir, 'logs', jid_bare), # jid_bare, response) @@ -630,3 +610,228 @@ class Chat: # '{}\n' # .format(command, jid_bare, response) # ) + + +class XmppChatAction: + + + async def send_unread_items(self, jid_bare, num=None): + """ + Send news items as messages. + + Parameters + ---------- + jid : str + Jabber ID. + num : str, optional + Number. The default is None. + """ + function_name = sys._getframe().f_code.co_name + logger.debug('{}: jid: {} num: {}'.format(function_name, jid_bare, num)) + db_file = config.get_pathname_to_database(jid_bare) + show_media = Config.get_setting_value(self.settings, jid_bare, 'media') + if not num: + num = Config.get_setting_value(self.settings, jid_bare, 'quantum') + else: + num = int(num) + results = sqlite.get_unread_entries(db_file, num) + news_digest = '' + media = None + chat_type = await XmppUtilities.get_chat_type(self, jid_bare) + for result in results: + ix = result[0] + title_e = result[1] + url = result[2] + summary = result[3] + feed_id = result[4] + date = result[5] + enclosure = sqlite.get_enclosure_by_entry_id(db_file, ix) + if enclosure: enclosure = enclosure[0] + title_f = sqlite.get_feed_title(db_file, feed_id) + title_f = title_f[0] + news_digest += await XmppChatAction.list_unread_entries(self, result, title_f, jid_bare) + # print(db_file) + # print(result[0]) + # breakpoint() + await sqlite.mark_as_read(db_file, ix) + + # Find media + # if url.startswith("magnet:"): + # media = action.get_magnet(url) + # elif enclosure.startswith("magnet:"): + # media = action.get_magnet(enclosure) + # elif enclosure: + if show_media: + if enclosure: + media = enclosure + else: + media = await Html.extract_image_from_html(url) + + if media and news_digest: + # Send textual message + XmppMessage.send(self, jid_bare, news_digest, chat_type) + news_digest = '' + # Send media + XmppMessage.send_oob(self, jid_bare, media, chat_type) + media = None + + if news_digest: + XmppMessage.send(self, jid_bare, news_digest, chat_type) + # TODO Add while loop to assure delivery. + # print(await current_time(), ">>> ACT send_message",jid) + # NOTE Do we need "if statement"? See NOTE at is_muc. + # if chat_type in ('chat', 'groupchat'): + # # TODO Provide a choice (with or without images) + # XmppMessage.send(self, jid, news_digest, chat_type) + # See XEP-0367 + # if media: + # # message = xmpp.Slixfeed.make_message( + # # self, mto=jid, mbody=new, mtype=chat_type) + # message = xmpp.Slixfeed.make_message( + # self, mto=jid, mbody=media, mtype=chat_type) + # message['oob']['url'] = media + # message.send() + + # TODO Do not refresh task before + # verifying that it was completed. 
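+        # (One option, per the TODO list at the top of client.py, is to
+        #  wait for the slixmpp "marker_acknowledged" event before
+        #  restarting the task; an idea only, not implemented here.)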
+ + # XmppStatusTask.restart_task(self, jid_bare) + # XmppCommands.task_start(self, jid_bare, 'interval') + + # interval = await initdb( + # jid, + # sqlite.is_setting_key, + # "interval" + # ) + # self.task_manager[jid]["interval"] = loop.call_at( + # loop.time() + 60 * interval, + # loop.create_task, + # send_update(jid) + # ) + + # print(await current_time(), "asyncio.get_event_loop().time()") + # print(await current_time(), asyncio.get_event_loop().time()) + # await asyncio.sleep(60 * interval) + + # loop.call_later( + # 60 * interval, + # loop.create_task, + # send_update(jid) + # ) + + # print + # await handle_event() + + + async def list_unread_entries(self, result, feed_title, jid): + function_name = sys._getframe().f_code.co_name + logger.debug('{}: feed_title: {} jid: {}' + .format(function_name, feed_title, jid)) + # TODO Add filtering + # TODO Do this when entry is added to list and mark it as read + # DONE! + # results = [] + # if sqlite.is_setting_key(db_file, "deny"): + # while len(results) < num: + # result = cur.execute(sql).fetchone() + # blacklist = sqlite.get_setting_value(db_file, "deny").split(",") + # for i in blacklist: + # if i in result[1]: + # continue + # print("rejected:", result[1]) + # print("accepted:", result[1]) + # results.extend([result]) + + # news_list = "You've got {} news items:\n".format(num) + # NOTE Why doesn't this work without list? + # i.e. for result in results + # for result in results.fetchall(): + ix = str(result[0]) + title = str(result[1]) or '*** No title ***' # [No Title] + # Remove HTML tags + title = Html.remove_html_tags(title) if title else '*** No title ***' + # # TODO Retrieve summary from feed + # # See fetch.view_entry + summary = result[3] + if summary: + summary = Html.remove_html_tags(summary) + # TODO Limit text length + # summary = summary.replace("\n\n\n", "\n\n") + summary = summary.replace('\n', ' ') + summary = summary.replace(' ', ' ') + # summary = summary.replace(' ', ' ') + summary = ' '.join(summary.split()) + length = Config.get_setting_value(self.settings, jid, 'length') + length = int(length) + summary = summary[:length] + " […]" + # summary = summary.strip().split('\n') + # summary = ["> " + line for line in summary] + # summary = "\n".join(summary) + else: + summary = '*** No summary ***' + link = result[2] + link = remove_tracking_parameters(link) + link = await replace_hostname(link, "link") or link + feed_id = result[4] + # news_item = ("\n{}\n{}\n{} [{}]\n").format(str(title), str(link), + # str(feed_title), str(ix)) + formatting = Config.get_setting_value(self.settings, jid, 'formatting') + news_item = formatting.format(feed_title=feed_title, + title=title, + summary=summary, + link=link, + ix=ix, + feed_id=feed_id) + # news_item = news_item.replace('\\n', '\n') + return news_item + + +class XmppChatTask: + + + async def task_message(self, jid_bare): + db_file = config.get_pathname_to_database(jid_bare) + if jid_bare not in self.settings: + Config.add_settings_jid(self.settings, jid_bare, db_file) + while True: + update_interval = Config.get_setting_value(self.settings, jid_bare, 'interval') + update_interval = 60 * int(update_interval) + last_update_time = sqlite.get_last_update_time(db_file) + if last_update_time: + last_update_time = float(last_update_time) + diff = time.time() - last_update_time + if diff < update_interval: + next_update_time = update_interval - diff + await asyncio.sleep(next_update_time) # FIXME! 
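+                    # (Note on the FIXME: the whole remaining interval is
+                    #  slept in one call, so an "interval" setting changed
+                    #  meanwhile only takes effect after this sleep ends,
+                    #  unless the task is cancelled and recreated; one
+                    #  possible fix, sketched as an idea only, is to sleep
+                    #  in short slices and re-read the setting each time.)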
+ + # print("jid :", jid, "\n" + # "time :", time.time(), "\n" + # "last_update_time :", last_update_time, "\n" + # "difference :", diff, "\n" + # "update interval :", update_interval, "\n" + # "next_update_time :", next_update_time, "\n" + # ) + + # elif diff > val: + # next_update_time = val + await sqlite.update_last_update_time(db_file) + else: + await sqlite.set_last_update_time(db_file) + await XmppChatAction.send_unread_items(self, jid_bare) + + + def restart_task(self, jid_bare): + if jid_bare == self.boundjid.bare: + return + if jid_bare not in self.task_manager: + self.task_manager[jid_bare] = {} + logger.info('Creating new task manager for JID {}'.format(jid_bare)) + logger.info('Stopping task "interval" for JID {}'.format(jid_bare)) + try: + self.task_manager[jid_bare]['interval'].cancel() + except: + logger.info('No task "interval" for JID {} (XmppChatTask.task_message)' + .format(jid_bare)) + logger.info('Starting tasks "interval" for JID {}'.format(jid_bare)) + self.task_manager[jid_bare]['interval'] = asyncio.create_task( + XmppChatTask.task_message(self, jid_bare)) \ No newline at end of file diff --git a/slixfeed/xmpp/client.py b/slixfeed/xmpp/client.py index 6a2f7a1..304d15c 100644 --- a/slixfeed/xmpp/client.py +++ b/slixfeed/xmpp/client.py @@ -5,12 +5,10 @@ TODO -1) Use loop (with gather) instead of TaskGroup. - -2) Assure message delivery before calling a new task. +1) Assure message delivery before calling a new task. See https://slixmpp.readthedocs.io/en/latest/event_index.html#term-marker_acknowledged -3) XHTTML-IM +2) XHTTML-IM case _ if message_lowercase.startswith("html"): message['html']=" Parse me! @@ -36,8 +34,6 @@ from datetime import datetime import os from feedparser import parse import slixmpp -import slixfeed.task as task -from slixfeed.url import join_url, trim_url # from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound # from slixmpp.plugins.xep_0402 import BookmarkStorage, Conference # from slixmpp.plugins.xep_0048.stanza import Bookmarks @@ -46,32 +42,34 @@ from slixfeed.url import join_url, trim_url # import xml.etree.ElementTree as ET # from lxml import etree -import slixfeed.action as action import slixfeed.config as config from slixfeed.config import Config import slixfeed.crawl as crawl import slixfeed.dt as dt import slixfeed.fetch as fetch from slixfeed.log import Logger -from slixfeed.opml import Opml import slixfeed.sqlite as sqlite +from slixfeed.syndication import Feed, FeedTask, Opml import slixfeed.url as uri +from slixfeed.utilities import Html, Task, Utilities from slixfeed.version import __version__ from slixfeed.xmpp.bookmark import XmppBookmark -from slixfeed.xmpp.chat import Chat -from slixfeed.xmpp.connect import XmppConnect +from slixfeed.xmpp.chat import XmppChat, XmppChatTask +from slixfeed.xmpp.connect import XmppConnect, XmppConnectTask from slixfeed.xmpp.ipc import XmppIpcServer from slixfeed.xmpp.iq import XmppIQ from slixfeed.xmpp.message import XmppMessage -from slixfeed.xmpp.muc import XmppGroupchat +from slixfeed.xmpp.muc import XmppMuc +from slixfeed.xmpp.groupchat import XmppGroupchat from slixfeed.xmpp.presence import XmppPresence -from slixfeed.xmpp.privilege import is_moderator, is_operator, is_access +from slixfeed.xmpp.privilege import is_operator, is_access import slixfeed.xmpp.profile as profile -from slixfeed.xmpp.publish import XmppPubsub +from slixfeed.xmpp.publish import XmppPubsub, XmppPubsubAction, XmppPubsubTask from slixfeed.xmpp.roster import XmppRoster # import 
slixfeed.xmpp.service as service +from slixfeed.xmpp.status import XmppStatusTask from slixfeed.xmpp.upload import XmppUpload -from slixfeed.xmpp.utility import get_chat_type +from slixfeed.xmpp.utilities import XmppUtilities import sys import time @@ -80,13 +78,6 @@ try: except: import tomli as tomllib - -main_task = [] -jid_tasker = {} -task_manager = {} -loop = asyncio.get_event_loop() -# asyncio.set_event_loop(loop) - # time_now = datetime.now() # time_now = time_now.strftime("%H:%M:%S") @@ -230,10 +221,6 @@ class XmppClient(slixmpp.ClientXMPP): self.on_presence_subscribed) self.add_event_handler("presence_unsubscribed", self.on_presence_unsubscribed) - - # Initialize event loop - # self.loop = asyncio.get_event_loop() - self.add_event_handler('connection_failed', self.on_connection_failed) self.add_event_handler('session_end', @@ -252,7 +239,7 @@ class XmppClient(slixmpp.ClientXMPP): message_log = '{}: jid_full: {}' logger.debug(message_log.format(function_name, jid_full)) muc_jid = message['groupchat_invite']['jid'] - result = await XmppGroupchat.join(self, muc_jid) + result = await XmppMuc.join(self, muc_jid) if result == 'ban': message_body = '{} is banned from {}'.format(self.alias, muc_jid) jid_bare = message['from'].bare @@ -290,7 +277,7 @@ class XmppClient(slixmpp.ClientXMPP): message_log = '{}: jid_full: {}' logger.debug(message_log.format(function_name, jid_full)) muc_jid = message['groupchat_invite']['jid'] - result = await XmppGroupchat.join(self, muc_jid) + result = await XmppMuc.join(self, muc_jid) if result == 'ban': message_body = '{} is banned from {}'.format(self.alias, muc_jid) jid_bare = message['from'].bare @@ -342,21 +329,19 @@ class XmppClient(slixmpp.ClientXMPP): await self['xep_0115'].update_caps() # self.send_presence() await self.get_roster() - # XmppCommand.adhoc_commands(self) # self.service_reactions() - task.task_ping(self) - # NOTE This might take more memory due to - # function sqlite.get_unread_entries_of_feed - results = await XmppPubsub.get_pubsub_services(self) - for result in results + [{'jid' : self.boundjid.bare, - 'name' : self.alias}]: - jid_bare = result['jid'] - if jid_bare not in self.settings: - db_file = config.get_pathname_to_database(jid_bare) - Config.add_settings_jid(self.settings, jid_bare, db_file) - await task.start_tasks_xmpp_pubsub(self, jid_bare) + XmppConnectTask.ping(self) + # results = await XmppPubsub.get_pubsub_services(self) + # for result in results + [{'jid' : self.boundjid.bare, + # 'name' : self.alias}]: + # jid_bare = result['jid'] + # if jid_bare not in self.settings: + # db_file = config.get_pathname_to_database(jid_bare) + # Config.add_settings_jid(self.settings, jid_bare, db_file) + # await FeedTask.check_updates(self, jid_bare) + # XmppPubsubTask.task_publish(self, jid_bare) bookmarks = await XmppBookmark.get_bookmarks(self) - await action.xmpp_muc_autojoin(self, bookmarks) + await XmppGroupchat.autojoin(self, bookmarks) if 'ipc' in self.settings and self.settings['ipc']['bsd']: # Start Inter-Process Communication print('POSIX sockets: Initiating IPC server...') @@ -376,7 +361,7 @@ class XmppClient(slixmpp.ClientXMPP): profile.set_identity(self, 'client') self['xep_0115'].update_caps() bookmarks = await XmppBookmark.get_bookmarks(self) - await action.xmpp_muc_autojoin(self, bookmarks) + await XmppGroupchat.autojoin(self, bookmarks) time_end = time.time() difference = time_end - time_begin if difference > 1: logger.warning('{} (time: {})'.format(function_name, @@ -419,7 +404,7 @@ class 
XmppClient(slixmpp.ClientXMPP):
                XmppPresence.send(self, jid_bare, status_message)
        else:
            # TODO Request for subscription
-            # if (await get_chat_type(self, jid_bare) == 'chat' and
+            # if (await XmppUtilities.get_chat_type(self, jid_bare) == 'chat' and
            #     not self.client_roster[jid_bare]['to']):
            #     XmppPresence.subscription(self, jid_bare, 'subscribe')
            #     await XmppRoster.add(self, jid_bare)
@@ -434,7 +419,7 @@ class XmppClient(slixmpp.ClientXMPP):
            self.pending_tasks[jid_bare] = {}
        # if jid_full not in self.pending_tasks:
        #     self.pending_tasks[jid_full] = {}
-        await Chat.process_message(self, message)
+        await XmppChat.process_message(self, message)
        # chat_type = message["type"]
        # message_body = message["body"]
        # message_reply = message.reply
@@ -455,10 +440,14 @@ class XmppClient(slixmpp.ClientXMPP):
        if jid_bare in self.boundjid.bare:
            return
        if presence['show'] in ('away', 'dnd', 'xa'):
-            key_list = ['interval']
-            task.clean_tasks_xmpp_chat(self, jid_bare, key_list)
-            key_list = ['status', 'check']
-            await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+            if (jid_bare in self.task_manager and
+                'interval' in self.task_manager[jid_bare]):
+                self.task_manager[jid_bare]['interval'].cancel()
+            else:
+                logger.debug('No task "interval" for JID {} (on_changed_status)'
+                             .format(jid_bare))
+            XmppStatusTask.restart_task(self, jid_bare)
+            FeedTask.restart_task(self, jid_bare)
        time_end = time.time()
        difference = time_end - time_begin
        if difference > 1: logger.warning('{} (time: {})'.format(function_name,
                                                                 difference))
@@ -516,16 +505,15 @@ class XmppClient(slixmpp.ClientXMPP):
        function_name = sys._getframe().f_code.co_name
        message_log = '{}: jid_full: {}'
        logger.debug(message_log.format(function_name, jid_full))
-        # TODO Add function to check whether task is already running or not
-        # await task.start_tasks(self, presence)
-        # NOTE Already done inside the start-task function
        jid_bare = presence['from'].bare
        if jid_bare in self.boundjid.bare:
            return
        # FIXME TODO Find out what is the source responsible for a couple presences with empty message
        # NOTE This is a temporary solution
        await asyncio.sleep(10)
-        await task.start_tasks_xmpp_chat(self, jid_bare)
+        FeedTask.restart_task(self, jid_bare)
+        XmppChatTask.restart_task(self, jid_bare)
+        XmppStatusTask.restart_task(self, jid_bare)
        self.add_event_handler("presence_unavailable",
                               self.on_presence_unavailable)
        time_end = time.time()
@@ -563,8 +551,8 @@ class XmppClient(slixmpp.ClientXMPP):
        message_log = '{}: jid_full: {}'
        logger.debug(message_log.format(function_name, jid_full))
        jid_bare = presence['from'].bare
-        # await task.stop_tasks(self, jid)
-        task.clean_tasks_xmpp_chat(self, jid_bare)
+        for task in ('check', 'interval', 'status'):
+            Task.stop(self, jid_bare, task)

        # NOTE Albeit nice to ~have~ see, this would constantly
        # send presence messages to server to no end.
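
# A sketch of the task-manager layout which the handlers above appear to
# assume (inferred from the calls; illustrative, not code from any module):
#
#     self.task_manager = {
#         'user@example.org': {            # bare JID
#             'check': asyncio.Task,       # FeedTask
#             'interval': asyncio.Task,    # XmppChatTask
#             'status': asyncio.Task,      # XmppStatusTask
#         },
#     }
#
# Task.stop(self, jid_bare, name) then cancels one named task of one JID.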
@@ -591,7 +579,8 @@ class XmppClient(slixmpp.ClientXMPP):
        message_log = '{}: jid_full: {}'
        logger.debug(message_log.format(function_name, jid_full))
        jid_bare = presence["from"].bare
-        task.clean_tasks_xmpp_chat(self, jid_bare)
+        for task in ('check', 'interval', 'status'):
+            Task.stop(self, jid_bare, task)
        time_end = time.time()
        difference = time_end - time_begin
        if difference > 1: logger.warning('{} (time: {})'.format(function_name,
                                                                 difference))
@@ -618,8 +607,7 @@ class XmppClient(slixmpp.ClientXMPP):
        # self.send_presence(pto=jid)
        # task.clean_tasks_xmpp_chat(self, jid, ['status'])
        await asyncio.sleep(5)
-        key_list = ['status']
-        await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+        XmppStatusTask.restart_task(self, jid_bare)
        time_end = time.time()
        difference = time_end - time_begin
        if difference > 10: logger.warning('{} (time: {})'.format(function_name,
                                                                  difference))


    async def on_chatstate_composing(self, message):
+        # print('on_chatstate_composing START')
        time_begin = time.time()
        jid_full = str(message['from'])
        function_name = sys._getframe().f_code.co_name
@@ -642,13 +631,14 @@ class XmppClient(slixmpp.ClientXMPP):
            status_message = ('💡 Send "help" for manual, or "info" for '
                              'information.')
            XmppPresence.send(self, jid_bare, status_message)
+            # print('on_chatstate_composing FINISH')
        time_end = time.time()
        difference = time_end - time_begin
        if difference > 1: logger.warning('{} (time: {})'.format(function_name,
                                                                 difference))


-    async def on_chatstate_gone(self, message):
+    def on_chatstate_gone(self, message):
        time_begin = time.time()
        jid_full = str(message['from'])
        function_name = sys._getframe().f_code.co_name
@@ -658,16 +648,14 @@ class XmppClient(slixmpp.ClientXMPP):
        if jid_bare in self.boundjid.bare:
            return
        if message['type'] in ('chat', 'normal'):
-            # task.clean_tasks_xmpp_chat(self, jid, ['status'])
-            key_list = ['status']
-            await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+            XmppStatusTask.restart_task(self, jid_bare)
        time_end = time.time()
        difference = time_end - time_begin
        if difference > 1: logger.warning('{} (time: {})'.format(function_name,
                                                                 difference))


-    async def on_chatstate_inactive(self, message):
+    def on_chatstate_inactive(self, message):
        time_begin = time.time()
        jid_full = str(message['from'])
        function_name = sys._getframe().f_code.co_name
@@ -677,16 +665,14 @@ class XmppClient(slixmpp.ClientXMPP):
        if jid_bare in self.boundjid.bare:
            return
        if message['type'] in ('chat', 'normal'):
-            # task.clean_tasks_xmpp_chat(self, jid, ['status'])
-            key_list = ['status']
-            await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+            XmppStatusTask.restart_task(self, jid_bare)
        time_end = time.time()
        difference = time_end - time_begin
        if difference > 1: logger.warning('{} (time: {})'.format(function_name,
                                                                 difference))


-    async def on_chatstate_paused(self, message):
+    def on_chatstate_paused(self, message):
        time_begin = time.time()
        jid_full = str(message['from'])
        function_name = sys._getframe().f_code.co_name
@@ -696,9 +682,7 @@ class XmppClient(slixmpp.ClientXMPP):
        if jid_bare in self.boundjid.bare:
            return
        if message['type'] in ('chat', 'normal'):
-            # task.clean_tasks_xmpp_chat(self, jid, ['status'])
-            key_list = ['status']
-            await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+            XmppStatusTask.restart_task(self, jid_bare)
        time_end = time.time()
        difference = time_end - time_begin
        if difference > 1: logger.warning('{} (time: {})'.format(function_name,
                                                                 difference))
@@ -857,7 +841,7 @@ class XmppClient(slixmpp.ClientXMPP):
        logger.debug('{}: 
jid_full: {}' .format(function_name, jid_full)) jid_bare = session['from'].bare - chat_type = await get_chat_type(self, jid_bare) + chat_type = await XmppUtilities.get_chat_type(self, jid_bare) if is_access(self, jid_bare, jid_full, chat_type): form = self['xep_0004'].make_form('form', 'PubSub') form['instructions'] = 'Publish news items to PubSub nodes.' @@ -898,7 +882,7 @@ class XmppClient(slixmpp.ClientXMPP): logger.debug('{}: jid_full: {}' .format(function_name, jid_full)) jid_bare = session['from'].bare - chat_type = await get_chat_type(self, jid_bare) + chat_type = await XmppUtilities.get_chat_type(self, jid_bare) if is_access(self, jid_bare, jid_full, chat_type): values = payload['values'] form = self['xep_0004'].make_form('form', 'Publish') @@ -1095,7 +1079,7 @@ class XmppClient(slixmpp.ClientXMPP): # xep = None for ix in ixs: - await action.xmpp_pubsub_send_selected_entry(self, jid, jid_bare, node_id, ix) + await XmppPubsubAction.send_selected_entry(self, jid, node_id, ix) text_info = 'Posted {} entries.'.format(len(ixs)) session['allow_prev'] = False session['has_next'] = False @@ -1143,7 +1127,7 @@ class XmppClient(slixmpp.ClientXMPP): if not result['error']: document = result['content'] feed = parse(document) - if action.is_feed(url, feed): + if Feed.is_feed(url, feed): form['instructions'] = 'Select entries to publish.' options = form.add_field(desc='Select entries to post.', ftype='list-multi', @@ -1408,7 +1392,7 @@ class XmppClient(slixmpp.ClientXMPP): logger.debug('{}: jid_full: {}' .format(function_name, jid_full)) jid_bare = session['from'].bare - chat_type = await get_chat_type(self, jid_bare) + chat_type = await XmppUtilities.get_chat_type(self, jid_bare) if is_access(self, jid_bare, jid_full, chat_type): jid = session['from'].bare db_file = config.get_pathname_to_database(jid_bare) @@ -1517,7 +1501,7 @@ class XmppClient(slixmpp.ClientXMPP): logger.debug('{}: jid_full: {}' .format(function_name, jid_full)) jid_bare = session['from'].bare - chat_type = await get_chat_type(self, jid_bare) + chat_type = await XmppUtilities.get_chat_type(self, jid_bare) if is_access(self, jid_bare, jid_full, chat_type): form = self['xep_0004'].make_form('form', 'Subscribe') # form['instructions'] = 'Add a new custom subscription.' @@ -1714,7 +1698,7 @@ class XmppClient(slixmpp.ClientXMPP): # summary = action.get_document_content_as_text(data) summary = sqlite.get_entry_summary(db_file, ix) summary = summary[0] - summary = action.remove_html_tags(summary) if summary else 'No content to show.' + summary = Html.remove_html_tags(summary) if summary else 'No content to show.' 
form.add_field(ftype="text-multi", label='Article', value=summary) @@ -1821,8 +1805,8 @@ class XmppClient(slixmpp.ClientXMPP): identifier = hostname + ':' + str(counter) else: break - result = await action.add_feed(self, jid_bare, db_file, url, - identifier) + result = await Feed.add_feed(self, jid_bare, db_file, url, + identifier) if result['error']: error_count += 1 elif result['exist']: @@ -1854,8 +1838,8 @@ class XmppClient(slixmpp.ClientXMPP): identifier = hostname + ':' + str(counter) else: break - result = await action.add_feed(self, jid_bare, db_file, url, - identifier) + result = await Feed.add_feed(self, jid_bare, db_file, url, + identifier) # URL is not a feed and URL has returned to feeds if isinstance(result, list): results = result @@ -2037,7 +2021,7 @@ class XmppClient(slixmpp.ClientXMPP): logger.debug('{}: jid_full: {}' .format(function_name, jid_full)) jid_bare = session['from'].bare - chat_type = await get_chat_type(self, jid_bare) + chat_type = await XmppUtilities.get_chat_type(self, jid_bare) if is_access(self, jid_bare, jid_full, chat_type): form = self['xep_0004'].make_form('form', 'Discover & Search') form['instructions'] = 'Discover news subscriptions of all kinds' @@ -2161,7 +2145,7 @@ class XmppClient(slixmpp.ClientXMPP): logger.debug('{}: jid_full: {}' .format(function_name, jid_full)) jid_bare = session['from'].bare - chat_type = await get_chat_type(self, jid_bare) + chat_type = await XmppUtilities.get_chat_type(self, jid_bare) if is_access(self, jid_bare, jid_full, chat_type): form = self['xep_0004'].make_form('form', 'Subscriptions') form['instructions'] = ('Browse, view, toggle or remove ' @@ -2521,7 +2505,7 @@ class XmppClient(slixmpp.ClientXMPP): logger.debug('{}: jid_full: {}' .format(function_name, jid_full)) jid_bare = session['from'].bare - chat_type = await get_chat_type(self, jid_bare) + chat_type = await XmppUtilities.get_chat_type(self, jid_bare) if is_access(self, jid_bare, jid_full, chat_type): form = self['xep_0004'].make_form('form', 'Advanced') form['instructions'] = 'Extended options' @@ -2905,9 +2889,9 @@ class XmppClient(slixmpp.ClientXMPP): # form['instructions'] = ('✅️ Feeds have been exported') exts = values['filetype'] for ext in exts: - filename = action.export_feeds(self, jid_bare, ext) + filename = Feed.export_feeds(jid_bare, ext) url = await XmppUpload.start(self, jid_bare, filename) - chat_type = await get_chat_type(self, jid_bare) + chat_type = await XmppUtilities.get_chat_type(self, jid_bare) XmppMessage.send_oob(self, jid_bare, url, chat_type) url_field = form.add_field(var=ext.upper(), ftype='text-single', @@ -2930,12 +2914,12 @@ class XmppClient(slixmpp.ClientXMPP): .format(function_name, jid_full)) jid_bare = session['from'].bare jid_full = str(session['from']) - chat_type = await get_chat_type(self, jid_bare) + chat_type = await XmppUtilities.get_chat_type(self, jid_bare) if is_access(self, jid_bare, jid_full, chat_type): form = self['xep_0004'].make_form('form', 'Subscribe') # NOTE Refresh button would be of use form['instructions'] = 'Featured subscriptions' - url = action.pick_a_feed() + url = Utilities.pick_a_feed() # options = form.add_field(desc='Click to subscribe.', # ftype="boolean", # label='Subscribe to {}?'.format(url['name']), @@ -2948,7 +2932,7 @@ class XmppClient(slixmpp.ClientXMPP): label='Subscribe', var='subscription') for i in range(10): - url = action.pick_a_feed() + url = Utilities.pick_a_feed() options.addOption(url['name'], url['link']) # jid_bare = session['from'].bare if '@' in jid_bare: @@ -3114,7 
+3098,7 @@ class XmppClient(slixmpp.ClientXMPP):
                                     var=jid_bare)
        session['allow_complete'] = True
        session['has_next'] = False
-        session['next'] = self._handle_pubsubs_complete
+        session['next'] = self._handle_pubsub_complete
        # session['allow_prev'] = True
        session['payload'] = form
        # session['prev'] = self._handle_advanced
@@ -3256,11 +3240,12 @@ class XmppClient(slixmpp.ClientXMPP):
            content = ''
            # TODO Check whether element of type Atom
+            # NOTE Consider pubsub#type of XEP-0462: PubSub Type Filtering
            atom_entry = iq['pubsub']['items']['item']['payload']
            for element in atom_entry:
                if element.text:
                    content += element.text + '\n\n'
-                    # content += action.remove_html_tags(element.text) + '\n\n'
+                    # content += Html.remove_html_tags(element.text) + '\n\n'
                if element.attrib:
                    for i in element.attrib:
                        content += element.attrib[i] + '\n\n'
@@ -3273,6 +3258,7 @@ class XmppClient(slixmpp.ClientXMPP):
        session['payload'] = form
        return session

+    # FIXME Undefined name 'jid_bare'
    async def _handle_node_edit(self, payload, session):
        jid_full = str(session['from'])
        function_name = sys._getframe().f_code.co_name
@@ -3282,6 +3268,7 @@ class XmppClient(slixmpp.ClientXMPP):
        jid = values['jid'][0]
        node = values['node']
        properties = await XmppPubsub.get_node_properties(self, jid, node)
+        form = self['xep_0004'].make_form('form', 'PubSub')
        form['instructions'] = 'Editing bookmark'
        jid_split = properties['jid'].split('@')
        room = jid_split[0]
@@ -3361,7 +3348,7 @@ class XmppClient(slixmpp.ClientXMPP):
        return session


-    async def _handle_pubsubs_complete(self, payload, session):
+    async def _handle_pubsub_complete(self, payload, session):
        jid_full = str(session['from'])
        function_name = sys._getframe().f_code.co_name
        logger.debug('{}: jid_full: {}'
@@ -3632,7 +3619,7 @@ class XmppClient(slixmpp.ClientXMPP):
        logger.debug('{}: jid_full: {}'
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
-        chat_type = await get_chat_type(self, jid_bare)
+        chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
        if is_access(self, jid_bare, jid_full, chat_type):
            db_file = config.get_pathname_to_database(jid_bare)
            if jid_bare not in self.settings:
@@ -3783,15 +3770,16 @@ class XmppClient(slixmpp.ClientXMPP):
                XmppPresence.send(self, jid_bare, status_message,
                                  status_type=status_type)
                await asyncio.sleep(5)
-                key_list = ['check', 'status', 'interval']
-                await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+                FeedTask.restart_task(self, jid_bare)
+                XmppChatTask.restart_task(self, jid_bare)
+                XmppStatusTask.restart_task(self, jid_bare)
            if (key == 'enabled' and
                val == 0 and
                str(is_enabled) == 1):
                logger.info('Slixfeed has been disabled for {}'.format(jid_bare))
-                key_list = ['interval', 'status']
-                task.clean_tasks_xmpp_chat(self, jid_bare, key_list)
+                for task in ('interval', 'status'):
+                    Task.stop(self, jid_bare, task)
                status_type = 'xa'
                status_message = '📪️ Send "Start" to receive updates'
                XmppPresence.send(self, jid_bare, status_message,
@@ -3800,22 +3788,6 @@ class XmppClient(slixmpp.ClientXMPP):
                await Config.set_setting_value(self.settings, jid_bare, db_file, key, val)
            val = self.settings[jid_bare][key]

-            # if key == 'enabled':
-            #     if str(setting.enabled) == 0:
-            #         status_type = 'available'
-            #         status_message = '📫️ Welcome back!' 
- # XmppPresence.send(self, jid, status_message, - # status_type=status_type) - # await asyncio.sleep(5) - # await task.start_tasks_xmpp_chat(self, jid, ['check', 'status', - # 'interval']) - # else: - # task.clean_tasks_xmpp_chat(self, jid, ['interval', 'status']) - # status_type = 'xa' - # status_message = '📪️ Send "Start" to receive Jabber updates' - # XmppPresence.send(self, jid, status_message, - # status_type=status_type) - if key in ('enabled', 'media', 'old'): if val == '1': val = 'Yes' @@ -3828,17 +3800,6 @@ class XmppClient(slixmpp.ClientXMPP): val = int(val) val = str(val) - # match value: - # case 'enabled': - # pass - # case 'interval': - # pass - - # result = '{}: {}'.format(key.capitalize(), val) - - # form.add_field(var=key, - # ftype='fixed', - # label=result) form = payload form['title'] = 'Done' form['instructions'] = 'has been completed!' diff --git a/slixfeed/xmpp/commands.py b/slixfeed/xmpp/commands.py index b4f3db0..03632d1 100644 --- a/slixfeed/xmpp/commands.py +++ b/slixfeed/xmpp/commands.py @@ -1,28 +1,25 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import asyncio from feedparser import parse from random import randrange -import slixfeed.action as action import slixfeed.config as config -import slixfeed.crawl as crawl from slixfeed.config import Config +import slixfeed.crawl as crawl import slixfeed.dt as dt import slixfeed.fetch as fetch -from slixfeed.opml import Opml +from slixfeed.log import Logger import slixfeed.sqlite as sqlite -import slixfeed.task as task +from slixfeed.syndication import Feed, Opml import slixfeed.url as uri +from slixfeed.utilities import Documentation, Utilities from slixfeed.version import __version__ from slixfeed.xmpp.bookmark import XmppBookmark -from slixfeed.log import Logger -from slixfeed.xmpp.muc import XmppGroupchat -from slixfeed.xmpp.message import XmppMessage -from slixfeed.xmpp.publish import XmppPubsub +from slixfeed.xmpp.muc import XmppMuc +from slixfeed.xmpp.publish import XmppPubsub, XmppPubsubAction from slixfeed.xmpp.presence import XmppPresence -from slixfeed.xmpp.upload import XmppUpload -from slixfeed.xmpp.utility import get_chat_type +from slixfeed.xmpp.status import XmppStatusTask +from slixfeed.xmpp.utilities import XmppUtilities import sys try: @@ -44,20 +41,20 @@ class XmppCommands: def print_help(): - result = action.manual('commands.toml') + result = Documentation.manual('commands.toml') message = '\n'.join(result) return message def print_help_list(): - command_list = action.manual('commands.toml', section='all') + command_list = Documentation.manual('commands.toml', section='all') message = ('Complete list of commands:\n' '```\n{}\n```'.format(command_list)) return message def print_help_specific(command_root, command_name): - command_list = action.manual('commands.toml', + command_list = Documentation.manual('commands.toml', section=command_root, command=command_name) if command_list: @@ -69,7 +66,7 @@ class XmppCommands: def print_help_key(command): - command_list = action.manual('commands.toml', command) + command_list = Documentation.manual('commands.toml', command) if command_list: command_list = ' '.join(command_list) message = ('Available command `{}` keys:\n' @@ -146,22 +143,24 @@ class XmppCommands: document = result['content'] feed = parse(document) feed_valid = 0 if feed.bozo else 1 - await sqlite.update_feed_validity(db_file, feed_id, feed_valid) + await sqlite.update_feed_validity( + db_file, feed_id, feed_valid) if feed.has_key('updated_parsed'): feed_updated = feed.updated_parsed 
try:
-                        feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
+                        feed_updated = dt.convert_struct_time_to_iso8601(
+                            feed_updated)
                    except:
                        feed_updated = None
                else:
                    feed_updated = None
-                feed_properties = action.get_properties_of_feed(
+                feed_properties = Feed.get_properties_of_feed(
                    db_file, feed_id, feed)
                await sqlite.update_feed_properties(db_file, feed_id,
                                                    feed_properties)
                feed_id = sqlite.get_feed_id(db_file, url)
                feed_id = feed_id[0]
-                new_entries = action.get_properties_of_entries(
+                new_entries = Feed.get_properties_of_entries(
                    jid_bare, db_file, url, feed_id, feed)
                if new_entries:
                    await sqlite.add_entries_and_update_feed_state(
@@ -179,8 +178,7 @@ class XmppCommands:
        #     if old:
        #         # task.clean_tasks_xmpp_chat(self, jid_bare, ['status'])
        #         # await send_status(jid)
-        #         key_list = ['status']
-        #         await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+        #         Task.start(self, jid_bare, 'status')
        #     else:
        #         feed_id = sqlite.get_feed_id(db_file, url)
        #         feed_id = feed_id[0]
@@ -252,7 +250,7 @@ class XmppCommands:
            message = ('Maximum archived items has been set to {} (was: {}).'
                      .format(val_new, val_old))
        except:
-            message = ('No action has been taken. Enter a numeric value only.')
+            message = 'No action has been taken. Enter a numeric value only.'
        return message


@@ -332,8 +330,8 @@ class XmppCommands:
        await sqlite.set_filter_value(db_file, ['deny', val])


-    def export_feeds(self, jid_bare, ext):
-        filename = action.export_feeds(self, jid_bare, ext)
+    def export_feeds(jid_bare, ext):
+        filename = Feed.export_feeds(jid_bare, ext)
        message = 'Feeds successfully exported to {}.'.format(ext)
        return filename, message


@@ -366,14 +364,14 @@ class XmppCommands:


    # This is similar to send_next_update
-    async def pubsub_send(self, info, jid):
+    async def pubsub_send(self, info, jid_bare):
        # if num:
        #     report = await action.xmpp_pubsub_send_unread_items(
        #         self, jid, num)
        # else:
        #     report = await action.xmpp_pubsub_send_unread_items(
        #         self, jid)
-        result = await action.xmpp_pubsub_send_unread_items(self, jid)
+        result = await XmppPubsubAction.send_unread_items(self, jid_bare)
        message = ''
        for url in result:
            if result[url]:
@@ -416,12 +414,13 @@ class XmppCommands:
        #     self.pending_tasks[jid_bare][self.pending_tasks_counter] = status_message
        XmppPresence.send(self, jid_bare, status_message,
                          status_type=status_type)
-        if url.startswith('feed:/') or url.startswith('itpc:/') or url.startswith('rss:/'):
+        if (url.startswith('feed:/') or
+            url.startswith('itpc:/') or
+            url.startswith('rss:/')):
            url = uri.feed_to_http(url)
        url = (await uri.replace_hostname(url, 'feed')) or url
-        result = await action.add_feed(self, jid_bare,
-                                       db_file, url,
-                                       identifier)
+        result = await Feed.add_feed(self, jid_bare, db_file, url,
+                                     identifier)
        if isinstance(result, list):
            results = result
            message = "Syndication feeds found for {}\n\n```\n".format(url)
@@ -457,8 +456,7 @@ class XmppCommands:
        del self.pending_tasks[jid_bare][pending_tasks_num]
        # del self.pending_tasks[jid_bare][self.pending_tasks_counter]
        print(self.pending_tasks)
-        key_list = ['status']
-        await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+        XmppStatusTask.restart_task(self, jid_bare)
        # except:
        #     response = (
        #         '> {}\nNews source is in the process '
@@ -494,8 +492,7 @@ class XmppCommands:
            else:
                break
        # try:
-        result = await action.add_feed(self, jid_bare, db_file, url,
-                                       identifier)
+        result = await Feed.add_feed(self, jid_bare, db_file, url, identifier)
        if isinstance(result, list):
            results = result
            message = ("Syndication feeds found for {}\n\n```\n"
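
# The return contract which Feed.add_feed appears to follow at these call
# sites (inferred, not a definitive signature): a list of discovered feed
# URLs when the given URL is a plain page, otherwise a dict of state flags:
#
#     result = await Feed.add_feed(self, jid_bare, db_file, url, identifier)
#     if isinstance(result, list):  # candidate feeds discovered at the page
#         ...
#     elif result['error']:         # fetch or parse failure
#         ...
#     elif result['exist']:         # URL is already subscribed
#         ...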
@@ -547,7 +544,7 @@ class XmppCommands: elif query: message = "No feeds were found for: {}".format(query) else: - url = action.pick_a_feed() + url = Utilities.pick_a_feed() message = ('List of subscriptions is empty. ' 'To add a feed, send a URL.\n' 'Featured news: *{}*\n{}' @@ -569,19 +566,16 @@ class XmppCommands: self.settings, jid_bare, 'interval') await Config.set_setting_value( self.settings, jid_bare, db_file, 'interval', val_new) - # NOTE Perhaps this should be replaced by functions - # clean and start - task.refresh_task(self, jid_bare, - task.task_message, 'interval', val_new) message = ('Updates will be sent every {} minutes ' '(was: {}).'.format(val_new, val_old)) - except: + except Exception as e: + logger.error(str(e)) message = ('No action has been taken. Enter a numeric value only.') return message async def muc_leave(self, jid_bare): - XmppGroupchat.leave(self, jid_bare) + XmppMuc.leave(self, jid_bare) await XmppBookmark.remove(self, jid_bare) @@ -590,7 +584,7 @@ class XmppCommands: muc_jid = uri.check_xmpp_uri(command) if muc_jid: # TODO probe JID and confirm it's a groupchat - result = await XmppGroupchat.join(self, muc_jid) + result = await XmppMuc.join(self, muc_jid) # await XmppBookmark.add(self, jid=muc_jid) if result == 'ban': message = '{} is banned from {}'.format(self.alias, muc_jid) @@ -693,15 +687,6 @@ class XmppCommands: return message - async def send_next_update(self, jid_bare, command): - """Warning! Not to be interfaced with IPC""" - num = command[5:] - if num: - await action.xmpp_chat_send_unread_items(self, jid_bare, num) - else: - await action.xmpp_chat_send_unread_items(self, jid_bare) - - def print_options(self, jid_bare): message = '' for key in self.settings[jid_bare]: @@ -761,8 +746,8 @@ class XmppCommands: if not result['error']: document = result['content'] feed = parse(document) - if action.is_feed(url, feed): - message = action.view_feed(url, feed) + if Feed.is_feed(url, feed): + message = Feed.view_feed(url, feed) break else: result = await crawl.probe_page(url, document) @@ -797,8 +782,8 @@ class XmppCommands: document = result['content'] status = result['status_code'] feed = parse(document) - if action.is_feed(url, feed): - message = action.view_entry(url, feed, num) + if Feed.is_feed(url, feed): + message = Feed.view_entry(url, feed, num) break else: result = await crawl.probe_page(url, document) @@ -901,7 +886,7 @@ class XmppCommands: return message - async def mark_as_read(self, jid_bare, db_file, ix_url=None): + async def mark_as_read(jid_bare, db_file, ix_url=None): if ix_url: sub_marked = [] url_invalid = [] @@ -941,14 +926,12 @@ class XmppCommands: message += '\nThe following indexes do not exist:\n\n{}\n'.format(ixs) message += '\n```' else: - message = ('No action has been taken.' - '\n' - 'Missing argument. ' - 'Enter a subscription URL or index number.') + await sqlite.mark_all_as_read(db_file) + message = 'All subscriptions have been marked as read.' 


-    async def search_items(self, db_file, query):
+    async def search_items(db_file, query):
         if query:
             if len(query) > 3:
                 results = sqlite.search_entries(db_file, query)
@@ -970,10 +953,12 @@
         return message


-    async def scheduler_start(self, db_file, jid_bare):
+    # Tasks are classes which are passed to this function.
+    # Had they been functions, the parameter "tasks" might have been named "callback".
+    async def scheduler_start(self, db_file, jid_bare, tasks):
         await Config.set_setting_value(self.settings, jid_bare, db_file, 'enabled', 1)
-        key_list = ['check', 'status', 'interval']
-        await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+        for task in tasks:
+            task.restart_task(self, jid_bare)
         message = 'Updates are enabled.'
         return message


@@ -981,8 +966,13 @@
     async def scheduler_stop(self, db_file, jid_bare):
         await Config.set_setting_value(
             self.settings, jid_bare, db_file, 'enabled', 0)
-        key_list = ['interval', 'status']
-        task.clean_tasks_xmpp_chat(self, jid_bare, key_list)
+        for task in ('interval', 'status'):
+            if (jid_bare in self.task_manager and
+                task in self.task_manager[jid_bare]):
+                self.task_manager[jid_bare][task].cancel()
+            else:
+                logger.debug('No task {} for JID {} (Task.stop)'
+                             .format(task, jid_bare))
         message = 'Updates are disabled.'
         return message
@@ -1037,8 +1027,7 @@ class XmppCommands:
         except:
             message = ('No action has been taken. No news source with index {}.'
                        .format(feed_id))
-        key_list = ['status']
-        await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+        XmppStatusTask.restart_task(self, jid_bare)
         return message


@@ -1102,7 +1091,7 @@ class XmppCommands:

     async def invite_jid_to_muc(self, jid_bare):
         muc_jid = 'slixfeed@chat.woodpeckersnest.space'
-        if await get_chat_type(self, jid_bare) == 'chat':
+        if await XmppUtilities.get_chat_type(self, jid_bare) == 'chat':
             self.plugin['xep_0045'].invite(muc_jid, jid_bare)
diff --git a/slixfeed/xmpp/connect.py b/slixfeed/xmpp/connect.py
index 1d90f48..0978ad4 100644
--- a/slixfeed/xmpp/connect.py
+++ b/slixfeed/xmpp/connect.py
@@ -15,9 +15,11 @@ TODO
 import asyncio
 from slixfeed.dt import current_time
+from slixfeed.log import Logger
 from slixmpp.exceptions import IqTimeout, IqError
 from time import sleep
-import logging
+
+logger = Logger(__name__)


 class XmppConnect:
@@ -45,21 +47,21 @@ class XmppConnect:
                 rtt = await self['xep_0199'].ping(jid,
                                                   ifrom=jid_from,
                                                   timeout=10)
-                logging.info('Success! RTT: %s', rtt)
+                logger.info('Success! RTT: %s', rtt)
             except IqError as e:
-                logging.error('Error pinging %s: %s', jid,
+                logger.error('Error pinging %s: %s', jid,
                               e.iq['error']['condition'])
             except IqTimeout:
-                logging.warning('No response from %s', jid)
+                logger.warning('No response from %s', jid)
             if not rtt:
-                logging.warning('Disconnecting...')
+                logger.warning('Disconnecting...')
                 self.disconnect()
                 break
             await asyncio.sleep(60 * 1)
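
The reworked `scheduler_start` in the commands.py hunk above receives task classes rather than a list of keys; every class it accepts is expected to expose `restart_task(self, jid_bare)` and to register its `asyncio` task in `self.task_manager`. That contract is implicit in the patch; a minimal conforming class might look like this (everything except `restart_task` and `task_manager` is hypothetical):

```
import asyncio

# Hypothetical stand-in for FeedTask, XmppChatTask or XmppStatusTask.
class ExampleTask:

    async def run(self, jid_bare):
        while True:
            ...  # periodic work for this JID
            await asyncio.sleep(60)

    def restart_task(self, jid_bare):
        # Cancel any previous task for this JID, then schedule a new one.
        tasks = self.task_manager.setdefault(jid_bare, {})
        if 'example' in tasks:
            tasks['example'].cancel()
        tasks['example'] = asyncio.create_task(ExampleTask.run(self, jid_bare))
```
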

     def recover(self, message):
-        logging.warning(message)
+        logger.warning(message)
         print(current_time(), message, 'Attempting to reconnect.')
         self.connection_attempts += 1
         # if self.connection_attempts <= self.max_connection_attempts:
@@ -78,10 +80,20 @@ class XmppConnect:

     def inspect(self):
-        print('Disconnected\n'
-              'Reconnecting...')
+        print('Disconnected\nReconnecting...')
         try:
             self.reconnect
         except:
             self.disconnect()
             print('Problem reconnecting')
+
+
+class XmppConnectTask:
+
+
+    def ping(self):
+        try:
+            self.task_ping_instance.cancel()
+        except:
+            logger.info('No ping task to cancel.')
+        self.task_ping_instance = asyncio.create_task(XmppConnect.ping(self))
diff --git a/slixfeed/xmpp/groupchat.py b/slixfeed/xmpp/groupchat.py
new file mode 100644
index 0000000..61d3e44
--- /dev/null
+++ b/slixfeed/xmpp/groupchat.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+
+TODO
+
+1) Send a message to the inviter that the bot has joined the groupchat.
+
+2) If the groupchat requires a captcha, send the consequent message.
+
+3) If a groupchat error is received, send that error message to the inviter.
+
+FIXME
+
+1) Save the name of the groupchat instead of its JID as the name.
+
+"""
+from slixfeed.xmpp.bookmark import XmppBookmark
+from slixfeed.xmpp.muc import XmppMuc
+from slixfeed.log import Logger
+
+logger = Logger(__name__)
+
+
+class XmppGroupchat:
+
+    async def autojoin(self, bookmarks):
+        for bookmark in bookmarks:
+            if bookmark["jid"] and bookmark["autojoin"]:
+                if not bookmark["nick"]:
+                    bookmark["nick"] = self.alias
+                    logger.error('Alias (i.e. Nickname) is missing for '
+                                 'bookmark {}'.format(bookmark['name']))
+                alias = bookmark["nick"]
+                muc_jid = bookmark["jid"]
+                result = await XmppMuc.join(self, muc_jid, alias)
+                print(result)
+                if result == 'ban':
+                    await XmppBookmark.remove(self, muc_jid)
+                    logger.warning('{} is banned from {}'.format(self.alias, muc_jid))
+                    logger.warning('Groupchat {} has been removed from bookmarks'
+                                   .format(muc_jid))
+                else:
+                    logger.info('Autojoin groupchat\n'
+                                'Name : {}\n'
+                                'JID : {}\n'
+                                'Alias : {}\n'
+                                .format(bookmark["name"],
+                                        bookmark["jid"],
+                                        bookmark["nick"]))
+            elif not bookmark["jid"]:
+                logger.error('JID is missing for bookmark {}'
+                             .format(bookmark['name']))
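
Throughout these hunks, bare `logging` calls give way to `slixfeed.log.Logger`. slixfeed/log.py is modified by this commit but not reproduced here, so the wrapper below is only a guess at its shape, limited to the methods the patch actually calls (`debug`, `info`, `warning`, `error`):

```
# Hedged sketch of slixfeed.log.Logger; the real module is not shown
# in this patch, so treat this as an assumption.
import logging

class Logger:

    def __init__(self, name):
        # One stdlib logger per module, keyed by __name__.
        self._logger = logging.getLogger(name)

    def debug(self, msg, *args):
        self._logger.debug(msg, *args)

    def info(self, msg, *args):
        self._logger.info(msg, *args)

    def warning(self, msg, *args):
        self._logger.warning(msg, *args)

    def error(self, msg, *args):
        self._logger.error(msg, *args)
```
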
diff --git a/slixfeed/xmpp/ipc.py b/slixfeed/xmpp/ipc.py
index f50ba8b..123bbe5 100644
--- a/slixfeed/xmpp/ipc.py
+++ b/slixfeed/xmpp/ipc.py
@@ -12,7 +12,11 @@ socket (i.e. clients[fd]) from the respective client.
 import asyncio
 import os
 import slixfeed.config as config
+from slixfeed.syndication import FeedTask
+from slixfeed.xmpp.chat import XmppChatTask
 from slixfeed.xmpp.commands import XmppCommands
+from slixfeed.xmpp.chat import XmppChatAction
+from slixfeed.xmpp.status import XmppStatusTask
 import socket

 class XmppIpcServer:
@@ -86,7 +90,7 @@ class XmppIpcServer:
                 else:
                     command = data
                 match command:
-                    case _ if command.startswith('add '):
+                    case _ if command.startswith('add'):
                         command = command[4:]
                         url = command.split(' ')[0]
                         title = ' '.join(command.split(' ')[1:])
@@ -130,10 +134,10 @@
                             self, muc_jid)
                     case 'bookmarks':
                         response = await XmppCommands.print_bookmarks(self)
-                    case _ if command.startswith('clear '):
+                    case _ if command.startswith('clear'):
                         key = command[6:]
                         response = await XmppCommands.clear_filter(db_file, key)
-                    case _ if command.startswith('default '):
+                    case _ if command.startswith('default'):
                         key = command[8:]
                         response = await XmppCommands.restore_default(
                             self, jid_bare, key=None)
@@ -163,10 +167,10 @@
                         response = ('No action has been taken.'
                                     '\n'
                                     'Missing keywords.')
-                    case _ if command.startswith('disable '):
+                    case _ if command.startswith('disable'):
                         response = await XmppCommands.feed_disable(
                             self, db_file, jid_bare, command)
-                    case _ if command.startswith('enable '):
+                    case _ if command.startswith('enable'):
                         response = await XmppCommands.feed_enable(
                             self, db_file, command)
                     case _ if command.startswith('export'):
@@ -207,12 +211,12 @@
                     case 'pubsub list':
                         response = await XmppCommands.pubsub_list(
                             self, jid_bare)
-                    case _ if command.startswith('pubsub list '):
+                    case _ if command.startswith('pubsub list'):
                         jid = command[12:]
                         response = 'List of nodes for {}:\n```\n'.format(jid)
                         response = await XmppCommands.pubsub_list(self, jid)
                         response += '```'
-                    case _ if command.startswith('pubsub send '):
+                    case _ if command.startswith('pubsub send'):
                         info = command[12:]
                         info = info.split(' ')
                         jid = info[0]
@@ -233,6 +237,7 @@
                         if val:
                             response = await XmppCommands.set_interval(
                                 self, db_file, jid_bare, val)
+                            XmppChatTask.restart_task(self, jid_bare)
                         else:
                             response = 'Current value for interval: '
                             response += XmppCommands.get_interval(self, jid_bare)
@@ -257,12 +262,13 @@
                         response = await XmppCommands.set_old_off(
                             self, jid_bare, db_file)
                     case _ if command.startswith('next'):
-                        await XmppCommands.send_next_update(self, jid_bare, command)
-                    case _ if command.startswith('node delete '):
+                        num = command[5:]
+                        await XmppChatAction.send_unread_items(self, jid_bare, num)
+                    case _ if command.startswith('node delete'):
                         info = command[12:]
                         info = info.split(' ')
                         response = XmppCommands.node_delete(self, info)
-                    case _ if command.startswith('node purge '):
+                    case _ if command.startswith('node purge'):
                         info = command[11:]
                         info = info.split(' ')
                         response = XmppCommands.node_purge(self, info)
@@ -284,7 +290,7 @@
                             self, jid_bare)
                     case 'random':
                         response = XmppCommands.set_random(self, jid_bare, db_file)
-                    case _ if command.startswith('read '):
+                    case _ if command.startswith('read'):
                         data = command[5:]
                         data = data.split()
                         url = data[0]
@@ -305,26 +311,26 @@
                             response += result
                         else:
                             response = result
-                    case _ if command.startswith('remove '):
+                    case _ if command.startswith('remove'):
                         ix_url = command[7:]
                         ix_url = ix_url.split(' ')
                         response = await XmppCommands.feed_remove(
                             self, jid_bare, db_file, ix_url)
-                    case _ if command.startswith('rename '):
+                    case _ if command.startswith('rename'):
                         response = await XmppCommands.feed_rename(
                             self, db_file, jid_bare, command)
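
This hunk trims the trailing space from most `startswith` guards. Since `match`/`case` arms are tried top to bottom, exact literals must stay ahead of their prefix guards (as `case 'pubsub list':` does above), and a bare prefix such as `'add'` now also matches any longer word beginning with it. A small self-contained illustration of both points:

```
# Illustration only; not project code.
def dispatch(command: str) -> str:
    match command:
        case 'pubsub list':                           # exact form first
            return 'list own nodes'
        case _ if command.startswith('pubsub list'):  # then the prefix form
            return 'list nodes of ' + command[12:]
        case _ if command.startswith('add'):
            # NOTE: without the trailing space this also matches 'address'.
            return 'add ' + command[4:]
        case _:
            return 'unknown'

assert dispatch('pubsub list') == 'list own nodes'
assert dispatch('pubsub list pubsub.example.org') == 'list nodes of pubsub.example.org'
```
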
                    case _ if command.startswith('reset'):
                        ix_url = command[6:]
                        ix_url = ix_url.split(' ')
                        response = await XmppCommands.mark_as_read(
-                            self, jid_bare, db_file, ix_url)
+                            jid_bare, db_file, ix_url)
                    case _ if command.startswith('search'):
                        query = command[7:]
-                        response = XmppCommands.search_items(
-                            self, db_file, query)
+                        response = XmppCommands.search_items(db_file, query)
                    case 'start':
+                        tasks = (FeedTask, XmppChatTask, XmppStatusTask)
                        response = await XmppCommands.scheduler_start(
-                            self, db_file, jid_bare)
+                            self, db_file, jid_bare, tasks)
                    case 'stats':
                        response = XmppCommands.print_statistics(db_file)
                    case 'stop':
diff --git a/slixfeed/xmpp/iq.py b/slixfeed/xmpp/iq.py
index 479996e..5081a2a 100644
--- a/slixfeed/xmpp/iq.py
+++ b/slixfeed/xmpp/iq.py
@@ -1,17 +1,19 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-import logging
+from slixfeed.log import Logger
 from slixmpp.exceptions import IqError, IqTimeout

+logger = Logger(__name__)
+

 class XmppIQ:

     async def send(self, iq):
         try:
             await iq.send(timeout=15)
         except IqTimeout as e:
-            logging.error('Error Timeout')
-            logging.error(str(e))
+            logger.error('Error Timeout')
+            logger.error(str(e))
         except IqError as e:
-            logging.error('Error XmppIQ')
-            logging.error(str(e))
+            logger.error('Error XmppIQ')
+            logger.error(str(e))
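
`XmppIQ.send` above funnels outgoing IQs through a single timeout and error path. Callers in the publish.py hunks further down use it like this (names taken from those hunks):

```
# Build an IQ (here a PubSub node-creation stanza) and send it through
# XmppIQ.send, which logs IqTimeout/IqError instead of raising.
xep = None
iq_create_node = XmppPubsub.create_node(
    self, jid_bare, node_id, xep, node_title, node_subtitle)
await XmppIQ.send(self, iq_create_node)
```
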
diff --git a/slixfeed/xmpp/message.py b/slixfeed/xmpp/message.py
index 9f4c002..b5ae0b0 100644
--- a/slixfeed/xmpp/message.py
+++ b/slixfeed/xmpp/message.py
@@ -1,24 +1,11 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-import logging
-import os
-# import slixfeed.action as action
-import slixfeed.config as config
-from slixfeed.dt import current_time, timestamp
-import slixfeed.fetch as fetch
-import slixfeed.sqlite as sqlite
-import slixfeed.task as task
-import slixfeed.url as uri
-from slixfeed.xmpp.bookmark import XmppBookmark
-# from slixfeed.xmpp.muc import XmppGroupchat
-# from slixfeed.xmpp.message import XmppMessage
-from slixfeed.xmpp.presence import XmppPresence
-from slixfeed.xmpp.upload import XmppUpload
-from slixfeed.xmpp.utility import get_chat_type
-import time
+from slixfeed.log import Logger
 import xml.sax.saxutils as saxutils

+logger = Logger(__name__)
+

 """

 NOTE
diff --git a/slixfeed/xmpp/muc.py b/slixfeed/xmpp/muc.py
deleted file mode 100644
index af9c8ab..0000000
--- a/slixfeed/xmpp/muc.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-
-TODO
-
-1) Send message to inviter that bot has joined to groupchat.
-
-2) If groupchat requires captcha, send the consequent message.
-
-3) If groupchat error is received, send that error message to inviter.
-
-FIXME
-
-1) Save name of groupchat instead of jid as name
-
-"""
-import logging
-from slixmpp.exceptions import IqError, IqTimeout, PresenceError
-
-class XmppGroupchat:
-
-    async def join(self, jid, alias=None, password=None):
-        # token = await initdb(
-        #     muc_jid,
-        #     sqlite.get_setting_value,
-        #     "token"
-        # )
-        # if token != "accepted":
-        #     token = randrange(10000, 99999)
-        #     await initdb(
-        #         muc_jid,
-        #         sqlite.update_setting_value,
-        #         ["token", token]
-        #     )
-        #     self.send_message(
-        #         mto=inviter,
-        #         mfrom=self.boundjid.bare,
-        #         mbody=(
-        #             "Send activation token {} to groupchat xmpp:{}?join."
-            ).format(token, muc_jid)
-        logging.info('Joining groupchat\n'
-                     'JID : {}\n'
-                     .format(jid))
-        jid_from = str(self.boundjid) if self.is_component else None
-        if alias == None: self.alias
-        try:
-            await self.plugin['xep_0045'].join_muc_wait(jid,
-                                                        alias,
-                                                        presence_options = {"pfrom" : jid_from},
-                                                        password=password,
-                                                        maxchars=0,
-                                                        maxstanzas=0,
-                                                        seconds=0,
-                                                        since=0,
-                                                        timeout=30)
-            result = 'joined ' + jid
-        except IqError as e:
-            logging.error('Error XmppIQ')
-            logging.error(str(e))
-            logging.error(jid)
-            result = 'error'
-        except IqTimeout as e:
-            logging.error('Timeout XmppIQ')
-            logging.error(str(e))
-            logging.error(jid)
-            result = 'timeout'
-        except PresenceError as e:
-            logging.error('Error Presence')
-            logging.error(str(e))
-            if (e.condition == 'forbidden' and
-                e.presence['error']['code'] == '403'):
-                logging.warning('{} is banned from {}'.format(self.alias, jid))
-                result = 'ban'
-            else:
-                result = 'error'
-        return result
-
-
-    def leave(self, jid):
-        jid_from = str(self.boundjid) if self.is_component else None
-        message = ('This news bot will now leave this groupchat.\n'
-                   'The JID of this news bot is xmpp:{}?message'
-                   .format(self.boundjid.bare))
-        status_message = ('This bot has left the group. '
-                          'It can be reached directly via {}'
-                          .format(self.boundjid.bare))
-        self.send_message(mto=jid,
-                          mfrom=self.boundjid,
-                          mbody=message,
-                          mtype='groupchat')
-        self.plugin['xep_0045'].leave_muc(jid,
-                                          self.alias,
-                                          status_message,
-                                          jid_from)
diff --git a/slixfeed/xmpp/profile.py b/slixfeed/xmpp/profile.py
index 0fb4b97..93dc49e 100644
--- a/slixfeed/xmpp/profile.py
+++ b/slixfeed/xmpp/profile.py
@@ -28,10 +28,12 @@ TODO
 import glob
 from slixfeed.config import Config
 import slixfeed.config as config
+from slixfeed.log import Logger
 from slixmpp.exceptions import IqTimeout, IqError
-import logging
 import os

+logger = Logger(__name__)
+

 # class XmppProfile:

 async def update(self):
@@ -39,19 +41,19 @@ async def update(self):
     try:
         await set_vcard(self)
     except IqTimeout as e:
-        logging.error('Profile vCard: Error Timeout')
-        logging.error(str(e))
+        logger.error('Profile vCard: Error Timeout')
+        logger.error(str(e))
     except IqError as e:
-        logging.error('Profile vCard: Error XmppIQ')
-        logging.error(str(e))
+        logger.error('Profile vCard: Error XmppIQ')
+        logger.error(str(e))
     try:
         await set_avatar(self)
     except IqTimeout as e:
-        logging.error('Profile Photo: Error Timeout')
-        logging.error(str(e))
+        logger.error('Profile Photo: Error Timeout')
+        logger.error(str(e))
     except IqError as e:
-        logging.error('Profile Photo: Error XmppIQ')
-        logging.error(str(e))
+        logger.error('Profile Photo: Error XmppIQ')
+        logger.error(str(e))


 async def set_avatar(self):
@@ -74,7 +76,14 @@ async def set_avatar(self):
     with open(image_file, 'rb') as avatar_file:
         avatar = avatar_file.read()
         # await self.plugin['xep_0084'].publish_avatar(avatar)
-        await self.plugin['xep_0153'].set_avatar(avatar=avatar)
+        try:
+            await self.plugin['xep_0153'].set_avatar(avatar=avatar)
+        except IqTimeout as e:
+            logger.error('Profile Photo: Error Timeout 222')
+            logger.error(str(e))
+        except IqError as e:
+            logger.error('Profile Photo: Error XmppIQ 222')
+            logger.error(str(e))


 def set_identity(self, category):
diff --git a/slixfeed/xmpp/publish.py b/slixfeed/xmpp/publish.py
index d889357..2588bd7 100644
--- a/slixfeed/xmpp/publish.py
+++ b/slixfeed/xmpp/publish.py
@@ -7,8 +7,22 @@ Functions create_node and create_entry are derived from project atomtopubsub.
""" +import asyncio +import hashlib import slixmpp.plugins.xep_0060.stanza.pubsub as pubsub from slixmpp.xmlstream import ET +import slixfeed.config as config +from slixfeed.config import Config +from slixfeed.log import Logger +import slixfeed.sqlite as sqlite +from slixfeed.syndication import Feed +import slixfeed.url as uri +from slixfeed.utilities import Utilities +from slixfeed.xmpp.iq import XmppIQ +import sys + +logger = Logger(__name__) + class XmppPubsub: @@ -110,13 +124,9 @@ class XmppPubsub: form.addField('pubsub#deliver_payloads', ftype='boolean', value=0) - - # TODO - form.addField('pubsub#type', ftype='text-single', value='http://www.w3.org/2005/Atom') - return iq @@ -243,3 +253,137 @@ class XmppPubsub: iq['pubsub']['publish'].append(item) return iq + + +class XmppPubsubAction: + + + async def send_selected_entry(self, jid_bare, node_id, entry_id): + function_name = sys._getframe().f_code.co_name + logger.debug('{}: jid_bare: {}'.format(function_name, jid_bare)) + db_file = config.get_pathname_to_database(jid_bare) + report = {} + if jid_bare == self.boundjid.bare: + node_id = 'urn:xmpp:microblog:0' + node_subtitle = None + node_title = None + else: + feed_id = sqlite.get_feed_id_by_entry_index(db_file, entry_id) + feed_id = feed_id[0] + node_id, node_title, node_subtitle = sqlite.get_feed_properties(db_file, feed_id) + print('THIS IS A TEST') + print(node_id) + print(node_title) + print(node_subtitle) + print('THIS IS A TEST') + xep = None + iq_create_node = XmppPubsub.create_node( + self, jid_bare, node_id, xep, node_title, node_subtitle) + await XmppIQ.send(self, iq_create_node) + entry = sqlite.get_entry_properties(db_file, entry_id) + print('xmpp_pubsub_send_selected_entry',jid_bare) + print(node_id) + entry_dict = Feed.pack_entry_into_dict(db_file, entry) + node_item = Feed.create_rfc4287_entry(entry_dict) + entry_url = entry_dict['link'] + item_id = Utilities.hash_url_to_md5(entry_url) + iq_create_entry = XmppPubsub.create_entry( + self, jid_bare, node_id, item_id, node_item) + await XmppIQ.send(self, iq_create_entry) + await sqlite.mark_as_read(db_file, entry_id) + report = entry_url + return report + + + async def send_unread_items(self, jid_bare): + """ + + Parameters + ---------- + jid_bare : TYPE + Bare Jabber ID. + + Returns + ------- + report : dict + URL and Number of processed entries. + + """ + function_name = sys._getframe().f_code.co_name + logger.debug('{}: jid_bare: {}'.format(function_name, jid_bare)) + db_file = config.get_pathname_to_database(jid_bare) + report = {} + subscriptions = sqlite.get_active_feeds_url(db_file) + for url in subscriptions: + url = url[0] + # feed_id = sqlite.get_feed_id(db_file, url) + # feed_id = feed_id[0] + # feed_properties = sqlite.get_feed_properties(db_file, feed_id) + feed_id = sqlite.get_feed_id(db_file, url) + feed_id = feed_id[0] + + # Publish to node 'urn:xmpp:microblog:0' for own JID + # Publish to node based on feed identifier for PubSub service. 
+
+
+    async def send_unread_items(self, jid_bare):
+        """
+
+        Parameters
+        ----------
+        jid_bare : TYPE
+            Bare Jabber ID.
+
+        Returns
+        -------
+        report : dict
+            URL and Number of processed entries.
+
+        """
+        function_name = sys._getframe().f_code.co_name
+        logger.debug('{}: jid_bare: {}'.format(function_name, jid_bare))
+        db_file = config.get_pathname_to_database(jid_bare)
+        report = {}
+        subscriptions = sqlite.get_active_feeds_url(db_file)
+        for url in subscriptions:
+            url = url[0]
+            # feed_id = sqlite.get_feed_id(db_file, url)
+            # feed_id = feed_id[0]
+            # feed_properties = sqlite.get_feed_properties(db_file, feed_id)
+            feed_id = sqlite.get_feed_id(db_file, url)
+            feed_id = feed_id[0]
+
+            # Publish to node 'urn:xmpp:microblog:0' for own JID
+            # Publish to node based on feed identifier for PubSub service.
+
+            if jid_bare == self.boundjid.bare:
+                node_id = 'urn:xmpp:microblog:0'
+                node_subtitle = None
+                node_title = None
+            else:
+                # node_id = feed_properties[2]
+                # node_title = feed_properties[3]
+                # node_subtitle = feed_properties[5]
+                node_id = sqlite.get_feed_identifier(db_file, feed_id)
+                node_id = node_id[0]
+                if not node_id:
+                    counter = 0
+                    hostname = uri.get_hostname(url)
+                    hostname = hostname.replace('.','-')
+                    identifier = hostname + ':' + str(counter)
+                    while True:
+                        if sqlite.check_identifier_exist(db_file, identifier):
+                            counter += 1
+                            identifier = hostname + ':' + str(counter)
+                        else:
+                            break
+                    await sqlite.update_feed_identifier(db_file, feed_id, identifier)
+                    node_id = sqlite.get_feed_identifier(db_file, feed_id)
+                    node_id = node_id[0]
+                node_title = sqlite.get_feed_title(db_file, feed_id)
+                node_title = node_title[0]
+                node_subtitle = sqlite.get_feed_subtitle(db_file, feed_id)
+                node_subtitle = node_subtitle[0]
+            xep = None
+            node_exist = await XmppPubsub.get_node_configuration(self, jid_bare, node_id)
+            if not node_exist:
+                iq_create_node = XmppPubsub.create_node(
+                    self, jid_bare, node_id, xep, node_title, node_subtitle)
+                await XmppIQ.send(self, iq_create_node)
+            entries = sqlite.get_unread_entries_of_feed(db_file, feed_id)
+            report[url] = len(entries)
+            for entry in entries:
+                feed_entry = Feed.pack_entry_into_dict(db_file, entry)
+                node_entry = Feed.create_rfc4287_entry(feed_entry)
+                entry_url = feed_entry['link']
+                item_id = Utilities.hash_url_to_md5(entry_url)
+                print('PubSub node item was sent to', jid_bare, node_id)
+                print(entry_url)
+                print(item_id)
+                iq_create_entry = XmppPubsub.create_entry(
+                    self, jid_bare, node_id, item_id, node_entry)
+                await XmppIQ.send(self, iq_create_entry)
+                ix = entry[0]
+                await sqlite.mark_as_read(db_file, ix)
+        return report
+
+
+class XmppPubsubTask:
+
+
+    async def task_publish(self, jid_bare):
+        db_file = config.get_pathname_to_database(jid_bare)
+        if jid_bare not in self.settings:
+            Config.add_settings_jid(self.settings, jid_bare, db_file)
+        while True:
+            await XmppPubsubAction.send_unread_items(self, jid_bare)
+            await asyncio.sleep(60 * 180)
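
When a feed has no identifier yet, `send_unread_items` synthesizes one of the form `hostname:counter`, probing the database until a free value is found. The same loop, extracted as a standalone sketch (`exists` stands in for the `sqlite.check_identifier_exist` call the patch makes):

```
# Sketch of the node-identifier scheme used above:
# 'example-org:0', 'example-org:1', ... first free value wins.
def free_identifier(hostname: str, exists) -> str:
    hostname = hostname.replace('.', '-')
    counter = 0
    while exists('{}:{}'.format(hostname, counter)):
        counter += 1
    return '{}:{}'.format(hostname, counter)

# e.g. free_identifier('example.org', {'example-org:0'}.__contains__)
# -> 'example-org:1'
```
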
diff --git a/slixfeed/xmpp/status.py b/slixfeed/xmpp/status.py
new file mode 100644
index 0000000..d4d0561
--- /dev/null
+++ b/slixfeed/xmpp/status.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import asyncio
+from slixfeed.config import Config
+import slixfeed.config as config
+import slixfeed.sqlite as sqlite
+from slixfeed.log import Logger
+from slixfeed.xmpp.presence import XmppPresence
+import sys
+
+logger = Logger(__name__)
+
+
+class XmppStatus:
+
+
+    def send_status_message(self, jid_bare):
+        """
+        Send status message.
+
+        Parameters
+        ----------
+        jid_bare : str
+            Bare Jabber ID.
+        """
+        function_name = sys._getframe().f_code.co_name
+        logger.debug('{}: jid: {}'.format(function_name, jid_bare))
+        status_text = '📜️ Slixfeed RSS News Bot'
+        db_file = config.get_pathname_to_database(jid_bare)
+        enabled = Config.get_setting_value(self.settings, jid_bare, 'enabled')
+        if enabled:
+            jid_task = self.pending_tasks[jid_bare] if jid_bare in self.pending_tasks else None
+            if jid_task and len(jid_task):
+                # print('status dnd for ' + jid_bare)
+                status_mode = 'dnd'
+                status_text = jid_task[list(jid_task.keys())[0]]
+            else:
+                # print('status enabled for ' + jid_bare)
+                feeds = sqlite.get_number_of_items(db_file, 'feeds_properties')
+                if not feeds:
+                    # print('status no feeds for ' + jid_bare)
+                    status_mode = 'available'
+                    status_text = '📪️ Send a URL from a blog or a news site'
+                else:
+                    unread = sqlite.get_number_of_entries_unread(db_file)
+                    if unread:
+                        # print('status unread for ' + jid_bare)
+                        status_mode = 'chat'
+                        status_text = '📬️ There are {} news items'.format(str(unread))
+                    else:
+                        # print('status no news for ' + jid_bare)
+                        status_mode = 'available'
+                        status_text = '📭️ No news'
+        else:
+            # print('status disabled for ' + jid_bare)
+            status_mode = 'xa'
+            status_text = '📪️ Send "Start" to receive updates'
+        XmppPresence.send(self, jid_bare, status_text, status_type=status_mode)
+
+
+class XmppStatusTask:
+
+
+    async def task_status(self, jid_bare):
+        while True:
+            XmppStatus.send_status_message(self, jid_bare)
+            await asyncio.sleep(60 * 90)
+
+
+    def restart_task(self, jid_bare):
+        if jid_bare == self.boundjid.bare:
+            return
+        if jid_bare not in self.task_manager:
+            self.task_manager[jid_bare] = {}
+            logger.info('Creating new task manager for JID {}'.format(jid_bare))
+        logger.info('Stopping task "status" for JID {}'.format(jid_bare))
+        try:
+            self.task_manager[jid_bare]['status'].cancel()
+        except:
+            logger.info('No task "status" for JID {} (XmppStatusTask.restart_task)'
+                        .format(jid_bare))
+        logger.info('Starting task "status" for JID {}'.format(jid_bare))
+        self.task_manager[jid_bare]['status'] = asyncio.create_task(
+            XmppStatusTask.task_status(self, jid_bare))
+
+
+    def stop_task(self, jid_bare):
+        if (jid_bare in self.task_manager and
+            'status' in self.task_manager[jid_bare]):
+            self.task_manager[jid_bare]['status'].cancel()
+        else:
+            logger.debug('No task "status" for JID {}'
+                         .format(jid_bare))
\ No newline at end of file
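
For reference, the presence logic of `send_status_message` above reduces to a four-way mapping from JID state to show value and status text:

```
# Restatement of the mapping implemented in XmppStatus.send_status_message.
# updates disabled          -> 'xa'        'Send "Start" to receive updates'
# pending task for the JID  -> 'dnd'       text of the first pending task
# no feeds yet              -> 'available' 'Send a URL from a blog or a news site'
# unread entries exist      -> 'chat'      'There are N news items'
# everything read           -> 'available' 'No news'
```
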
diff --git a/slixfeed/xmpp/upload.py b/slixfeed/xmpp/upload.py
index eefa31c..c548d6c 100644
--- a/slixfeed/xmpp/upload.py
+++ b/slixfeed/xmpp/upload.py
@@ -6,15 +6,17 @@ Based on http_upload.py example from project slixmpp

 https://codeberg.org/poezio/slixmpp/src/branch/master/examples/http_upload.py
 """
-import logging
+from slixfeed.log import Logger
 from slixmpp.exceptions import IqTimeout, IqError
 from slixmpp.plugins.xep_0363.http_upload import HTTPError
+
+logger = Logger(__name__)
 # import sys


 class XmppUpload:

     async def start(self, jid, filename, domain=None):
-        logging.info('Uploading file %s...', filename)
+        logger.info('Uploading file %s...', filename)
         try:
             upload_file = self['xep_0363'].upload_file
             # if self.encrypted and not self['xep_0454']:
@@ -31,19 +33,21 @@ class XmppUpload:
             url = await upload_file(
                 filename, domain, timeout=10,
             )
-            logging.info('Upload successful!')
-            logging.info('Sending file to %s', jid)
+            logger.info('Upload successful!')
+            logger.info('Sending file to %s', jid)
         except HTTPError:
-            url = (
-                "Error: It appears that this server doesn't support "
-                "HTTP File Upload."
-            )
-            logging.error(
-                "It appears that this server doesn't support HTTP File Upload."
-            )
+            url = ('Error: It appears that this server does not support '
+                   'HTTP File Upload.')
+            logger.error('It appears that this server does not support '
+                         'HTTP File Upload.')
             # raise HTTPError(
             #     "This server doesn't appear to support HTTP File Upload"
             # )
-        except IqTimeout:
-            raise TimeoutError('Could not send message in time')
+        except IqError as e:
+            logger.error('Could not send message')
+            logger.error(e)
+        except IqTimeout as e:
+            # raise TimeoutError('Could not send message in time')
+            logger.error('Could not send message in time')
+            logger.error(e)
         return url
diff --git a/slixfeed/xmpp/utilities.py b/slixfeed/xmpp/utilities.py
new file mode 100644
index 0000000..c1dc3e1
--- /dev/null
+++ b/slixfeed/xmpp/utilities.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from slixfeed.log import Logger
+from slixmpp.exceptions import IqError, IqTimeout
+
+logger = Logger(__name__)
+
+# class XmppChat
+# class XmppUtility:
+
+
+class XmppUtilities:
+
+
+    async def get_chat_type(self, jid):
+        """
+        Check chat (i.e. JID) type.
+
+        If iqresult["disco_info"]["features"] contains XML namespace
+        of 'http://jabber.org/protocol/muc', then it is a 'groupchat'.
+
+        Unless it has a forward slash, which would indicate that it is
+        a chat which is conducted through a groupchat.
+
+        Otherwise, determine type 'chat'.
+
+        Parameters
+        ----------
+        jid : str
+            Jabber ID.
+
+        Returns
+        -------
+        result : str
+            'chat' or 'groupchat' or 'error'.
+        """
+        try:
+            iqresult = await self["xep_0030"].get_info(jid=jid)
+            features = iqresult["disco_info"]["features"]
+            # identity = iqresult['disco_info']['identities']
+            # if 'account' in identity:
+            # if 'conference' in identity:
+            if ('http://jabber.org/protocol/muc' in features) and not ('/' in jid):
+                result = "groupchat"
+            # TODO elif
+            # NOTE Is it needed? We do not interact with gateways or services
+            else:
+                result = "chat"
+            logger.info('Jabber ID: {}\n'
+                        'Chat Type: {}'.format(jid, result))
+        except (IqError, IqTimeout) as e:
+            logger.warning('Chat type could not be determined for {}'.format(jid))
+            logger.error(e)
+            result = 'error'
+        # except BaseException as e:
+        #     logger.error('BaseException', str(e))
+        # finally:
+        #     logger.info('Chat type is:', chat_type)
+        return result
diff --git a/slixfeed/xmpp/utility.py b/slixfeed/xmpp/utility.py
deleted file mode 100644
index 3d20434..0000000
--- a/slixfeed/xmpp/utility.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-from slixmpp.exceptions import IqError, IqTimeout
-import logging
-
-# class XmppChat
-# class XmppUtility:
-
-
-# TODO Rename to get_jid_type
-async def get_chat_type(self, jid):
-    """
-    Check chat (i.e. JID) type.
-
-    If iqresult["disco_info"]["features"] contains XML namespace
-    of 'http://jabber.org/protocol/muc', then it is a 'groupchat'.
-
-    Unless it has forward slash, which would indicate that it is
-    a chat which is conducted through a groupchat.
-
-    Otherwise, determine type 'chat'.
-
-    Parameters
-    ----------
-    jid : str
-        Jabber ID.
-
-    Returns
-    -------
-    result : str
-        'chat' or 'groupchat' or 'error'.
- """ - try: - iqresult = await self["xep_0030"].get_info(jid=jid) - features = iqresult["disco_info"]["features"] - # identity = iqresult['disco_info']['identities'] - # if 'account' in indentity: - # if 'conference' in indentity: - if ('http://jabber.org/protocol/muc' in features) and not ('/' in jid): - result = "groupchat" - # TODO elif - # NOTE Is it needed? We do not interact with gateways or services - else: - result = "chat" - logging.info('Jabber ID: {}\n' - 'Chat Type: {}'.format(jid, result)) - except (IqError, IqTimeout) as e: - logging.warning('Chat type could not be determined for {}'.format(jid)) - logging.error(e) - result = 'error' - # except BaseException as e: - # logging.error('BaseException', str(e)) - # finally: - # logging.info('Chat type is:', chat_type) - return result