From c9c552e33ffae800a29d2dc6d28389766be78d35 Mon Sep 17 00:00:00 2001 From: Schimon Jehudah Date: Tue, 23 Jan 2024 14:37:10 +0000 Subject: [PATCH] Add support for ePUB and Text. Import OPML only from a moderator. Fix error for sqlite.py module. --- slixfeed/action.py | 54 +++++++++++++++++++++++++++++++++++----- slixfeed/sqlite.py | 2 +- slixfeed/xmpp/process.py | 22 +++++++++++++--- 3 files changed, 67 insertions(+), 11 deletions(-) diff --git a/slixfeed/action.py b/slixfeed/action.py index 5236dc0..3311afd 100644 --- a/slixfeed/action.py +++ b/slixfeed/action.py @@ -31,6 +31,7 @@ from http.client import IncompleteRead import json import logging from lxml import html +import os import slixfeed.config as config import slixfeed.crawl as crawl from slixfeed.dt import ( @@ -50,6 +51,7 @@ from slixfeed.url import ( import slixfeed.xmpp.bookmark as bookmark from urllib import error from urllib.parse import parse_qs, urlsplit +import xml2epub import xml.etree.ElementTree as ET try: @@ -1015,6 +1017,8 @@ def generate_document(data, url, ext, filename): logging.warning( "Check that package readability is installed.") match ext: + case "epub": + generate_epub(content, filename) case "html": generate_html(content, filename) case "md": @@ -1022,7 +1026,8 @@ def generate_document(data, url, ext, filename): generate_markdown(content, filename) except: logging.warning( - "Check that package html2text is installed.") + "Check that package html2text is installed, " + "or try again.") error = ( "Package html2text was not found.") case "pdf": @@ -1031,9 +1036,13 @@ def generate_document(data, url, ext, filename): except: logging.warning( "Check that packages pdfkit and wkhtmltopdf " - "are installed.") + "are installed, or try again.") error = ( "Package pdfkit or wkhtmltopdf was not found.") + case "text": + generate_txt(content, filename) + case "txt": + generate_txt(content, filename) if error: return error @@ -1095,15 +1104,34 @@ async def extract_image_from_html(url): return image_url +def generate_epub(text, pathname): + ## create an empty eBook + pathname_list = pathname.split("/") + filename = pathname_list.pop() + directory = "/".join(pathname_list) + book = xml2epub.Epub(filename) + ## create chapters by url + # chapter0 = xml2epub.create_chapter_from_string(text, title=filename, strict=False) + chapter0 = xml2epub.create_chapter_from_string(text, strict=False) + #### create chapter objects + # chapter1 = xml2epub.create_chapter_from_url("https://dev.to/devteam/top-7-featured-dev-posts-from-the-past-week-h6h") + # chapter2 = xml2epub.create_chapter_from_url("https://dev.to/ks1912/getting-started-with-docker-34g6") + ## add chapters to your eBook + book.add_chapter(chapter0) + # book.add_chapter(chapter1) + # book.add_chapter(chapter2) + ## generate epub file + filename_tmp = "slixfeedepub" + book.create_epub(directory, epub_name=filename_tmp) + pathname_tmp = os.path.join(directory, filename_tmp) + ".epub" + os.rename(pathname_tmp, pathname) + + def generate_html(text, filename): with open(filename, 'w') as file: file.write(text) -def generate_pdf(text, filename): - pdfkit.from_string(text, filename) - - def generate_markdown(text, filename): h2m = html2text.HTML2Text() # Convert HTML to Markdown @@ -1112,6 +1140,20 @@ def generate_markdown(text, filename): file.write(markdown) +def generate_pdf(text, filename): + pdfkit.from_string(text, filename) + + +def generate_txt(text, filename): + text = remove_html_tags(text) + with open(filename, 'w') as file: + file.write(text) + +def remove_html_tags(data): + data = BeautifulSoup(data, "lxml").text + data = data.replace("\n\n", "\n") + return data + # TODO Add support for eDonkey, Gnutella, Soulseek async def get_magnet(link): parted_link = urlsplit(link) diff --git a/slixfeed/sqlite.py b/slixfeed/sqlite.py index be5368b..877620a 100644 --- a/slixfeed/sqlite.py +++ b/slixfeed/sqlite.py @@ -256,7 +256,7 @@ async def import_feeds(db_file, feeds): try: cur.execute(sql, par) except IntegrityError as e: - logging.warning("Skipping: " + url) + logging.warning("Skipping: " + str(url)) logging.error(e) diff --git a/slixfeed/xmpp/process.py b/slixfeed/xmpp/process.py index 5825587..149ea5e 100644 --- a/slixfeed/xmpp/process.py +++ b/slixfeed/xmpp/process.py @@ -83,12 +83,26 @@ async def message(self, message): jid = message["from"].bare message_text = " ".join(message["body"].split()) - if (message["type"] == "groupchat" and - message['muc']['nick'] == self.nick): + # if (message["type"] == "groupchat" and + # message['muc']['nick'] == self.nick): + # return + + # FIXME Code repetition. See below. + if message["type"] == "groupchat": + if (message['muc']['nick'] == self.nick): + return + jid_full = str(message["from"]) + role = self.plugin['xep_0045'].get_jid_property( + jid, + jid_full[jid_full.index("/")+1:], + "role") + if role != "moderator": return # NOTE This is an exceptional case in which we treat - # type groupchat the same as type chat. + # type groupchat the same as type chat in a way that + # doesn't require an exclamation mark for actionable + # command. if (message_text.lower().startswith("http") and message_text.lower().endswith(".opml")): url = message_text @@ -471,7 +485,7 @@ async def message(self, message): ext = ext if ext else 'pdf' url = None error = None - if ext in ("html", "md", "pdf"): + if ext in ("epub", "html", "md", "pdf", "txt"): status_type = "dnd" status_message = ( "📃️ Procesing request to produce {} document..."