Add support for generating EPUB and plain-text documents.

In group chats, import OPML only when the sender is a moderator. Fix an error in the sqlite.py module.
2024-01-23 14:37:10 +00:00 · 2024-01-23 14:37:10 +00:00 · c9c552e33f
commit c9c552e33f
parent 13d87e2be7
3 changed files with 67 additions and 11 deletions
--- a/slixfeed/action.py
+++ b/slixfeed/action.py
@ -31,6 +31,7 @@ from http.client import IncompleteRead
 import json
 import logging
 from lxml import html
+import os
 import slixfeed.config as config
 import slixfeed.crawl as crawl
 from slixfeed.dt import (
@ -50,6 +51,7 @@ from slixfeed.url import (
 import slixfeed.xmpp.bookmark as bookmark
 from urllib import error
 from urllib.parse import parse_qs, urlsplit
+import xml2epub
 import xml.etree.ElementTree as ET

 try:
@ -1015,6 +1017,8 @@ def generate_document(data, url, ext, filename):
        logging.warning(
            "Check that package readability is installed.")
    match ext:
+        case "epub":
+            generate_epub(content, filename)
        case "html":
            generate_html(content, filename)
        case "md":
@ -1022,7 +1026,8 @@ def generate_document(data, url, ext, filename):
                generate_markdown(content, filename)
            except:
                logging.warning(
-                    "Check that package html2text is installed.")
+                    "Check that package html2text is installed, "
+                    "or try again.")
                error = (
                    "Package html2text was not found.")
        case "pdf":
@ -1031,9 +1036,13 @@ def generate_document(data, url, ext, filename):
            except:
                logging.warning(
                    "Check that packages pdfkit and wkhtmltopdf "
-                    "are installed.")
+                    "are installed, or try again.")
                error = (
                    "Package pdfkit or wkhtmltopdf was not found.")
+        case "text":
+            generate_txt(content, filename)
+        case "txt":
+            generate_txt(content, filename)
    if error:
        return error

@ -1095,15 +1104,34 @@ async def extract_image_from_html(url):
            return image_url


+def generate_epub(text, pathname):
+    ## create an empty eBook
+    pathname_list = pathname.split("/")
+    filename = pathname_list.pop()
+    directory = "/".join(pathname_list)
+    book = xml2epub.Epub(filename)
+    ## create chapters by url
+    # chapter0 = xml2epub.create_chapter_from_string(text, title=filename, strict=False)
+    chapter0 = xml2epub.create_chapter_from_string(text, strict=False)
+    #### create chapter objects
+    # chapter1 = xml2epub.create_chapter_from_url("https://dev.to/devteam/top-7-featured-dev-posts-from-the-past-week-h6h")
+    # chapter2 = xml2epub.create_chapter_from_url("https://dev.to/ks1912/getting-started-with-docker-34g6")
+    ## add chapters to your eBook
+    book.add_chapter(chapter0)
+    # book.add_chapter(chapter1)
+    # book.add_chapter(chapter2)
+    ## generate epub file
+    filename_tmp = "slixfeedepub"
+    book.create_epub(directory, epub_name=filename_tmp)
+    pathname_tmp = os.path.join(directory, filename_tmp) + ".epub"
+    os.rename(pathname_tmp, pathname)
+
+
 def generate_html(text, filename):
    with open(filename, 'w') as file:
        file.write(text)


-def generate_pdf(text, filename):
-    pdfkit.from_string(text, filename)
-
-
 def generate_markdown(text, filename):
    h2m = html2text.HTML2Text()
    # Convert HTML to Markdown
@ -1112,6 +1140,20 @@ def generate_markdown(text, filename):
        file.write(markdown)


+def generate_pdf(text, filename):
+    pdfkit.from_string(text, filename)
+
+
+def generate_txt(text, filename):
+    text = remove_html_tags(text)
+    with open(filename, 'w') as file:
+        file.write(text)
+
+def remove_html_tags(data):
+    data = BeautifulSoup(data, "lxml").text
+    data = data.replace("\n\n", "\n")
+    return data
+
 # TODO Add support for eDonkey, Gnutella, Soulseek
 async def get_magnet(link):
    parted_link = urlsplit(link)
--- a/slixfeed/sqlite.py
+++ b/slixfeed/sqlite.py
@ -256,7 +256,7 @@ async def import_feeds(db_file, feeds):
                try:
                    cur.execute(sql, par)
                except IntegrityError as e:
-                    logging.warning("Skipping: " + url)
+                    logging.warning("Skipping: " + str(url))
                    logging.error(e)


--- a/slixfeed/xmpp/process.py
+++ b/slixfeed/xmpp/process.py
@ -83,12 +83,26 @@ async def message(self, message):
        jid = message["from"].bare
        message_text = " ".join(message["body"].split())

-        if (message["type"] == "groupchat" and
-            message['muc']['nick'] == self.nick):
+        # if (message["type"] == "groupchat" and
+        #     message['muc']['nick'] == self.nick):
+        #         return
+
+        # FIXME Code repetition. See below.
+        if message["type"] == "groupchat":
+            if (message['muc']['nick'] == self.nick):
+                return
+            jid_full = str(message["from"])
+            role = self.plugin['xep_0045'].get_jid_property(
+                jid,
+                jid_full[jid_full.index("/")+1:],
+                "role")
+            if role != "moderator":
                return

        # NOTE This is an exceptional case in which we treat
-        # type groupchat the same as type chat.
+        # type groupchat the same as type chat in a way that
+        # doesn't require an exclamation mark for actionable
+        # command.
        if (message_text.lower().startswith("http") and
            message_text.lower().endswith(".opml")):
            url = message_text
@ -471,7 +485,7 @@ async def message(self, message):
                ext = ext if ext else 'pdf'
                url = None
                error = None
-                if ext in ("html", "md", "pdf"):
+                if ext in ("epub", "html", "md", "pdf", "txt"):
                    status_type = "dnd"
                    status_message = (
                        "📃️ Procesing request to produce {} document..."