Add support for ePUB and Text.

Import OPML only from a moderator.
Fix error for sqlite.py module.
This commit is contained in:
Schimon Jehudah 2024-01-23 14:37:10 +00:00
parent 13d87e2be7
commit c9c552e33f
3 changed files with 67 additions and 11 deletions

View file

@ -31,6 +31,7 @@ from http.client import IncompleteRead
import json import json
import logging import logging
from lxml import html from lxml import html
import os
import slixfeed.config as config import slixfeed.config as config
import slixfeed.crawl as crawl import slixfeed.crawl as crawl
from slixfeed.dt import ( from slixfeed.dt import (
@ -50,6 +51,7 @@ from slixfeed.url import (
import slixfeed.xmpp.bookmark as bookmark import slixfeed.xmpp.bookmark as bookmark
from urllib import error from urllib import error
from urllib.parse import parse_qs, urlsplit from urllib.parse import parse_qs, urlsplit
import xml2epub
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
try: try:
@ -1015,6 +1017,8 @@ def generate_document(data, url, ext, filename):
logging.warning( logging.warning(
"Check that package readability is installed.") "Check that package readability is installed.")
match ext: match ext:
case "epub":
generate_epub(content, filename)
case "html": case "html":
generate_html(content, filename) generate_html(content, filename)
case "md": case "md":
@ -1022,7 +1026,8 @@ def generate_document(data, url, ext, filename):
generate_markdown(content, filename) generate_markdown(content, filename)
except: except:
logging.warning( logging.warning(
"Check that package html2text is installed.") "Check that package html2text is installed, "
"or try again.")
error = ( error = (
"Package html2text was not found.") "Package html2text was not found.")
case "pdf": case "pdf":
@ -1031,9 +1036,13 @@ def generate_document(data, url, ext, filename):
except: except:
logging.warning( logging.warning(
"Check that packages pdfkit and wkhtmltopdf " "Check that packages pdfkit and wkhtmltopdf "
"are installed.") "are installed, or try again.")
error = ( error = (
"Package pdfkit or wkhtmltopdf was not found.") "Package pdfkit or wkhtmltopdf was not found.")
case "text":
generate_txt(content, filename)
case "txt":
generate_txt(content, filename)
if error: if error:
return error return error
@ -1095,15 +1104,34 @@ async def extract_image_from_html(url):
return image_url return image_url
def generate_epub(text, pathname):
    """
    Write an HTML string to ``pathname`` as a single-chapter ePUB book.

    Parameters
    ----------
    text : str
        HTML markup that becomes the only chapter of the book.
    pathname : str
        Destination of the ePUB file, including its filename. A bare
        filename (no directory part) is created in the current directory.

    Returns
    -------
    None.
    """
    # os.path.split handles the platform's path separators, unlike a manual
    # split on "/".
    directory, filename = os.path.split(pathname)
    # The target filename doubles as the title of the book.
    book = xml2epub.Epub(filename)
    # strict=False is forwarded to xml2epub as in the original call.
    chapter = xml2epub.create_chapter_from_string(text, strict=False)
    book.add_chapter(chapter)
    # The book is first written under a fixed temporary name and then moved
    # to the requested pathname.
    # NOTE(review): presumably needed because create_epub does not keep an
    # arbitrary filename verbatim -- confirm against xml2epub.
    filename_tmp = "slixfeedepub"
    book.create_epub(directory, epub_name=filename_tmp)
    pathname_tmp = os.path.join(directory, filename_tmp) + ".epub"
    # os.replace overwrites an existing target on every platform, whereas
    # os.rename raises on Windows when the destination already exists.
    os.replace(pathname_tmp, pathname)
def generate_html(text, filename):
    """
    Write an HTML string to a file, replacing any existing content.

    Parameters
    ----------
    text : str
        HTML markup to store.
    filename : str
        Path of the file to write.

    Returns
    -------
    None.
    """
    # An explicit encoding keeps the output independent of the locale and
    # avoids UnicodeEncodeError for characters outside the locale's charset.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(text)
def generate_pdf(text, filename):
    """
    Render an HTML string to a PDF file by handing it to pdfkit.

    Parameters
    ----------
    text : str
        HTML markup to convert.
    filename : str
        Path of the PDF file to produce.

    Returns
    -------
    None.
    """
    # The result of pdfkit.from_string is deliberately not propagated.
    pdfkit.from_string(text, filename)
def generate_markdown(text, filename): def generate_markdown(text, filename):
h2m = html2text.HTML2Text() h2m = html2text.HTML2Text()
# Convert HTML to Markdown # Convert HTML to Markdown
@ -1112,6 +1140,20 @@ def generate_markdown(text, filename):
file.write(markdown) file.write(markdown)
def generate_pdf(text, filename):
    """
    Render an HTML string to a PDF file by handing it to pdfkit.

    Parameters
    ----------
    text : str
        HTML markup to convert.
    filename : str
        Path of the PDF file to produce.

    Returns
    -------
    None.
    """
    # The result of pdfkit.from_string is deliberately not propagated.
    pdfkit.from_string(text, filename)
def generate_txt(text, filename):
    """
    Strip the markup from an HTML string and write the plain text to a file.

    Parameters
    ----------
    text : str
        HTML markup to convert.
    filename : str
        Path of the text file to write; existing content is replaced.

    Returns
    -------
    None.
    """
    text = remove_html_tags(text)
    # An explicit encoding keeps the output independent of the locale and
    # avoids UnicodeEncodeError for characters outside the locale's charset.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(text)
def remove_html_tags(data):
    """
    Return the text content of an HTML string with doubled newlines halved.

    Parameters
    ----------
    data : str
        HTML markup.

    Returns
    -------
    str
        Text extracted by BeautifulSoup (lxml parser). Each pair of
        consecutive newlines is replaced by one newline in a single pass,
        so longer runs of newlines are shortened but not fully collapsed.
    """
    soup = BeautifulSoup(data, "lxml")
    plain = soup.text
    return plain.replace("\n\n", "\n")
# TODO Add support for eDonkey, Gnutella, Soulseek # TODO Add support for eDonkey, Gnutella, Soulseek
async def get_magnet(link): async def get_magnet(link):
parted_link = urlsplit(link) parted_link = urlsplit(link)

View file

@ -256,7 +256,7 @@ async def import_feeds(db_file, feeds):
try: try:
cur.execute(sql, par) cur.execute(sql, par)
except IntegrityError as e: except IntegrityError as e:
logging.warning("Skipping: " + url) logging.warning("Skipping: " + str(url))
logging.error(e) logging.error(e)

View file

@ -83,12 +83,26 @@ async def message(self, message):
jid = message["from"].bare jid = message["from"].bare
message_text = " ".join(message["body"].split()) message_text = " ".join(message["body"].split())
if (message["type"] == "groupchat" and # if (message["type"] == "groupchat" and
message['muc']['nick'] == self.nick): # message['muc']['nick'] == self.nick):
# return
# FIXME Code repetition. See below.
if message["type"] == "groupchat":
if (message['muc']['nick'] == self.nick):
return
jid_full = str(message["from"])
role = self.plugin['xep_0045'].get_jid_property(
jid,
jid_full[jid_full.index("/")+1:],
"role")
if role != "moderator":
return return
# NOTE This is an exceptional case in which we treat # NOTE This is an exceptional case in which we treat
# type groupchat the same as type chat. # type groupchat the same as type chat in a way that
# doesn't require an exclamation mark for actionable
# command.
if (message_text.lower().startswith("http") and if (message_text.lower().startswith("http") and
message_text.lower().endswith(".opml")): message_text.lower().endswith(".opml")):
url = message_text url = message_text
@ -471,7 +485,7 @@ async def message(self, message):
ext = ext if ext else 'pdf' ext = ext if ext else 'pdf'
url = None url = None
error = None error = None
if ext in ("html", "md", "pdf"): if ext in ("epub", "html", "md", "pdf", "txt"):
status_type = "dnd" status_type = "dnd"
status_message = ( status_message = (
"📃️ Procesing request to produce {} document..." "📃️ Procesing request to produce {} document..."