Add support for ePUB and Text.
Import OPML only from a moderator. Fix error for sqlite.py module.
This commit is contained in:
parent
13d87e2be7
commit
c9c552e33f
3 changed files with 67 additions and 11 deletions
|
@ -31,6 +31,7 @@ from http.client import IncompleteRead
|
|||
import json
|
||||
import logging
|
||||
from lxml import html
|
||||
import os
|
||||
import slixfeed.config as config
|
||||
import slixfeed.crawl as crawl
|
||||
from slixfeed.dt import (
|
||||
|
@ -50,6 +51,7 @@ from slixfeed.url import (
|
|||
import slixfeed.xmpp.bookmark as bookmark
|
||||
from urllib import error
|
||||
from urllib.parse import parse_qs, urlsplit
|
||||
import xml2epub
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
try:
|
||||
|
@ -1015,6 +1017,8 @@ def generate_document(data, url, ext, filename):
|
|||
logging.warning(
|
||||
"Check that package readability is installed.")
|
||||
match ext:
|
||||
case "epub":
|
||||
generate_epub(content, filename)
|
||||
case "html":
|
||||
generate_html(content, filename)
|
||||
case "md":
|
||||
|
@ -1022,7 +1026,8 @@ def generate_document(data, url, ext, filename):
|
|||
generate_markdown(content, filename)
|
||||
except:
|
||||
logging.warning(
|
||||
"Check that package html2text is installed.")
|
||||
"Check that package html2text is installed, "
|
||||
"or try again.")
|
||||
error = (
|
||||
"Package html2text was not found.")
|
||||
case "pdf":
|
||||
|
@ -1031,9 +1036,13 @@ def generate_document(data, url, ext, filename):
|
|||
except:
|
||||
logging.warning(
|
||||
"Check that packages pdfkit and wkhtmltopdf "
|
||||
"are installed.")
|
||||
"are installed, or try again.")
|
||||
error = (
|
||||
"Package pdfkit or wkhtmltopdf was not found.")
|
||||
case "text":
|
||||
generate_txt(content, filename)
|
||||
case "txt":
|
||||
generate_txt(content, filename)
|
||||
if error:
|
||||
return error
|
||||
|
||||
|
@ -1095,15 +1104,34 @@ async def extract_image_from_html(url):
|
|||
return image_url
|
||||
|
||||
|
||||
def generate_epub(text, pathname):
    """
    Write HTML content to an ePUB file.

    The book is created by xml2epub under a temporary name inside the
    target directory and is then moved to ``pathname``.

    Parameters
    ----------
    text : str
        HTML markup used as the single chapter of the book.
    pathname : str
        Destination path of the ePUB file, including its file name.
        The file name is also passed to xml2epub.Epub.
    """
    # os.path.split copes with bare file names and root-level paths,
    # which manual splitting on "/" turns into an empty directory.
    directory, filename = os.path.split(pathname)
    directory = directory or os.curdir
    book = xml2epub.Epub(filename)
    chapter = xml2epub.create_chapter_from_string(text, strict=False)
    book.add_chapter(chapter)
    # NOTE(review): the temporary name is fixed, so two exports running
    # at once into the same directory would use the same file.
    filename_tmp = "slixfeedepub"
    book.create_epub(directory, epub_name=filename_tmp)
    pathname_tmp = os.path.join(directory, filename_tmp) + ".epub"
    # os.replace overwrites an existing target on every platform,
    # whereas os.rename fails on Windows when the target exists.
    os.replace(pathname_tmp, pathname)
|
||||
|
||||
|
||||
def generate_html(text, filename):
    """
    Write HTML content to a file.

    Parameters
    ----------
    text : str
        HTML markup to write.
    filename : str
        Path of the file to create or overwrite.
    """
    # An explicit encoding keeps the output independent of the locale
    # and avoids UnicodeEncodeError on non-ASCII content.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(text)
|
||||
|
||||
|
||||
def generate_pdf(text, filename):
    """
    Produce a PDF file from HTML content by calling pdfkit.from_string.

    Parameters
    ----------
    text : str
        HTML markup handed to pdfkit.
    filename : str
        Path handed to pdfkit as the output location.
    """
    pdfkit.from_string(text, filename)
|
||||
|
||||
|
||||
def generate_markdown(text, filename):
|
||||
h2m = html2text.HTML2Text()
|
||||
# Convert HTML to Markdown
|
||||
|
@ -1112,6 +1140,20 @@ def generate_markdown(text, filename):
|
|||
file.write(markdown)
|
||||
|
||||
|
||||
def generate_pdf(text, filename):
    """
    Produce a PDF file from HTML content by calling pdfkit.from_string.

    Parameters
    ----------
    text : str
        HTML markup handed to pdfkit.
    filename : str
        Path handed to pdfkit as the output location.
    """
    pdfkit.from_string(text, filename)
|
||||
|
||||
|
||||
def generate_txt(text, filename):
    """
    Write the plain-text rendition of HTML content to a file.

    Parameters
    ----------
    text : str
        HTML markup; it is passed through remove_html_tags first.
    filename : str
        Path of the file to create or overwrite.
    """
    text = remove_html_tags(text)
    # An explicit encoding keeps the output independent of the locale
    # and avoids UnicodeEncodeError on non-ASCII content.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(text)
|
||||
|
||||
def remove_html_tags(data):
    """
    Return the text of HTML markup as extracted by BeautifulSoup.

    The markup is parsed with the "lxml" parser. Afterwards every pair
    of consecutive newlines is replaced by one newline in a single
    pass, so longer runs of newlines are shortened but not fully
    collapsed.

    Parameters
    ----------
    data : str
        HTML markup.

    Returns
    -------
    str
        Text of the document.
    """
    soup = BeautifulSoup(data, "lxml")
    plain = soup.text
    return plain.replace("\n\n", "\n")
|
||||
|
||||
# TODO Add support for eDonkey, Gnutella, Soulseek
|
||||
async def get_magnet(link):
|
||||
parted_link = urlsplit(link)
|
||||
|
|
|
@ -256,7 +256,7 @@ async def import_feeds(db_file, feeds):
|
|||
try:
|
||||
cur.execute(sql, par)
|
||||
except IntegrityError as e:
|
||||
logging.warning("Skipping: " + url)
|
||||
logging.warning("Skipping: " + str(url))
|
||||
logging.error(e)
|
||||
|
||||
|
||||
|
|
|
@ -83,12 +83,26 @@ async def message(self, message):
|
|||
jid = message["from"].bare
|
||||
message_text = " ".join(message["body"].split())
|
||||
|
||||
if (message["type"] == "groupchat" and
|
||||
message['muc']['nick'] == self.nick):
|
||||
# if (message["type"] == "groupchat" and
|
||||
# message['muc']['nick'] == self.nick):
|
||||
# return
|
||||
|
||||
# FIXME Code repetition. See below.
|
||||
if message["type"] == "groupchat":
|
||||
if (message['muc']['nick'] == self.nick):
|
||||
return
|
||||
jid_full = str(message["from"])
|
||||
role = self.plugin['xep_0045'].get_jid_property(
|
||||
jid,
|
||||
jid_full[jid_full.index("/")+1:],
|
||||
"role")
|
||||
if role != "moderator":
|
||||
return
|
||||
|
||||
# NOTE This is an exceptional case in which we treat
|
||||
# type groupchat the same as type chat.
|
||||
# type groupchat the same as type chat in a way that
|
||||
# doesn't require an exclamation mark for actionable
|
||||
# command.
|
||||
if (message_text.lower().startswith("http") and
|
||||
message_text.lower().endswith(".opml")):
|
||||
url = message_text
|
||||
|
@ -471,7 +485,7 @@ async def message(self, message):
|
|||
ext = ext if ext else 'pdf'
|
||||
url = None
|
||||
error = None
|
||||
if ext in ("html", "md", "pdf"):
|
||||
if ext in ("epub", "html", "md", "pdf", "txt"):
|
||||
status_type = "dnd"
|
||||
status_message = (
|
||||
"📃️ Procesing request to produce {} document..."
|
||||
|
|
Loading…
Reference in a new issue