forked from sch/Slixfeed
Add support for ePUB and Text.
Import OPML only from a moderator. Fix error for sqlite.py module.
This commit is contained in:
parent
13d87e2be7
commit
c9c552e33f
3 changed files with 67 additions and 11 deletions
|
@ -31,6 +31,7 @@ from http.client import IncompleteRead
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
import os
|
||||||
import slixfeed.config as config
|
import slixfeed.config as config
|
||||||
import slixfeed.crawl as crawl
|
import slixfeed.crawl as crawl
|
||||||
from slixfeed.dt import (
|
from slixfeed.dt import (
|
||||||
|
@ -50,6 +51,7 @@ from slixfeed.url import (
|
||||||
import slixfeed.xmpp.bookmark as bookmark
|
import slixfeed.xmpp.bookmark as bookmark
|
||||||
from urllib import error
|
from urllib import error
|
||||||
from urllib.parse import parse_qs, urlsplit
|
from urllib.parse import parse_qs, urlsplit
|
||||||
|
import xml2epub
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -1015,6 +1017,8 @@ def generate_document(data, url, ext, filename):
|
||||||
logging.warning(
|
logging.warning(
|
||||||
"Check that package readability is installed.")
|
"Check that package readability is installed.")
|
||||||
match ext:
|
match ext:
|
||||||
|
case "epub":
|
||||||
|
generate_epub(content, filename)
|
||||||
case "html":
|
case "html":
|
||||||
generate_html(content, filename)
|
generate_html(content, filename)
|
||||||
case "md":
|
case "md":
|
||||||
|
@ -1022,7 +1026,8 @@ def generate_document(data, url, ext, filename):
|
||||||
generate_markdown(content, filename)
|
generate_markdown(content, filename)
|
||||||
except:
|
except:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
"Check that package html2text is installed.")
|
"Check that package html2text is installed, "
|
||||||
|
"or try again.")
|
||||||
error = (
|
error = (
|
||||||
"Package html2text was not found.")
|
"Package html2text was not found.")
|
||||||
case "pdf":
|
case "pdf":
|
||||||
|
@ -1031,9 +1036,13 @@ def generate_document(data, url, ext, filename):
|
||||||
except:
|
except:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
"Check that packages pdfkit and wkhtmltopdf "
|
"Check that packages pdfkit and wkhtmltopdf "
|
||||||
"are installed.")
|
"are installed, or try again.")
|
||||||
error = (
|
error = (
|
||||||
"Package pdfkit or wkhtmltopdf was not found.")
|
"Package pdfkit or wkhtmltopdf was not found.")
|
||||||
|
case "text":
|
||||||
|
generate_txt(content, filename)
|
||||||
|
case "txt":
|
||||||
|
generate_txt(content, filename)
|
||||||
if error:
|
if error:
|
||||||
return error
|
return error
|
||||||
|
|
||||||
|
@ -1095,15 +1104,34 @@ async def extract_image_from_html(url):
|
||||||
return image_url
|
return image_url
|
||||||
|
|
||||||
|
|
||||||
|
def generate_epub(text, pathname):
|
||||||
|
## create an empty eBook
|
||||||
|
pathname_list = pathname.split("/")
|
||||||
|
filename = pathname_list.pop()
|
||||||
|
directory = "/".join(pathname_list)
|
||||||
|
book = xml2epub.Epub(filename)
|
||||||
|
## create chapters by url
|
||||||
|
# chapter0 = xml2epub.create_chapter_from_string(text, title=filename, strict=False)
|
||||||
|
chapter0 = xml2epub.create_chapter_from_string(text, strict=False)
|
||||||
|
#### create chapter objects
|
||||||
|
# chapter1 = xml2epub.create_chapter_from_url("https://dev.to/devteam/top-7-featured-dev-posts-from-the-past-week-h6h")
|
||||||
|
# chapter2 = xml2epub.create_chapter_from_url("https://dev.to/ks1912/getting-started-with-docker-34g6")
|
||||||
|
## add chapters to your eBook
|
||||||
|
book.add_chapter(chapter0)
|
||||||
|
# book.add_chapter(chapter1)
|
||||||
|
# book.add_chapter(chapter2)
|
||||||
|
## generate epub file
|
||||||
|
filename_tmp = "slixfeedepub"
|
||||||
|
book.create_epub(directory, epub_name=filename_tmp)
|
||||||
|
pathname_tmp = os.path.join(directory, filename_tmp) + ".epub"
|
||||||
|
os.rename(pathname_tmp, pathname)
|
||||||
|
|
||||||
|
|
||||||
def generate_html(text, filename):
|
def generate_html(text, filename):
|
||||||
with open(filename, 'w') as file:
|
with open(filename, 'w') as file:
|
||||||
file.write(text)
|
file.write(text)
|
||||||
|
|
||||||
|
|
||||||
def generate_pdf(text, filename):
|
|
||||||
pdfkit.from_string(text, filename)
|
|
||||||
|
|
||||||
|
|
||||||
def generate_markdown(text, filename):
|
def generate_markdown(text, filename):
|
||||||
h2m = html2text.HTML2Text()
|
h2m = html2text.HTML2Text()
|
||||||
# Convert HTML to Markdown
|
# Convert HTML to Markdown
|
||||||
|
@ -1112,6 +1140,20 @@ def generate_markdown(text, filename):
|
||||||
file.write(markdown)
|
file.write(markdown)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_pdf(text, filename):
|
||||||
|
pdfkit.from_string(text, filename)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_txt(text, filename):
|
||||||
|
text = remove_html_tags(text)
|
||||||
|
with open(filename, 'w') as file:
|
||||||
|
file.write(text)
|
||||||
|
|
||||||
|
def remove_html_tags(data):
|
||||||
|
data = BeautifulSoup(data, "lxml").text
|
||||||
|
data = data.replace("\n\n", "\n")
|
||||||
|
return data
|
||||||
|
|
||||||
# TODO Add support for eDonkey, Gnutella, Soulseek
|
# TODO Add support for eDonkey, Gnutella, Soulseek
|
||||||
async def get_magnet(link):
|
async def get_magnet(link):
|
||||||
parted_link = urlsplit(link)
|
parted_link = urlsplit(link)
|
||||||
|
|
|
@ -256,7 +256,7 @@ async def import_feeds(db_file, feeds):
|
||||||
try:
|
try:
|
||||||
cur.execute(sql, par)
|
cur.execute(sql, par)
|
||||||
except IntegrityError as e:
|
except IntegrityError as e:
|
||||||
logging.warning("Skipping: " + url)
|
logging.warning("Skipping: " + str(url))
|
||||||
logging.error(e)
|
logging.error(e)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -83,12 +83,26 @@ async def message(self, message):
|
||||||
jid = message["from"].bare
|
jid = message["from"].bare
|
||||||
message_text = " ".join(message["body"].split())
|
message_text = " ".join(message["body"].split())
|
||||||
|
|
||||||
if (message["type"] == "groupchat" and
|
# if (message["type"] == "groupchat" and
|
||||||
message['muc']['nick'] == self.nick):
|
# message['muc']['nick'] == self.nick):
|
||||||
|
# return
|
||||||
|
|
||||||
|
# FIXME Code repetition. See below.
|
||||||
|
if message["type"] == "groupchat":
|
||||||
|
if (message['muc']['nick'] == self.nick):
|
||||||
|
return
|
||||||
|
jid_full = str(message["from"])
|
||||||
|
role = self.plugin['xep_0045'].get_jid_property(
|
||||||
|
jid,
|
||||||
|
jid_full[jid_full.index("/")+1:],
|
||||||
|
"role")
|
||||||
|
if role != "moderator":
|
||||||
return
|
return
|
||||||
|
|
||||||
# NOTE This is an exceptional case in which we treat
|
# NOTE This is an exceptional case in which we treat
|
||||||
# type groupchat the same as type chat.
|
# type groupchat the same as type chat in a way that
|
||||||
|
# doesn't require an exclamation mark for actionable
|
||||||
|
# command.
|
||||||
if (message_text.lower().startswith("http") and
|
if (message_text.lower().startswith("http") and
|
||||||
message_text.lower().endswith(".opml")):
|
message_text.lower().endswith(".opml")):
|
||||||
url = message_text
|
url = message_text
|
||||||
|
@ -471,7 +485,7 @@ async def message(self, message):
|
||||||
ext = ext if ext else 'pdf'
|
ext = ext if ext else 'pdf'
|
||||||
url = None
|
url = None
|
||||||
error = None
|
error = None
|
||||||
if ext in ("html", "md", "pdf"):
|
if ext in ("epub", "html", "md", "pdf", "txt"):
|
||||||
status_type = "dnd"
|
status_type = "dnd"
|
||||||
status_message = (
|
status_message = (
|
||||||
"📃️ Procesing request to produce {} document..."
|
"📃️ Procesing request to produce {} document..."
|
||||||
|
|
Loading…
Reference in a new issue