From 46a08192291c48b0ae5f1bbfb2e477394fb42192 Mon Sep 17 00:00:00 2001 From: Schimon Jehudah Date: Tue, 9 Jan 2024 22:36:16 +0000 Subject: [PATCH] Add functionality to display images from first link to provide an image (experimental) --- slixfeed/action.py | 22 ++++++++-- slixfeed/task.py | 16 +++++++- slixfeed/xmpp/process.py | 87 +++++++++++++++++++++++----------------- slixfeed/xmpp/upload.py | 2 +- 4 files changed, 84 insertions(+), 43 deletions(-) diff --git a/slixfeed/action.py b/slixfeed/action.py index 046461b..f15ff68 100644 --- a/slixfeed/action.py +++ b/slixfeed/action.py @@ -16,6 +16,7 @@ import html2text from http.client import IncompleteRead from feedparser import parse import logging +from lxml import html import pdfkit from readability import Document import slixfeed.config as config @@ -28,7 +29,7 @@ from slixfeed.datetime import ( import slixfeed.fetch as fetch import slixfeed.sqlite as sqlite from slixfeed.url import ( - # complete_url, + complete_url, join_url, remove_tracking_parameters, replace_hostname, @@ -663,12 +664,14 @@ async def scan(db_file, url): await sqlite.set_date(db_file, url) -async def get_content(db_file, ix): - url = sqlite.get_entry_url(db_file, ix) +async def get_content(url): result = await fetch.download_feed(url) if result[0]: document = Document(result[0]) - return document.summary() + content = document.summary() + else: + content = None + return content # TODO Either adapt it to filename # or change it to something else #filename = document.title() @@ -677,6 +680,17 @@ async def get_content(db_file, ix): # file.write(html_doc) +def extract_first_image(url, content): + tree = html.fromstring(content) + images = tree.xpath('//img/@src') + if len(images): + image = images[0] + image = str(image) + image_url = complete_url(url, image) + else: + image_url = None + return image_url + def generate_html(text, filename): with open(filename, 'w') as file: file.write(text) diff --git a/slixfeed/task.py b/slixfeed/task.py index 14a8f0b..dabfd42 100644 --- a/slixfeed/task.py +++ b/slixfeed/task.py @@ -227,6 +227,7 @@ async def send_update(self, jid, num=None): num = int(num) news_digest = [] results = await get_unread_entries(db_file, num) + image_url = None for result in results: ix = result[0] title_e = result[1] @@ -240,18 +241,29 @@ async def send_update(self, jid, num=None): # print(result[0]) # breakpoint() await mark_as_read(db_file, result[0]) + if not image_url: + content = await action.get_content(url) + image_url = action.extract_first_image(url, content) new = " ".join(news_digest) # breakpoint() if new: - # print("if new") - # breakpoint() # TODO Add while loop to assure delivery. # print(await current_time(), ">>> ACT send_message",jid) chat_type = await utility.jid_type(self, jid) # NOTE Do we need "if statement"? See NOTE at is_muc. if chat_type in ("chat", "groupchat"): + # TODO Provide a choice (with or without images) xmpp.Slixfeed.send_message( self, mto=jid, mbody=new, mtype=chat_type) + if image_url: + # message = xmpp.Slixfeed.make_message( + # self, mto=jid, mbody=new, mtype=chat_type) + message = xmpp.Slixfeed.make_message( + self, mto=jid, mbody=image_url, mtype=chat_type) + message['oob']['url'] = image_url + print(image_url) + message.send() + # TODO Do not refresh task before # verifying that it was completed. await refresh_task( diff --git a/slixfeed/xmpp/process.py b/slixfeed/xmpp/process.py index 954f953..e42af54 100644 --- a/slixfeed/xmpp/process.py +++ b/slixfeed/xmpp/process.py @@ -394,8 +394,8 @@ async def message(self, message): response = "Missing keywords." send_reply_message(self, message, response) case _ if message_lowercase.startswith("export "): - key = message_text[7:] - if key in ("opml", "html", "md", "xbel"): + ex = message_text[7:] + if ex in ("opml", "html", "md", "xbel"): status_type = "dnd" status_message = ( "📤️ Procesing request to export feeds into {} ..." @@ -423,9 +423,9 @@ async def message(self, message): case "xbel": response = "Not yet implemented." url = await upload.start(self, jid, filename) - response = ( - "Feeds exported successfully to {}.\n{}" - ).format(key, url) + # response = ( + # "Feeds exported successfully to {}.\n{}" + # ).format(key, url) # send_oob_reply_message(message, url, response) await send_oob_message( self, jid, url) @@ -441,39 +441,54 @@ async def message(self, message): # TODO xHTML, HTMLZ, Markdown, MHTML, PDF, TXT case _ if (message_lowercase.startswith("get ")): message_text = message_text[4:] - ix = message_text.split(" ")[0] - ex = " ".join(message_text.split(" ")[1:]) - ex = ex if ex else 'pdf' - db_file = get_pathname_to_database(jid) - data_dir = get_default_data_directory() - if ix: - if not os.path.isdir(data_dir): - os.mkdir(data_dir) - if not os.path.isdir(data_dir + '/readability'): - os.mkdir(data_dir + '/readability') - filename = os.path.join( - data_dir, "readability", "saved_article_" + timestamp() + "." + ex) - try: - text = await action.get_content(db_file, ix) - except: - response = "No entry Id with {}".format(ix) - if text: - match ex: - case "html": - action.generate_html(text, filename) - case "md": - action.generate_markdown(text, filename) - case "pdf": - action.generate_pdf(text, filename) - url = await upload.start( - self, jid, filename) - print(url) - await send_oob_message( - self, jid, url) + ix_url = message_text.split(" ")[0] + ext = " ".join(message_text.split(" ")[1:]) + ext = ext if ext else 'pdf' + if ext in ("html", "md", "pdf"): + status_type = "dnd" + status_message = ( + "📃️ Procesing request to produce {} document ..." + ).format(ext) + send_status_message( + self, jid, status_type, status_message) + db_file = get_pathname_to_database(jid) + data_dir = get_default_data_directory() + if ix_url: + if not os.path.isdir(data_dir): + os.mkdir(data_dir) + if not os.path.isdir(data_dir + '/readability'): + os.mkdir(data_dir + '/readability') + filename = os.path.join( + data_dir, "readability", "saved_article_" + timestamp() + "." + ext) + try: + ix = int(ix_url) + try: + url = sqlite.get_entry_url(db_file, ix) + except: + response = "No entry Id with {}".format(ix) + except: + url = ix_url + content = await action.get_content(url) + if content: + match ext: + case "html": + action.generate_html(content, filename) + case "md": + action.generate_markdown(content, filename) + case "pdf": + action.generate_pdf(content, filename) + url = await upload.start( + self, jid, filename) + await send_oob_message( + self, jid, url) + await task.start_tasks_xmpp( + self, jid, ["status"]) + else: + response = "Failed to fetch resource." else: - response = "Failed to fetch resource." + response = "Missing entry Id." else: - response = "Missing entry Id." + response = "Unsupported filetype." if response: send_reply_message(self, message, response) # case _ if (message_lowercase.startswith("http")) and( diff --git a/slixfeed/xmpp/upload.py b/slixfeed/xmpp/upload.py index ce6eeb7..6a32f76 100644 --- a/slixfeed/xmpp/upload.py +++ b/slixfeed/xmpp/upload.py @@ -30,6 +30,6 @@ async def start(self, jid, filename, domain=None): ) except IqTimeout: raise TimeoutError('Could not send message in time') - logging.info('Upload success!') + logging.info('Upload successful!') logging.info('Sending file to %s', jid) return url