Add functionality to display an image taken from the first entry link that provides one (experimental)

This commit is contained in:
Schimon Jehudah 2024-01-09 22:36:16 +00:00
parent 8b313acd91
commit 46a0819229
4 changed files with 84 additions and 43 deletions

View file

@ -16,6 +16,7 @@ import html2text
from http.client import IncompleteRead from http.client import IncompleteRead
from feedparser import parse from feedparser import parse
import logging import logging
from lxml import html
import pdfkit import pdfkit
from readability import Document from readability import Document
import slixfeed.config as config import slixfeed.config as config
@ -28,7 +29,7 @@ from slixfeed.datetime import (
import slixfeed.fetch as fetch import slixfeed.fetch as fetch
import slixfeed.sqlite as sqlite import slixfeed.sqlite as sqlite
from slixfeed.url import ( from slixfeed.url import (
# complete_url, complete_url,
join_url, join_url,
remove_tracking_parameters, remove_tracking_parameters,
replace_hostname, replace_hostname,
@ -663,12 +664,14 @@ async def scan(db_file, url):
await sqlite.set_date(db_file, url) await sqlite.set_date(db_file, url)
async def get_content(db_file, ix): async def get_content(url):
url = sqlite.get_entry_url(db_file, ix)
result = await fetch.download_feed(url) result = await fetch.download_feed(url)
if result[0]: if result[0]:
document = Document(result[0]) document = Document(result[0])
return document.summary() content = document.summary()
else:
content = None
return content
# TODO Either adapt it to filename # TODO Either adapt it to filename
# or change it to something else # or change it to something else
#filename = document.title() #filename = document.title()
@ -677,6 +680,17 @@ async def get_content(db_file, ix):
# file.write(html_doc) # file.write(html_doc)
def extract_first_image(url, content):
    """
    Find the first image referenced in an HTML document.

    Parameters
    ----------
    url : str
        Address of the page the content belongs to; it is handed to
        complete_url together with the image source that was found.
    content : str or None
        HTML markup to search. May be None or blank, for instance
        when the page could not be downloaded.

    Returns
    -------
    image_url : str or None
        Result of complete_url(url, src) for the first ``<img>``
        ``src`` attribute, or None when there is no content to parse
        or the document references no image.
    """
    # Callers pass the result of get_content unchecked, and that is None
    # after a failed download. lxml raises on None and on empty or
    # whitespace-only documents, so bail out before parsing.
    if not content or not content.strip():
        return None
    tree = html.fromstring(content)
    images = tree.xpath('//img/@src')
    if not images:
        return None
    # XPath attribute results are lxml string subclasses; hand a plain
    # str to complete_url.
    return complete_url(url, str(images[0]))
def generate_html(text, filename): def generate_html(text, filename):
with open(filename, 'w') as file: with open(filename, 'w') as file:
file.write(text) file.write(text)

View file

@ -227,6 +227,7 @@ async def send_update(self, jid, num=None):
num = int(num) num = int(num)
news_digest = [] news_digest = []
results = await get_unread_entries(db_file, num) results = await get_unread_entries(db_file, num)
image_url = None
for result in results: for result in results:
ix = result[0] ix = result[0]
title_e = result[1] title_e = result[1]
@ -240,18 +241,29 @@ async def send_update(self, jid, num=None):
# print(result[0]) # print(result[0])
# breakpoint() # breakpoint()
await mark_as_read(db_file, result[0]) await mark_as_read(db_file, result[0])
if not image_url:
content = await action.get_content(url)
image_url = action.extract_first_image(url, content)
new = " ".join(news_digest) new = " ".join(news_digest)
# breakpoint() # breakpoint()
if new: if new:
# print("if new")
# breakpoint()
# TODO Add while loop to assure delivery. # TODO Add while loop to assure delivery.
# print(await current_time(), ">>> ACT send_message",jid) # print(await current_time(), ">>> ACT send_message",jid)
chat_type = await utility.jid_type(self, jid) chat_type = await utility.jid_type(self, jid)
# NOTE Do we need "if statement"? See NOTE at is_muc. # NOTE Do we need "if statement"? See NOTE at is_muc.
if chat_type in ("chat", "groupchat"): if chat_type in ("chat", "groupchat"):
# TODO Provide a choice (with or without images)
xmpp.Slixfeed.send_message( xmpp.Slixfeed.send_message(
self, mto=jid, mbody=new, mtype=chat_type) self, mto=jid, mbody=new, mtype=chat_type)
if image_url:
# message = xmpp.Slixfeed.make_message(
# self, mto=jid, mbody=new, mtype=chat_type)
message = xmpp.Slixfeed.make_message(
self, mto=jid, mbody=image_url, mtype=chat_type)
message['oob']['url'] = image_url
print(image_url)
message.send()
# TODO Do not refresh task before # TODO Do not refresh task before
# verifying that it was completed. # verifying that it was completed.
await refresh_task( await refresh_task(

View file

@ -394,8 +394,8 @@ async def message(self, message):
response = "Missing keywords." response = "Missing keywords."
send_reply_message(self, message, response) send_reply_message(self, message, response)
case _ if message_lowercase.startswith("export "): case _ if message_lowercase.startswith("export "):
key = message_text[7:] ex = message_text[7:]
if key in ("opml", "html", "md", "xbel"): if ex in ("opml", "html", "md", "xbel"):
status_type = "dnd" status_type = "dnd"
status_message = ( status_message = (
"📤️ Procesing request to export feeds into {} ..." "📤️ Procesing request to export feeds into {} ..."
@ -423,9 +423,9 @@ async def message(self, message):
case "xbel": case "xbel":
response = "Not yet implemented." response = "Not yet implemented."
url = await upload.start(self, jid, filename) url = await upload.start(self, jid, filename)
response = ( # response = (
"Feeds exported successfully to {}.\n{}" # "Feeds exported successfully to {}.\n{}"
).format(key, url) # ).format(key, url)
# send_oob_reply_message(message, url, response) # send_oob_reply_message(message, url, response)
await send_oob_message( await send_oob_message(
self, jid, url) self, jid, url)
@ -441,39 +441,54 @@ async def message(self, message):
# TODO xHTML, HTMLZ, Markdown, MHTML, PDF, TXT # TODO xHTML, HTMLZ, Markdown, MHTML, PDF, TXT
case _ if (message_lowercase.startswith("get ")): case _ if (message_lowercase.startswith("get ")):
message_text = message_text[4:] message_text = message_text[4:]
ix = message_text.split(" ")[0] ix_url = message_text.split(" ")[0]
ex = " ".join(message_text.split(" ")[1:]) ext = " ".join(message_text.split(" ")[1:])
ex = ex if ex else 'pdf' ext = ext if ext else 'pdf'
if ext in ("html", "md", "pdf"):
status_type = "dnd"
status_message = (
"📃️ Procesing request to produce {} document ..."
).format(ext)
send_status_message(
self, jid, status_type, status_message)
db_file = get_pathname_to_database(jid) db_file = get_pathname_to_database(jid)
data_dir = get_default_data_directory() data_dir = get_default_data_directory()
if ix: if ix_url:
if not os.path.isdir(data_dir): if not os.path.isdir(data_dir):
os.mkdir(data_dir) os.mkdir(data_dir)
if not os.path.isdir(data_dir + '/readability'): if not os.path.isdir(data_dir + '/readability'):
os.mkdir(data_dir + '/readability') os.mkdir(data_dir + '/readability')
filename = os.path.join( filename = os.path.join(
data_dir, "readability", "saved_article_" + timestamp() + "." + ex) data_dir, "readability", "saved_article_" + timestamp() + "." + ext)
try: try:
text = await action.get_content(db_file, ix) ix = int(ix_url)
try:
url = sqlite.get_entry_url(db_file, ix)
except: except:
response = "No entry Id with {}".format(ix) response = "No entry Id with {}".format(ix)
if text: except:
match ex: url = ix_url
content = await action.get_content(url)
if content:
match ext:
case "html": case "html":
action.generate_html(text, filename) action.generate_html(content, filename)
case "md": case "md":
action.generate_markdown(text, filename) action.generate_markdown(content, filename)
case "pdf": case "pdf":
action.generate_pdf(text, filename) action.generate_pdf(content, filename)
url = await upload.start( url = await upload.start(
self, jid, filename) self, jid, filename)
print(url)
await send_oob_message( await send_oob_message(
self, jid, url) self, jid, url)
await task.start_tasks_xmpp(
self, jid, ["status"])
else: else:
response = "Failed to fetch resource." response = "Failed to fetch resource."
else: else:
response = "Missing entry Id." response = "Missing entry Id."
else:
response = "Unsupported filetype."
if response: if response:
send_reply_message(self, message, response) send_reply_message(self, message, response)
# case _ if (message_lowercase.startswith("http")) and( # case _ if (message_lowercase.startswith("http")) and(

View file

@ -30,6 +30,6 @@ async def start(self, jid, filename, domain=None):
) )
except IqTimeout: except IqTimeout:
raise TimeoutError('Could not send message in time') raise TimeoutError('Could not send message in time')
logging.info('Upload success!') logging.info('Upload successful!')
logging.info('Sending file to %s', jid) logging.info('Sending file to %s', jid)
return url return url