Add functionality to display an image taken from the first entry link that provides one (experimental)
This commit is contained in:
parent
8b313acd91
commit
46a0819229
4 changed files with 84 additions and 43 deletions
|
@ -16,6 +16,7 @@ import html2text
|
||||||
from http.client import IncompleteRead
|
from http.client import IncompleteRead
|
||||||
from feedparser import parse
|
from feedparser import parse
|
||||||
import logging
|
import logging
|
||||||
|
from lxml import html
|
||||||
import pdfkit
|
import pdfkit
|
||||||
from readability import Document
|
from readability import Document
|
||||||
import slixfeed.config as config
|
import slixfeed.config as config
|
||||||
|
@ -28,7 +29,7 @@ from slixfeed.datetime import (
|
||||||
import slixfeed.fetch as fetch
|
import slixfeed.fetch as fetch
|
||||||
import slixfeed.sqlite as sqlite
|
import slixfeed.sqlite as sqlite
|
||||||
from slixfeed.url import (
|
from slixfeed.url import (
|
||||||
# complete_url,
|
complete_url,
|
||||||
join_url,
|
join_url,
|
||||||
remove_tracking_parameters,
|
remove_tracking_parameters,
|
||||||
replace_hostname,
|
replace_hostname,
|
||||||
|
@ -663,12 +664,14 @@ async def scan(db_file, url):
|
||||||
await sqlite.set_date(db_file, url)
|
await sqlite.set_date(db_file, url)
|
||||||
|
|
||||||
|
|
||||||
async def get_content(db_file, ix):
|
async def get_content(url):
|
||||||
url = sqlite.get_entry_url(db_file, ix)
|
|
||||||
result = await fetch.download_feed(url)
|
result = await fetch.download_feed(url)
|
||||||
if result[0]:
|
if result[0]:
|
||||||
document = Document(result[0])
|
document = Document(result[0])
|
||||||
return document.summary()
|
content = document.summary()
|
||||||
|
else:
|
||||||
|
content = None
|
||||||
|
return content
|
||||||
# TODO Either adapt it to filename
|
# TODO Either adapt it to filename
|
||||||
# or change it to something else
|
# or change it to something else
|
||||||
#filename = document.title()
|
#filename = document.title()
|
||||||
|
@ -677,6 +680,17 @@ async def get_content(db_file, ix):
|
||||||
# file.write(html_doc)
|
# file.write(html_doc)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_first_image(url, content):
    """
    Return the address of the first image referenced in an HTML document.

    Parameters
    ----------
    url : str
        Address of the page the content belongs to. It is handed to
        complete_url together with the image source.
    content : str
        HTML markup to search. May be None or blank, e.g. when the
        page could not be downloaded.

    Returns
    -------
    image_url : str or None
        Result of complete_url for the first <img src="..."> found,
        or None when there is no content or no image.
    """
    # The caller passes whatever get_content returned, which is None on a
    # failed download; lxml also raises on empty or whitespace-only input.
    # Treat all of these as "no image" instead of crashing the update task.
    if not content or not content.strip():
        return None
    tree = html.fromstring(content)
    images = tree.xpath('//img/@src')
    if not images:
        return None
    # XPath attribute results are lxml "smart strings"; convert to a plain
    # str before passing the value on.
    image = str(images[0])
    image_url = complete_url(url, image)
    return image_url
|
||||||
|
|
||||||
def generate_html(text, filename):
|
def generate_html(text, filename):
|
||||||
with open(filename, 'w') as file:
|
with open(filename, 'w') as file:
|
||||||
file.write(text)
|
file.write(text)
|
||||||
|
|
|
@ -227,6 +227,7 @@ async def send_update(self, jid, num=None):
|
||||||
num = int(num)
|
num = int(num)
|
||||||
news_digest = []
|
news_digest = []
|
||||||
results = await get_unread_entries(db_file, num)
|
results = await get_unread_entries(db_file, num)
|
||||||
|
image_url = None
|
||||||
for result in results:
|
for result in results:
|
||||||
ix = result[0]
|
ix = result[0]
|
||||||
title_e = result[1]
|
title_e = result[1]
|
||||||
|
@ -240,18 +241,29 @@ async def send_update(self, jid, num=None):
|
||||||
# print(result[0])
|
# print(result[0])
|
||||||
# breakpoint()
|
# breakpoint()
|
||||||
await mark_as_read(db_file, result[0])
|
await mark_as_read(db_file, result[0])
|
||||||
|
if not image_url:
|
||||||
|
content = await action.get_content(url)
|
||||||
|
image_url = action.extract_first_image(url, content)
|
||||||
new = " ".join(news_digest)
|
new = " ".join(news_digest)
|
||||||
# breakpoint()
|
# breakpoint()
|
||||||
if new:
|
if new:
|
||||||
# print("if new")
|
|
||||||
# breakpoint()
|
|
||||||
# TODO Add while loop to assure delivery.
|
# TODO Add while loop to assure delivery.
|
||||||
# print(await current_time(), ">>> ACT send_message",jid)
|
# print(await current_time(), ">>> ACT send_message",jid)
|
||||||
chat_type = await utility.jid_type(self, jid)
|
chat_type = await utility.jid_type(self, jid)
|
||||||
# NOTE Do we need "if statement"? See NOTE at is_muc.
|
# NOTE Do we need "if statement"? See NOTE at is_muc.
|
||||||
if chat_type in ("chat", "groupchat"):
|
if chat_type in ("chat", "groupchat"):
|
||||||
|
# TODO Provide a choice (with or without images)
|
||||||
xmpp.Slixfeed.send_message(
|
xmpp.Slixfeed.send_message(
|
||||||
self, mto=jid, mbody=new, mtype=chat_type)
|
self, mto=jid, mbody=new, mtype=chat_type)
|
||||||
|
if image_url:
|
||||||
|
# message = xmpp.Slixfeed.make_message(
|
||||||
|
# self, mto=jid, mbody=new, mtype=chat_type)
|
||||||
|
message = xmpp.Slixfeed.make_message(
|
||||||
|
self, mto=jid, mbody=image_url, mtype=chat_type)
|
||||||
|
message['oob']['url'] = image_url
|
||||||
|
print(image_url)
|
||||||
|
message.send()
|
||||||
|
|
||||||
# TODO Do not refresh task before
|
# TODO Do not refresh task before
|
||||||
# verifying that it was completed.
|
# verifying that it was completed.
|
||||||
await refresh_task(
|
await refresh_task(
|
||||||
|
|
|
@ -394,8 +394,8 @@ async def message(self, message):
|
||||||
response = "Missing keywords."
|
response = "Missing keywords."
|
||||||
send_reply_message(self, message, response)
|
send_reply_message(self, message, response)
|
||||||
case _ if message_lowercase.startswith("export "):
|
case _ if message_lowercase.startswith("export "):
|
||||||
key = message_text[7:]
|
ex = message_text[7:]
|
||||||
if key in ("opml", "html", "md", "xbel"):
|
if ex in ("opml", "html", "md", "xbel"):
|
||||||
status_type = "dnd"
|
status_type = "dnd"
|
||||||
status_message = (
|
status_message = (
|
||||||
"📤️ Procesing request to export feeds into {} ..."
|
"📤️ Procesing request to export feeds into {} ..."
|
||||||
|
@ -423,9 +423,9 @@ async def message(self, message):
|
||||||
case "xbel":
|
case "xbel":
|
||||||
response = "Not yet implemented."
|
response = "Not yet implemented."
|
||||||
url = await upload.start(self, jid, filename)
|
url = await upload.start(self, jid, filename)
|
||||||
response = (
|
# response = (
|
||||||
"Feeds exported successfully to {}.\n{}"
|
# "Feeds exported successfully to {}.\n{}"
|
||||||
).format(key, url)
|
# ).format(key, url)
|
||||||
# send_oob_reply_message(message, url, response)
|
# send_oob_reply_message(message, url, response)
|
||||||
await send_oob_message(
|
await send_oob_message(
|
||||||
self, jid, url)
|
self, jid, url)
|
||||||
|
@ -441,39 +441,54 @@ async def message(self, message):
|
||||||
# TODO xHTML, HTMLZ, Markdown, MHTML, PDF, TXT
|
# TODO xHTML, HTMLZ, Markdown, MHTML, PDF, TXT
|
||||||
case _ if (message_lowercase.startswith("get ")):
|
case _ if (message_lowercase.startswith("get ")):
|
||||||
message_text = message_text[4:]
|
message_text = message_text[4:]
|
||||||
ix = message_text.split(" ")[0]
|
ix_url = message_text.split(" ")[0]
|
||||||
ex = " ".join(message_text.split(" ")[1:])
|
ext = " ".join(message_text.split(" ")[1:])
|
||||||
ex = ex if ex else 'pdf'
|
ext = ext if ext else 'pdf'
|
||||||
|
if ext in ("html", "md", "pdf"):
|
||||||
|
status_type = "dnd"
|
||||||
|
status_message = (
|
||||||
|
"📃️ Procesing request to produce {} document ..."
|
||||||
|
).format(ext)
|
||||||
|
send_status_message(
|
||||||
|
self, jid, status_type, status_message)
|
||||||
db_file = get_pathname_to_database(jid)
|
db_file = get_pathname_to_database(jid)
|
||||||
data_dir = get_default_data_directory()
|
data_dir = get_default_data_directory()
|
||||||
if ix:
|
if ix_url:
|
||||||
if not os.path.isdir(data_dir):
|
if not os.path.isdir(data_dir):
|
||||||
os.mkdir(data_dir)
|
os.mkdir(data_dir)
|
||||||
if not os.path.isdir(data_dir + '/readability'):
|
if not os.path.isdir(data_dir + '/readability'):
|
||||||
os.mkdir(data_dir + '/readability')
|
os.mkdir(data_dir + '/readability')
|
||||||
filename = os.path.join(
|
filename = os.path.join(
|
||||||
data_dir, "readability", "saved_article_" + timestamp() + "." + ex)
|
data_dir, "readability", "saved_article_" + timestamp() + "." + ext)
|
||||||
try:
|
try:
|
||||||
text = await action.get_content(db_file, ix)
|
ix = int(ix_url)
|
||||||
|
try:
|
||||||
|
url = sqlite.get_entry_url(db_file, ix)
|
||||||
except:
|
except:
|
||||||
response = "No entry Id with {}".format(ix)
|
response = "No entry Id with {}".format(ix)
|
||||||
if text:
|
except:
|
||||||
match ex:
|
url = ix_url
|
||||||
|
content = await action.get_content(url)
|
||||||
|
if content:
|
||||||
|
match ext:
|
||||||
case "html":
|
case "html":
|
||||||
action.generate_html(text, filename)
|
action.generate_html(content, filename)
|
||||||
case "md":
|
case "md":
|
||||||
action.generate_markdown(text, filename)
|
action.generate_markdown(content, filename)
|
||||||
case "pdf":
|
case "pdf":
|
||||||
action.generate_pdf(text, filename)
|
action.generate_pdf(content, filename)
|
||||||
url = await upload.start(
|
url = await upload.start(
|
||||||
self, jid, filename)
|
self, jid, filename)
|
||||||
print(url)
|
|
||||||
await send_oob_message(
|
await send_oob_message(
|
||||||
self, jid, url)
|
self, jid, url)
|
||||||
|
await task.start_tasks_xmpp(
|
||||||
|
self, jid, ["status"])
|
||||||
else:
|
else:
|
||||||
response = "Failed to fetch resource."
|
response = "Failed to fetch resource."
|
||||||
else:
|
else:
|
||||||
response = "Missing entry Id."
|
response = "Missing entry Id."
|
||||||
|
else:
|
||||||
|
response = "Unsupported filetype."
|
||||||
if response:
|
if response:
|
||||||
send_reply_message(self, message, response)
|
send_reply_message(self, message, response)
|
||||||
# case _ if (message_lowercase.startswith("http")) and(
|
# case _ if (message_lowercase.startswith("http")) and(
|
||||||
|
|
|
@ -30,6 +30,6 @@ async def start(self, jid, filename, domain=None):
|
||||||
)
|
)
|
||||||
except IqTimeout:
|
except IqTimeout:
|
||||||
raise TimeoutError('Could not send message in time')
|
raise TimeoutError('Could not send message in time')
|
||||||
logging.info('Upload success!')
|
logging.info('Upload successful!')
|
||||||
logging.info('Sending file to %s', jid)
|
logging.info('Sending file to %s', jid)
|
||||||
return url
|
return url
|
||||||
|
|
Loading…
Reference in a new issue