Add functionality to download article via ad-hoc.

Add helper for URL.
This commit is contained in:
Schimon Jehudah 2024-02-23 02:55:31 +00:00
parent 6e2a7305c2
commit 9bb37bc932
6 changed files with 315 additions and 195 deletions

View file

@ -1224,87 +1224,6 @@ async def scan(db_file, url):
new_entries) new_entries)
async def download_document(self, message, jid, jid_file, message_text, ix_url,
                            readability):
    """
    Export an article as a document and send it to the requester.

    Parameters
    ----------
    self : object
        XMPP client instance, passed through to the Xmpp* helpers.
    message : object
        Incoming message stanza; used to send a textual reply on failure.
    jid : str
        Jabber ID of the requester.
    jid_file : str
        Name used to locate the database of the requester.
    message_text : str
        Command arguments; everything after the first word is the
        requested filetype (defaults to 'pdf').
    ix_url : str
        Entry index number or URL.
    readability : bool
        Passed through to generate_document.

    Returns
    -------
    None.
    """
    # NOTE(review): block nesting was reconstructed from a flattened view
    # of this function; confirm against the repository copy.
    ext = ' '.join(message_text.split(' ')[1:]) or 'pdf'
    url = None
    error = None
    response = None
    if ext in ('epub', 'html', 'markdown', 'md', 'pdf', 'text', 'txt'):
        # Map long filetype names to the file extension.
        ext = {'markdown': 'md', 'text': 'txt'}.get(ext, ext)
        status_type = 'dnd'
        status_message = ('📃️ Processing request to produce {} document...'
                          .format(ext.upper()))
        XmppPresence.send(self, jid, status_message, status_type=status_type)
        db_file = config.get_pathname_to_database(jid_file)
        cache_dir = config.get_default_cache_directory()
        if ix_url:
            # Creates the cache directory and its subdirectory in one step
            # and tolerates both already existing.
            os.makedirs(os.path.join(cache_dir, 'readability'),
                        exist_ok=True)
            try:
                ix = int(ix_url)
            except ValueError:
                # Not a number, so treat the argument as a URL.
                url = ix_url
            else:
                try:
                    url = sqlite.get_entry_url(db_file, ix)[0]
                except Exception:
                    # Best effort: any lookup failure is reported as a
                    # missing entry, without swallowing task cancellation.
                    url = None
                    response = 'No entry with index {}'.format(ix)
            if url:
                logging.info('Original URL: {}'
                             .format(url))
                url = uri.remove_tracking_parameters(url)
                logging.info('Processed URL (tracker removal): {}'
                             .format(url))
                url = (uri.replace_hostname(url, 'link')) or url
                logging.info('Processed URL (replace hostname): {}'
                             .format(url))
                result = await fetch.http(url)
                if not result['error']:
                    data = result['content']
                    title = get_document_title(data)
                    # Spaces and hyphens become underscores; characters
                    # that are awkward in filenames are dropped.
                    title = title.strip().lower().translate(
                        str.maketrans(' -', '__', '?"\'!'))
                    filename = os.path.join(
                        cache_dir, 'readability',
                        title + '_' + dt.timestamp() + '.' + ext)
                    error = generate_document(data, url, ext, filename,
                                              readability)
                    if error:
                        response = ('> {}\n'
                                    'Failed to export {}. Reason: {}'
                                    .format(url, ext.upper(), error))
                    else:
                        url = await XmppUpload.start(self, jid, filename)
                        chat_type = await get_chat_type(self, jid)
                        XmppMessage.send_oob(self, jid, url, chat_type)
                else:
                    # The status code must be read here; it was previously
                    # bound only on the success path.
                    # NOTE(review): assumes result is a dict that carries
                    # 'status_code' on failure - confirm in fetch.http.
                    code = result.get('status_code')
                    response = ('> {}\n'
                                'Failed to fetch URL. Reason: {}'
                                .format(url, code))
            await task.start_tasks_xmpp(self, jid, ['status'])
        else:
            response = 'Missing entry index number.'
    else:
        response = ('Unsupported filetype.\n'
                    'Try: epub, html, md (markdown), '
                    'pdf, or txt (text)')
    if response:
        # Log the export error when there is one, else the reply text.
        logging.warning('Error for URL {}: {}'.format(url, error or response))
        XmppMessage.send_reply(self, message, response)
def get_document_title(data): def get_document_title(data):
try: try:
document = Document(data) document = Document(data)

View file

@ -190,6 +190,17 @@ https://codeberg.org/poezio/slixmpp
software = [ software = [
""" """
Canto
Canto is an Atom/RSS feed reader for the console that is meant to \
be quick, concise, and colorful. It's meant to provide a minimal, yet \
information packed interface. No navigating menus. No dense blocks of \
unreadable white text. An interface with almost infinite customization \
and extensibility using the excellent Python programming language.
https://codezen.org/canto-ng/
CommaFeed CommaFeed
A self-hosted RSS reader, based on Dropwizard and React/TypeScript. A self-hosted RSS reader, based on Dropwizard and React/TypeScript.
@ -233,6 +244,24 @@ and more reliably from the sites you trust.
https://netnewswire.com/ https://netnewswire.com/
QuiteRSS
QuiteRSS is a free and convenient program for reading RSS/Atom news \
feeds.
http://quiterss.org/
Raven Reader
Raven is an open source desktop news reader with flexible settings to \
optimize your experience. No login is required, and no personal data \
is collected. Just select the websites you want to curate articles \
from and enjoy!
https://ravenreader.app/
Spot-On Spot-On
Spot-On is a software carnival which brings chat, email, news, \ Spot-On is a software carnival which brings chat, email, news, \

View file

@ -5,14 +5,10 @@
TODO TODO
0) Move functions send_status and send_update to module action
1) Deprecate "add" (see above) and make it interactive. 1) Deprecate "add" (see above) and make it interactive.
Slixfeed: Do you still want to add this URL to subscription list? Slixfeed: Do you still want to add this URL to subscription list?
See: case _ if message_lowercase.startswith("add"): See: case _ if message_lowercase.startswith("add"):
2) Use loop (with gather) instead of TaskGroup.
3) Assure message delivery before calling a new task. 3) Assure message delivery before calling a new task.
See https://slixmpp.readthedocs.io/en/latest/event_index.html#term-marker_acknowledged See https://slixmpp.readthedocs.io/en/latest/event_index.html#term-marker_acknowledged

View file

@ -1,2 +1,2 @@
__version__ = '0.1.14' __version__ = '0.1.15'
__version_info__ = (0, 1, 14) __version_info__ = (0, 1, 15)

View file

@ -49,8 +49,8 @@ from slixmpp.plugins.xep_0048.stanza import Bookmarks
import slixfeed.action as action import slixfeed.action as action
import slixfeed.config as config import slixfeed.config as config
import slixfeed.crawl as crawl import slixfeed.crawl as crawl
import slixfeed.dt as dt
import slixfeed.fetch as fetch import slixfeed.fetch as fetch
from slixfeed.dt import timestamp
import slixfeed.sqlite as sqlite import slixfeed.sqlite as sqlite
import slixfeed.url as uri import slixfeed.url as uri
from slixfeed.version import __version__ from slixfeed.version import __version__
@ -600,6 +600,7 @@ class Slixfeed(slixmpp.ClientXMPP):
ftype='text-single', ftype='text-single',
label='URL', label='URL',
desc='Enter subscription URL.', desc='Enter subscription URL.',
value='http://',
required=True) required=True)
# form.add_field(var='scan', # form.add_field(var='scan',
# ftype='boolean', # ftype='boolean',
@ -647,7 +648,7 @@ class Slixfeed(slixmpp.ClientXMPP):
db_file = config.get_pathname_to_database(jid_file) db_file = config.get_pathname_to_database(jid_file)
title = sqlite.get_entry_title(db_file, ix) title = sqlite.get_entry_title(db_file, ix)
title = title[0] if title else 'Untitled' title = title[0] if title else 'Untitled'
form = self['xep_0004'].make_form('result', 'Updates') form = self['xep_0004'].make_form('form', 'Article')
url = sqlite.get_entry_url(db_file, ix) url = sqlite.get_entry_url(db_file, ix)
url = url[0] url = url[0]
logging.info('Original URL: {}'.format(url)) logging.info('Original URL: {}'.format(url))
@ -665,6 +666,9 @@ class Slixfeed(slixmpp.ClientXMPP):
label=title, label=title,
value=summary) value=summary)
field_url = form.add_field(var='url', field_url = form.add_field(var='url',
ftype='hidden',
value=url)
field_url = form.add_field(var='url_link',
label='Link', label='Link',
ftype='text-single', ftype='text-single',
value=url) value=url)
@ -678,7 +682,19 @@ class Slixfeed(slixmpp.ClientXMPP):
ftype='text-single', ftype='text-single',
value=feed_url) value=feed_url)
field_feed['validate']['datatype'] = 'xs:anyURI' field_feed['validate']['datatype'] = 'xs:anyURI'
options = form.add_field(var='filetype',
ftype='list-single',
label='Save as',
desc=('Select file type.'),
value='pdf',
required=True)
options.addOption('ePUB', 'epub')
options.addOption('HTML', 'html')
options.addOption('Markdown', 'md')
options.addOption('PDF', 'pdf')
options.addOption('Plain Text', 'txt')
form['instructions'] = 'Proceed to download article.' form['instructions'] = 'Proceed to download article.'
session['allow_complete'] = False
session['allow_prev'] = True session['allow_prev'] = True
session['has_next'] = True session['has_next'] = True
session['next'] = self._handle_recent_action session['next'] = self._handle_recent_action
@ -688,9 +704,52 @@ class Slixfeed(slixmpp.ClientXMPP):
async def _handle_recent_action(self, payload, session):
    """
    Export the selected article and offer the result for download.

    Parameters
    ----------
    payload : object
        Submitted form; the values 'filetype' and 'url' are read.
    session : dict
        Ad-hoc command session; receives either an error note or a
        result form holding the download link.

    Returns
    -------
    session : dict
        The updated session.
    """
    # NOTE(review): block nesting was reconstructed from a flattened view
    # of this method; confirm against the repository copy.
    ext = payload['values']['filetype']
    # presumably the hidden field arrives as a list - TODO confirm
    url = payload['values']['url'][0]
    jid = session['from'].bare
    jid_file = jid
    # Result is unused here; the call is kept in case it has side
    # effects - TODO confirm and remove if it has none.
    db_file = config.get_pathname_to_database(jid_file)
    cache_dir = config.get_default_cache_directory()
    # Creates the cache directory and its subdirectory in one step and
    # tolerates both already existing.
    os.makedirs(os.path.join(cache_dir, 'readability'), exist_ok=True)
    url = uri.remove_tracking_parameters(url)
    url = (uri.replace_hostname(url, 'link')) or url
    result = await fetch.http(url)
    if not result['error']:
        data = result['content']
        title = action.get_document_title(data)
        # Spaces and hyphens become underscores; characters that are
        # awkward in filenames are dropped.
        title = title.strip().lower().translate(
            str.maketrans(' -', '__', '?"\'!'))
        filename = os.path.join(
            cache_dir, 'readability',
            title + '_' + dt.timestamp() + '.' + ext)
        error = action.generate_document(data, url, ext, filename,
                                         readability=True)
        if error:
            text_error = ('Failed to export {} for {}'
                          '\n\n'
                          'Reason: {}'.format(ext.upper(), url, error))
            session['notes'] = [['error', text_error]]
        else:
            url = await XmppUpload.start(self, jid, filename)
            form = self['xep_0004'].make_form('result', 'Download')
            form['instructions'] = ('Download {} document.'
                                    .format(ext.upper()))
            field_url = form.add_field(var='url',
                                       label='Link',
                                       ftype='text-single',
                                       value=url)
            field_url['validate']['datatype'] = 'xs:anyURI'
            session['payload'] = form
    else:
        # Report the failure; previously the user received no feedback
        # when the page could not be fetched.
        # NOTE(review): assumes result is a dict that carries
        # 'status_code' on failure - confirm in fetch.http.
        text_error = ('Failed to fetch {}'
                      '\n\n'
                      'Reason: {}'.format(url, result.get('status_code')))
        session['notes'] = [['error', text_error]]
    session['allow_complete'] = True
    session['next'] = None
    session['prev'] = None
    return session
@ -700,6 +759,36 @@ class Slixfeed(slixmpp.ClientXMPP):
db_file = config.get_pathname_to_database(jid_file) db_file = config.get_pathname_to_database(jid_file)
# scan = payload['values']['scan'] # scan = payload['values']['scan']
url = payload['values']['subscription'] url = payload['values']['subscription']
if isinstance(url, list) and len(url) > 1:
urls = url
agree_count = 0
error_count = 0
exist_count = 0
for url in urls:
result = await action.add_feed(db_file, url)
if result['error']:
error_count += 1
elif result['exist']:
exist_count += 1
else:
agree_count += 1
form = self['xep_0004'].make_form('form', 'Subscription')
if agree_count:
response = ('Added {} new subscription(s) out of {}'
.format(agree_count, len(url)))
session['notes'] = [['info', response]]
else:
response = ('No new subscription was added. '
'Exist: {} Error: {}.'
.format(exist_count, error_count))
session['notes'] = [['error', response]]
session['allow_prev'] = True
session['next'] = None
session['payload'] = None
session['prev'] = self._handle_subscription_add
else:
if isinstance(url, list):
url = url[0]
result = await action.add_feed(db_file, url) result = await action.add_feed(db_file, url)
if isinstance(result, list): if isinstance(result, list):
results = result results = result
@ -707,9 +796,9 @@ class Slixfeed(slixmpp.ClientXMPP):
form['instructions'] = ('Discovered {} subscriptions for {}' form['instructions'] = ('Discovered {} subscriptions for {}'
.format(len(results), url)) .format(len(results), url))
options = form.add_field(var='subscription', options = form.add_field(var='subscription',
ftype='list-single', ftype='list-multi',
label='Subscribe', label='Subscribe',
desc=('Select a subscription to add.'), desc=('Select subscriptions to add.'),
required=True) required=True)
for result in results: for result in results:
options.addOption(result['name'], result['link']) options.addOption(result['name'], result['link'])
@ -736,7 +825,7 @@ class Slixfeed(slixmpp.ClientXMPP):
# session['notes'] = [['warn', response]] # Not supported by Gajim # session['notes'] = [['warn', response]] # Not supported by Gajim
# session['notes'] = [['info', response]] # session['notes'] = [['info', response]]
form = self['xep_0004'].make_form('form', 'Subscription') form = self['xep_0004'].make_form('form', 'Subscription')
form['instructions'] = ('Subscription is assigned at index {}.' form['instructions'] = ('Subscription is already assigned at index {}.'
'\n' '\n'
'{}' '{}'
.format(result['index'], result['name'])) .format(result['index'], result['name']))
@ -775,14 +864,14 @@ class Slixfeed(slixmpp.ClientXMPP):
form.add_field(var='subscription', form.add_field(var='subscription',
ftype='hidden', ftype='hidden',
value=result['link']) value=result['link'])
session['allow_complete'] = True session['allow_complete'] = False
session['allow_prev'] = False session['has_next'] = True
# session['allow_prev'] = False
# Gajim: Will offer next dialog but as a result, not as form. # Gajim: Will offer next dialog but as a result, not as form.
# session['has_next'] = False # session['has_next'] = False
session['has_next'] = True
session['next'] = self._handle_subscription_editor session['next'] = self._handle_subscription_editor
session['payload'] = form session['payload'] = form
session['prev'] = None # session['prev'] = None
return session return session
@ -985,8 +1074,7 @@ class Slixfeed(slixmpp.ClientXMPP):
ftype='list-single', ftype='list-single',
label='Action', label='Action',
desc='Select action type.', desc='Select action type.',
required=True, required=True)
value='edit')
options.addOption('Enable subscriptions', 'enable') options.addOption('Enable subscriptions', 'enable')
options.addOption('Disable subscriptions', 'disable') options.addOption('Disable subscriptions', 'disable')
options.addOption('Modify subscriptions', 'edit') options.addOption('Modify subscriptions', 'edit')
@ -1279,8 +1367,7 @@ class Slixfeed(slixmpp.ClientXMPP):
options = form.add_field(var='option', options = form.add_field(var='option',
ftype='list-single', ftype='list-single',
label='Choose', label='Choose',
required=True, required=True)
value='import')
# options.addOption('Activity', 'activity') # options.addOption('Activity', 'activity')
# options.addOption('Filters', 'filter') # options.addOption('Filters', 'filter')
# options.addOption('Statistics', 'statistics') # options.addOption('Statistics', 'statistics')
@ -1337,7 +1424,7 @@ class Slixfeed(slixmpp.ClientXMPP):
'Details:\n' 'Details:\n'
' Jabber ID: {}\n' ' Jabber ID: {}\n'
' Timestamp: {}\n' ' Timestamp: {}\n'
.format(jid, timestamp())) .format(jid, dt.timestamp()))
text_warn = 'This resource is restricted.' text_warn = 'This resource is restricted.'
session['notes'] = [['warn', text_warn]] session['notes'] = [['warn', text_warn]]
session['has_next'] = False session['has_next'] = False
@ -1403,8 +1490,7 @@ class Slixfeed(slixmpp.ClientXMPP):
options = form.add_field(var='option', options = form.add_field(var='option',
ftype='list-single', ftype='list-single',
label='About', label='About',
required=True, required=True)
value='')
options.addOption('Slixfeed', 'about') options.addOption('Slixfeed', 'about')
options.addOption('RSS Task Force', 'rtf') options.addOption('RSS Task Force', 'rtf')
# options.addOption('Manual', 'manual') # options.addOption('Manual', 'manual')
@ -1609,15 +1695,24 @@ class Slixfeed(slixmpp.ClientXMPP):
# TODO Attempt to look up for feeds of hostname of JID (i.e. scan # TODO Attempt to look up for feeds of hostname of JID (i.e. scan
# jabber.de for feeds for juliet@jabber.de) # jabber.de for feeds for juliet@jabber.de)
async def _handle_promoted(self, iq, session): async def _handle_promoted(self, iq, session):
url = action.pick_a_feed()
form = self['xep_0004'].make_form('form', 'Subscribe') form = self['xep_0004'].make_form('form', 'Subscribe')
# NOTE Refresh button would be of use # NOTE Refresh button would be of use
form['instructions'] = 'Featured subscriptions' form['instructions'] = 'Featured subscriptions'
url = action.pick_a_feed()
# options = form.add_field(var='choice',
# ftype="boolean",
# label='Subscribe to {}?'.format(url['name']),
# desc='Click to subscribe.')
# form.add_field(var='subscription',
# ftype='hidden',
# value=url['link'])
options = form.add_field(var='subscription', options = form.add_field(var='subscription',
ftype="list-single", ftype="list-single",
label='Subscribe', label='Subscribe',
desc='Click to subscribe.', desc='Click to subscribe.')
value=url['link']) for i in range(10):
url = action.pick_a_feed()
options.addOption(url['name'], url['link']) options.addOption(url['name'], url['link'])
jid = session['from'].bare jid = session['from'].bare
if '@' in jid: if '@' in jid:
@ -1638,7 +1733,7 @@ class Slixfeed(slixmpp.ClientXMPP):
url = result url = result
# Automatically set priority to 5 (highest) # Automatically set priority to 5 (highest)
if url['link']: options.addOption(url['name'], url['link']) if url['link']: options.addOption(url['name'], url['link'])
session['allow_complete'] = True session['allow_complete'] = False
session['allow_prev'] = True session['allow_prev'] = True
# singpolyma: Don't use complete action if there may be more steps # singpolyma: Don't use complete action if there may be more steps
# https://gitgud.io/sjehuda/slixfeed/-/merge_requests/13 # https://gitgud.io/sjehuda/slixfeed/-/merge_requests/13
@ -1690,7 +1785,7 @@ class Slixfeed(slixmpp.ClientXMPP):
options = form.add_field(var='action', options = form.add_field(var='action',
ftype='list-single', ftype='list-single',
label='Action', label='Action',
value='view') required=True)
options.addOption('Display', 'view') options.addOption('Display', 'view')
options.addOption('Edit', 'edit') options.addOption('Edit', 'edit')
session['has_next'] = True session['has_next'] = True
@ -1701,7 +1796,8 @@ class Slixfeed(slixmpp.ClientXMPP):
options = form.add_field(var='action', options = form.add_field(var='action',
ftype='list-single', ftype='list-single',
label='Action', label='Action',
value='message') value='message',
required=True)
options.addOption('Request authorization From', 'from') options.addOption('Request authorization From', 'from')
options.addOption('Resend authorization To', 'to') options.addOption('Resend authorization To', 'to')
options.addOption('Send message', 'message') options.addOption('Send message', 'message')

View file

@ -511,16 +511,96 @@ async def message(self, message):
response = 'Gemini and Gopher are not supported yet.' response = 'Gemini and Gopher are not supported yet.'
XmppMessage.send_reply(self, message, response) XmppMessage.send_reply(self, message, response)
# TODO xHTML, HTMLZ, MHTML # TODO xHTML, HTMLZ, MHTML
case _ if (message_lowercase.startswith('page')): case _ if (message_lowercase.startswith('content') or
message_text = message_text[5:] message_lowercase.startswith('page')):
ix_url = message_text.split(' ')[0] if message_lowercase.startswith('content'):
await action.download_document(self, message, jid, jid_file,
message_text, ix_url, False)
case _ if (message_lowercase.startswith('content')):
message_text = message_text[8:] message_text = message_text[8:]
readability = True
else:
message_text = message_text[5:]
readability = False
ix_url = message_text.split(' ')[0] ix_url = message_text.split(' ')[0]
await action.download_document(self, message, jid, jid_file, ext = ' '.join(message_text.split(' ')[1:])
message_text, ix_url, True) ext = ext if ext else 'pdf'
url = None
error = None
response = None
if ext in ('epub', 'html', 'markdown', 'md', 'pdf', 'text',
'txt'):
match ext:
case 'markdown':
ext = 'md'
case 'text':
ext = 'txt'
status_type = 'dnd'
status_message = ('📃️ Procesing request to produce {} '
'document...'.format(ext.upper()))
XmppPresence.send(self, jid, status_message,
status_type=status_type)
db_file = config.get_pathname_to_database(jid_file)
cache_dir = config.get_default_cache_directory()
if not os.path.isdir(cache_dir):
os.mkdir(cache_dir)
if not os.path.isdir(cache_dir + '/readability'):
os.mkdir(cache_dir + '/readability')
if ix_url:
try:
ix = int(ix_url)
try:
url = sqlite.get_entry_url(db_file, ix)
url = url[0]
except:
response = 'No entry with index {}'.format(ix)
except:
url = ix_url
if url:
url = uri.remove_tracking_parameters(url)
url = (uri.replace_hostname(url, 'link')) or url
result = await fetch.http(url)
if not result['error']:
data = result['content']
code = result['status_code']
title = get_document_title(data)
title = title.strip().lower()
for i in (' ', '-'):
title = title.replace(i, '_')
for i in ('?', '"', '\'', '!'):
title = title.replace(i, '')
filename = os.path.join(
cache_dir, 'readability',
title + '_' + dt.timestamp() + '.' + ext)
error = action.generate_document(data, url,
ext, filename,
readability)
if error:
response = ('> {}\n'
'Failed to export {}. '
'Reason: {}'.format(url,
ext.upper(),
error))
else:
url = await XmppUpload.start(self, jid,
filename)
chat_type = await get_chat_type(self, jid)
XmppMessage.send_oob(self, jid, url,
chat_type)
else:
response = ('> {}\n'
'Failed to fetch URL. Reason: {}'
.format(url, code))
await task.start_tasks_xmpp(self, jid, ['status'])
else:
response = ('No action has been taken.'
'\n'
'Missing argument. '
'Enter URL or entry index number.')
else:
response = ('Unsupported filetype.\n'
'Try: epub, html, md (markdown), '
'pdf, or txt (text)')
if response:
logging.warning('Error for URL {}: {}'.format(url, error))
XmppMessage.send_reply(self, message, response)
case _ if (message_lowercase.startswith('http')) and( case _ if (message_lowercase.startswith('http')) and(
message_lowercase.endswith('.opml')): message_lowercase.endswith('.opml')):
url = message_text url = message_text