#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# TODO
#
# 0) SQL prepared statements
# 1) Autodetect feed:
#    if page is not a feed (e.g. is HTML) and contains <link rel="alternate">
# 2) OPML import/export
# 3) 2022-12-30 reduce async to (maybe) prevent inner lock. async on task: commands, downloader, updater
# vars and their meanings:
# jid = Jabber ID (XMPP)
# res = response (HTTP)
import os
from argparse import ArgumentParser
from asyncio.exceptions import IncompleteReadError
from datetime import date
from getpass import getpass
from http.client import IncompleteRead
from urllib import error
import asyncio
import logging
import sys
import time
import aiohttp
from bs4 import BeautifulSoup
from xml.etree.ElementTree import ElementTree, ParseError
from urllib.parse import urlparse
from lxml import html
import feedparser
import slixmpp
# from eliot import start_action, to_file
# # to_file(open("slixfeed.log", "w"))
# # with start_action(action_type="set_date()", jid=jid):
# # with start_action(action_type="message()", msg=msg):
import database


class Slixfeed(slixmpp.ClientXMPP):
    """
    Slixmpp news bot that will send updates
    from feeds it receives.
    """
    def __init__(self, jid, password):
        slixmpp.ClientXMPP.__init__(self, jid, password)

        # The session_start event will be triggered when
        # the bot establishes its connection with the server
        # and the XML streams are ready for use. We want to
        # listen for this event so that we can initialize
        # our roster.
        self.add_event_handler("session_start", self.start)
        self.add_event_handler("session_start", self.select_file)
        # self.add_event_handler("session_start", self.send_status)
        # self.add_event_handler("session_start", self.check_updates)

        # The message event is triggered whenever a message
        # stanza is received. Be aware that this includes
        # MUC messages and error messages.
        self.add_event_handler("message", self.message)
        self.add_event_handler("disconnected", self.reconnect)

        # Initialize event loop
        self.loop = asyncio.get_event_loop()
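        # NOTE: This loop object is reused further down by call_at()
        # to re-schedule send_update() and send_status().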

    async def start(self, event):
        """
        Process the session_start event.

        Typical actions for the session_start event are
        requesting the roster and broadcasting an initial
        presence stanza.

        Arguments:
            event -- An empty dictionary. The session_start
                     event does not provide any additional
                     data.
        """
        self.send_presence()
        await self.get_roster()

    async def message(self, msg):
        """
        Process incoming message stanzas. Be aware that this also
        includes MUC messages and error messages. It is usually
        a good idea to check the message's type before processing
        or sending replies.

        Arguments:
            msg -- The received message stanza. See the documentation
                   for stanza objects and the Message stanza to see
                   how it may be used.
        """
        if msg['type'] in ('chat', 'normal'):
            message = " ".join(msg['body'].split())
            if message.lower().startswith('help'):
                action = print_help()
            # NOTE: Might not need it
            elif message.lower().startswith('add'):
                action = await initdb(msg['from'].bare, add_feed, message[4:])
                # action = "> " + message + "\n" + action
            elif message.lower().startswith('quantum'):
                action = await initdb(msg['from'].bare, database.set_settings_value, [message[:7], message[8:]])
                # action = "Every update will contain {} news items.".format(action)
            elif message.lower().startswith('disable'):
                action = await initdb(msg['from'].bare, database.set_settings_value, message)
                # action = "Updates are disabled."
            elif message.lower().startswith('enable'):
                action = await initdb(msg['from'].bare, database.set_settings_value, message)
                # action = "Updates are enabled."
            elif message.lower().startswith('interval'):
                action = await initdb(msg['from'].bare, database.set_settings_value, [message[:8], message[9:]])
                # action = "Updates will be sent every {} minutes.".format(action)
            elif message.lower().startswith('list'):
                action = await initdb(msg['from'].bare, database.list_subscriptions)
            elif message.lower().startswith('recent'):
                action = await initdb(msg['from'].bare, database.last_entries, message[7:])
            elif message.lower().startswith('remove'):
                action = await initdb(msg['from'].bare, database.remove_feed, message[7:])
            elif message.lower().startswith('search'):
                action = await initdb(msg['from'].bare, database.search_entries, message[7:])
            elif message.lower().startswith('status'):
                action = await initdb(msg['from'].bare, database.toggle_status, message[7:])
            elif message.lower().startswith('unread'):
                action = await initdb(msg['from'].bare, database.statistics)
            else:
                action = "Unknown command. Send \"help\" for a list of commands"
            msg.reply(action).send()

            print("COMMAND:", message)
            print("ACCOUNT: " + str(msg['from']))

    async def select_file(self, event):
        """
        Initiate actions by JID (Jabber ID).

        :param self:
        :param event:
        """
        while True:
            db_dir = get_default_dbdir()
            if not os.path.isdir(db_dir):
                msg = ("Slixfeed cannot work without a database.\n"
                       "To create a database, follow these steps:\n"
                       "Add Slixfeed contact to your roster\n"
                       "Send a feed to the bot by:\n"
                       "feed add https://reclaimthenet.org/feed/")
                print(msg)
            else:
                os.chdir(db_dir)
                files = os.listdir()
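                # Each database file is named after a subscriber JID,
                # so one task group branch is spawned per subscriber.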
                async with asyncio.TaskGroup() as tg:
                    for file in files:
                        if file.endswith('.db') and not file.endswith('.db-jour.db'):
                            jid = file[:-3]
                            tg.create_task(self.jid(event, jid))

    async def jid(self, event, jid):
        """
        JID (Jabber ID) task manager.

        :param self:
        :param event:
        :param jid: Jabber ID
        """
        enabled = await initdb(jid, database.get_settings_value, 'enabled')
        print("enabled", enabled, jid)
        if enabled:
            async with asyncio.TaskGroup() as tg:
                tg.create_task(self.check_updates(event, jid))
                tg.create_task(self.send_update(event, jid))
                tg.create_task(self.send_status(event, jid))

    async def check_updates(self, event, jid):
        """
        Start calling for update checks.

        :param self:
        :param event:
        :param jid: Jabber ID
        """
        while True:
            print("> CHCK UPDATE", jid)
            await initdb(jid, download_updates)
            await asyncio.sleep(60 * 90)
            # Schedule to call this function again in 90 minutes
            # self.loop.call_at(self.loop.time() + 60 * 90, self.loop.create_task, self.check_updates(event, jid))

    async def send_update(self, event, jid):
        """
        Send news items as messages.

        :param self:
        :param event:
        :param jid: Jabber ID
        """
        new = await initdb(
            jid,
            database.get_entry_unread
        )
        if new:
            print("> SEND UPDATE", jid)
            self.send_message(
                mto=jid,
                mbody=new,
                mtype='chat'
            )
        interval = await initdb(jid, database.get_settings_value, 'interval')
        # await asyncio.sleep(60 * interval)
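        # NOTE: The coroutine object is created now, and call_at()
        # hands it to loop.create_task() once the deadline arrives,
        # re-scheduling this method without recursion or sleep.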
        self.loop.call_at(
            self.loop.time() + 60 * interval,
            self.loop.create_task,
            self.send_update(event, jid)
        )

    async def send_status(self, event, jid):
        """
        Send status message.

        :param self:
        :param event:
        :param jid: Jabber ID
        """
        print("> SEND STATUS", jid)
        unread = await initdb(
            jid,
            database.get_number_of_entries_unread
        )
        if unread:
            msg_status = '📰 News items: ' + str(unread)
        else:
            msg_status = '🗞 No News'
        # print(msg_status, 'for', jid)
        self.send_presence(
            pstatus=msg_status,
            pto=jid,
            # pfrom=None
        )
        # await asyncio.sleep(60 * 20)
        self.loop.call_at(
            self.loop.time() + 60 * 20,
            self.loop.create_task,
            self.send_status(event, jid)
        )


def print_help():
    """
    Print help manual.
    """
    msg = ("Slixfeed - News syndication bot for Jabber/XMPP\n"
           "\n"
           "DESCRIPTION:\n"
           " Slixfeed is a news aggregator bot for online news feeds.\n"
           " Supported filetypes: Atom, RDF and RSS.\n"
           "\n"
           "BASIC USAGE:\n"
           " enable\n"
           "   Send updates.\n"
           " disable\n"
           "   Stop sending updates.\n"
           " batch N\n"
           "   Send N updates on each interval.\n"
           " interval N\n"
           "   Send an update each N minutes.\n"
           " feed list\n"
           "   List subscriptions.\n"
           "\n"
           "EDIT OPTIONS:\n"
           " add URL\n"
           "   Add URL to subscription list.\n"
           " remove ID\n"
           "   Remove feed from subscription list.\n"
           " status ID\n"
           "   Toggle update status of feed.\n"
           "\n"
           "SEARCH OPTIONS:\n"
           " search TEXT\n"
           "   Search news items by given keywords.\n"
           " recent N\n"
           "   List recent N news items (up to 50 items).\n"
           "\n"
           "STATISTICS OPTIONS:\n"
           " analyses\n"
           "   Show report and statistics of feeds.\n"
           " obsolete\n"
           "   List feeds that are not available.\n"
           " unread\n"
           "   Print number of unread news items.\n"
           "\n"
           "BACKUP OPTIONS:\n"
           " export opml\n"
           "   Send an OPML file with your feeds.\n"
           " backup news html\n"
           "   Send an HTML formatted file of your news items.\n"
           " backup news md\n"
           "   Send a Markdown file of your news items.\n"
           " backup news text\n"
           "   Send a Plain Text file of your news items.\n"
           "\n"
           "DOCUMENTATION:\n"
           " Slixfeed\n"
           "   https://gitgud.io/sjehuda/slixfeed\n"
           " Slixmpp\n"
           "   https://slixmpp.readthedocs.io/\n"
           " feedparser\n"
           "   https://pythonhosted.org/feedparser")
    return msg

# Function from jarun/buku
# Arun Prakash Jana (jarun)
# Dmitry Marakasov (AMDmi3)
def get_default_dbdir():
    """
    Determine the directory path where dbfile will be stored.

    If $XDG_DATA_HOME is defined, use it;
    else if $HOME exists, use it;
    else if the platform is Windows, use %APPDATA%;
    else use the current directory.

    :return: Path to database directory.

    Note
    ----
    This code was taken from the buku project.
    """
    # data_home = xdg.BaseDirectory.xdg_data_home
    data_home = os.environ.get('XDG_DATA_HOME')
    if data_home is None:
        if os.environ.get('HOME') is None:
            if sys.platform == 'win32':
                data_home = os.environ.get('APPDATA')
                if data_home is None:
                    return os.path.abspath('.')
            else:
                return os.path.abspath('.')
        else:
            data_home = os.path.join(os.environ.get('HOME'), '.local', 'share')
    return os.path.join(data_home, 'slixfeed')
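
# On a typical GNU/Linux setup the directory above resolves to
# ~/.local/share/slixfeed; initdb() creates it on first use.
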
# TODO Perhaps this needs to be executed
# just once per program execution
async def initdb(jid, callback, message=None):
    """
    Callback function to instantiate action on database.

    :param jid: JID (Jabber ID).
    :param callback: Function name.
    :param message: Optional argument, used when a message is part of
                    or required by the callback.
    """
    db_dir = get_default_dbdir()
    if not os.path.isdir(db_dir):
        os.mkdir(db_dir)
    db_file = os.path.join(db_dir, r"{}.db".format(jid))
    database.create_tables(db_file)
    # await database.set_default_values(db_file)
    if message:
        return await callback(db_file, message)
    else:
        return await callback(db_file)
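
# initdb() is the single entry point to per-JID storage; a typical
# call, as used by the message handler above:
#   action = await initdb(msg['from'].bare, database.list_subscriptions)
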
# NOTE I don't think there should be "return"
# because then we might stop scanning next URLs
async def download_updates(db_file):
    """
    Check feeds for new entries.

    :param db_file: Database filename.
    """
    urls = await database.get_subscriptions(db_file)
    for url in urls:
        # print(os.path.basename(db_file), url[0])
        source = url[0]
        res = await download_feed(source)
        # TypeError: 'NoneType' object is not subscriptable
        if res is None:
            # Skip to next feed
            # urls.next()
            # next(urls)
            continue
        await database.update_source_status(db_file, res[1], source)
        if res[0]:
            try:
                feed = feedparser.parse(res[0])
                if feed.bozo:
                    # bozo = ("WARNING: Bozo detected for feed <{}>. "
                    #         "For more information, visit "
                    #         "https://pythonhosted.org/feedparser/bozo.html"
                    #         .format(source))
                    # print(bozo)
                    valid = 0
                else:
                    valid = 1
                await database.update_source_validity(db_file, source, valid)
            except (IncompleteReadError, IncompleteRead, error.URLError) as e:
                print(e)
                # return
        # TODO Place these couple of lines back down
        # NOTE Need to correct the SQL statement to do so
        # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
        if res[1] == 200:
            entries = feed.entries
            # length = len(entries)
            # await database.remove_entry(db_file, source, length)
            await database.remove_nonexistent_entries(db_file, feed, source)
            new_entry = 0
            for entry in entries:
                if "title" in entry:
                    title = entry.title
                else:
                    title = feed["feed"]["title"]
                if "link" in entry:
                    link = entry.link
                else:
                    link = source
                exist = await database.check_entry_exist(db_file, title, link)
                if not exist:
                    new_entry = new_entry + 1
                    # TODO Enhance summary
                    if "summary" in entry:
                        summary = entry.summary
                        # Remove HTML tags
                        summary = BeautifulSoup(summary, "lxml").text
                        # TODO Limit text length
                        summary = summary.replace("\n\n", "\n")[:300] + " ⃨"
                    else:
                        summary = '*** No summary ***'
                    entry = (title, summary, link, source, 0)
                    await database.add_entry_and_set_date(db_file, source, entry)


async def download_feed(url):
    """
    Download content of given URL.

    :param url: URL.
    :return: A two-item list: [document, status] on success,
             or [False, reason] on failure.
    """
    timeout = aiohttp.ClientTimeout(total=10)
    async with aiohttp.ClientSession() as session:
        # async with aiohttp.ClientSession(trust_env=True) as session:
        try:
            async with session.get(url, timeout=timeout) as response:
                status = response.status
                if response.status == 200:
                    try:
                        doc = await response.text()
                        # print (response.content_type)
                        return [doc, status]
                    except:
                        return [False, "The content of this document doesn't appear to be textual"]
                else:
                    return [False, "HTTP Error: " + str(status)]
        except aiohttp.ClientError as e:
            print('Error', str(e))
            return [False, "Error: " + str(e)]
        except asyncio.TimeoutError as e:
            # print('Timeout:', str(e))
            return [False, "Timeout"]


async def add_feed(db_file, url):
    """
    Check whether feed exists, otherwise process it.

    :param db_file: Database filename.
    :param url: URL.
    :return: Status message.
    """
    exist = await database.check_feed_exist(db_file, url)
    if not exist:
        res = await download_feed(url)
        if res[0]:
            feed = feedparser.parse(res[0])
            title = await get_title(url, feed)
            if feed.bozo:
                bozo = ("WARNING: Bozo detected. Failed to load <{}>.".format(url))
                print(bozo)
                try:
                    # tree = etree.fromstring(res[0]) # etree is for xml
                    tree = html.fromstring(res[0])
                except:
                    return "Failed to parse URL <{}> as feed".format(url)
                print("RSS Auto-Discovery Engaged")
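                # Standard feed auto-discovery: look for <link rel="alternate">
                # elements that advertise a syndication MIME type.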
                xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]"""
                # xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
                # xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
                feeds = tree.xpath(xpath_query)
                if len(feeds) > 1:
                    msg = "RSS Auto-Discovery has found {} feeds:\n\n".format(len(feeds))
                    for feed in feeds:
                        # # The following code works;
                        # # The following code will catch
                        # # only valid resources (i.e. not 404);
                        # # The following code requires more bandwidth.
                        # res = await download_feed(feed)
                        # if res[0]:
                        #     disco = feedparser.parse(res[0])
                        #     title = disco["feed"]["title"]
                        #     msg += "{} \n {} \n\n".format(title, feed)
                        feed_name = feed.xpath('@title')[0]
                        feed_addr = feed.xpath('@href')[0]
                        msg += "{}\n{}\n\n".format(feed_name, feed_addr)
                    msg += "The above feeds were extracted from\n{}".format(url)
                    return msg
                elif feeds:
                    url = feeds[0].xpath('@href')[0]
                    # Why wouldn't add_feed return a message
                    # upon success unless return is explicitly
                    # mentioned, yet upon failure it wouldn't?
                    return await add_feed(db_file, url)
                # Search for feeds by file extension and path
                paths = [
                    "/app.php/feed",  # phpbb
                    "/atom",
                    "/atom.php",
                    "/atom.xml",
                    "/content-feeds/",
                    "/external.php?type=RSS2",
                    "/feed",  # good practice
                    "/feed.atom",
                    # "/feed.json",
                    "/feed.php",
                    "/feed.rdf",
                    "/feed.rss",
                    "/feed.xml",
                    "/feed/atom/",
                    "/feeds/news_feed",
                    "/feeds/rss/news.xml.php",
                    "/forum_rss.php",
                    "/index.php/feed",
                    "/index.php?type=atom;action=.xml",  # smf
                    "/index.php?type=rss;action=.xml",  # smf
                    "/index.rss",
                    "/latest.rss",
                    "/news",
                    "/news.xml",
                    "/news.xml.php",
                    "/news/feed",
                    "/posts.rss",  # discourse
                    "/rdf",
                    "/rdf.php",
                    "/rdf.xml",
                    "/rss",
                    # "/rss.json",
                    "/rss.php",
                    "/rss.xml",
                    "/timeline.rss",
                    "/xml/feed.rss",
                    # "?format=atom",
                    # "?format=rdf",
                    # "?format=rss",
                    # "?format=xml"
                ]
                print("RSS Scan Mode Engaged")
                feeds = {}
                for path in paths:
                    # xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
                    xpath_query = "//a[contains(@href,'{}')]".format(path)
                    addresses = tree.xpath(xpath_query)
                    parted_url = urlparse(url)
                    for address in addresses:
                        address = address.xpath('@href')[0]
                        if address.startswith('/'):
                            address = parted_url.scheme + '://' + parted_url.netloc + address
                        res = await download_feed(address)
                        if res[1] == 200:
                            try:
                                feeds[address] = feedparser.parse(res[0])["feed"]["title"]
                            except:
                                continue
                if len(feeds) > 1:
                    msg = "RSS URL scan has found {} feeds:\n\n".format(len(feeds))
                    for feed in feeds:
                        # try:
                        #     res = await download_feed(feed)
                        # except:
                        #     continue
                        feed_name = feeds[feed]
                        feed_addr = feed
                        msg += "{}\n{}\n\n".format(feed_name, feed_addr)
                    msg += "The above feeds were extracted from\n{}".format(url)
                    return msg
                elif feeds:
                    url = list(feeds)[0]
                    return await add_feed(db_file, url)
                # (HTTP) Request(s) Paths
                print("RSS Arbitrary Mode Engaged")
                feeds = {}
                parted_url = urlparse(url)
                for path in paths:
                    address = parted_url.scheme + '://' + parted_url.netloc + path
                    res = await download_feed(address)
                    if res[1] == 200:
                        # print(feedparser.parse(res[0])["feed"]["title"])
                        # feeds[address] = feedparser.parse(res[0])["feed"]["title"]
                        try:
                            title = feedparser.parse(res[0])["feed"]["title"]
                        except:
                            title = '*** No Title ***'
                        feeds[address] = title
                    # Check whether URL has a path (i.e. is not root)
                    if parted_url.path.split('/')[1]:
                        if '.rss' not in paths:
                            paths.extend([".atom", ".feed", ".rdf", ".rss"])
                        # if paths.index('.rss'):
                        #     paths.extend([".atom", ".feed", ".rdf", ".rss"])
                        address = parted_url.scheme + '://' + parted_url.netloc + '/' + parted_url.path.split('/')[1] + path
                        res = await download_feed(address)
                        if res[1] == 200:
                            print('ATTENTION')
                            print(address)
                            try:
                                title = feedparser.parse(res[0])["feed"]["title"]
                            except:
                                title = '*** No Title ***'
                            feeds[address] = title
                if len(feeds) > 1:
                    msg = "RSS URL discovery has found {} feeds:\n\n".format(len(feeds))
                    for feed in feeds:
                        feed_name = feeds[feed]
                        feed_addr = feed
                        msg += "{}\n{}\n\n".format(feed_name, feed_addr)
                    msg += "The above feeds were extracted from\n{}".format(url)
                elif feeds:
                    url = list(feeds)[0]
                    msg = await add_feed(db_file, url)
                else:
                    msg = "No news feeds were found for URL <{}>.".format(url)
            else:
                msg = await database.add_feed(db_file, title, url, res)
        else:
            msg = "Failed to get URL <{}>. Reason: {}".format(url, res[1])
    else:
        ix = exist[0]
        name = exist[1]
        msg = ("> {}\nNews source \"{}\" is already listed "
               "in the subscription list at index {}".format(url, name, ix))
    return msg


async def get_title(url, feed):
    """
    Get title of feed.

    :param url: URL
    :param feed: Parsed feed
    :return: Title or URL hostname.
    """
    try:
        title = feed["feed"]["title"]
    except:
        title = urlparse(url).netloc
    return title


def toggle_state(jid, state):
    """
    Set status of update.

    :param jid: JID (Jabber ID).
    :param state: True or False.
    :return: Status message.
    """
    db_dir = get_default_dbdir()
    db_file = os.path.join(db_dir, r"{}.db".format(jid))
    bk_file = os.path.join(db_dir, r"{}.db.bak".format(jid))
    if state:
        if os.path.exists(db_file):
            return "Updates are already enabled"
        elif os.path.exists(bk_file):
            os.renames(bk_file, db_file)
            return "Updates are now enabled"
    else:
        if os.path.exists(bk_file):
            return "Updates are already disabled"
        elif os.path.exists(db_file):
            os.renames(db_file, bk_file)
            return "Updates are now disabled"


if __name__ == '__main__':
    # Setup the command line arguments.
    parser = ArgumentParser(description=Slixfeed.__doc__)

    # Output verbosity options.
    parser.add_argument(
        "-q", "--quiet", help="set logging to ERROR",
        action="store_const", dest="loglevel",
        const=logging.ERROR, default=logging.INFO
    )
    parser.add_argument(
        "-d", "--debug", help="set logging to DEBUG",
        action="store_const", dest="loglevel",
        const=logging.DEBUG, default=logging.INFO
    )

    # JID and password options.
    parser.add_argument("-j", "--jid", dest="jid",
                        help="JID to use")
    parser.add_argument("-p", "--password", dest="password",
                        help="password to use")

    args = parser.parse_args()

    # Setup logging.
    logging.basicConfig(level=args.loglevel,
                        format='%(levelname)-8s %(message)s')

    if args.jid is None:
        args.jid = input("Username: ")
    if args.password is None:
        args.password = getpass("Password: ")

    # Setup the Slixfeed and register plugins. Note that while plugins may
    # have interdependencies, the order in which you register them does
    # not matter.
    xmpp = Slixfeed(args.jid, args.password)
    xmpp.register_plugin('xep_0004')  # Data Forms
    xmpp.register_plugin('xep_0030')  # Service Discovery
    xmpp.register_plugin('xep_0045')  # Multi-User Chat
    xmpp.register_plugin('xep_0060')  # PubSub
    xmpp.register_plugin('xep_0199')  # XMPP Ping

    # Connect to the XMPP server and start processing XMPP stanzas.
    xmpp.connect()
    xmpp.process()
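
# A typical invocation, assuming this file is saved as slixfeed.py and
# the companion database.py module is importable from the same directory:
#   python slixfeed.py -j bot@example.org -p secret -d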