forked from sch/Slixfeed
Initial commit
This commit is contained in:
parent
c5c99765b4
commit
2d8ecaeb22
1 changed files with 619 additions and 0 deletions
619
slixfeed.py
Normal file
619
slixfeed.py
Normal file
|
@ -0,0 +1,619 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Sun May 15 17:09:05 2022
|
||||
|
||||
@author: Schimon Jehudah, Adv.
|
||||
"""
|
||||
from argparse import ArgumentParser
|
||||
from asyncio.exceptions import IncompleteReadError
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import date
|
||||
from getpass import getpass
|
||||
from http.client import IncompleteRead
|
||||
from urllib import error
|
||||
#from urllib.parse import urlparse
|
||||
#from xdg import BaseDirectory
|
||||
|
||||
import asyncio
|
||||
import feedparser
|
||||
import logging
|
||||
import os
|
||||
import slixmpp
|
||||
import sqlite3
|
||||
from sqlite3 import Error
|
||||
import sys
|
||||
#import xdg
|
||||
|
||||
class EchoBot(slixmpp.ClientXMPP):
    """
    Slixmpp bot that will send updates of feeds it
    receives.
    """

    # Seconds to wait between two rounds of feed downloads.
    FEED_CHECK_INTERVAL = 60 * 30
    # Seconds to wait between two rounds of delivering unread entries.
    FEED_SEND_INTERVAL = 10
    # Printed on the console while the database directory is missing.
    NO_DATABASE_HELP = """
        No database directory was found. \n
        To create News database,send these messages to bot: \n
        add feed https://reclaimthenet.org/feed/
        update
        """

    def __init__(self, jid, password):
        """
        Create the bot and register its event handlers.

        :param jid: JID the bot logs in with
        :param password: password of that JID
        """
        slixmpp.ClientXMPP.__init__(self, jid, password)

        # Handles of the background loops started by start().
        self._feed_tasks = []

        # The session_start event will be triggered when
        # the bot establishes its connection with the server
        # and the XML streams are ready for use. We want to
        # listen for this event so that we can initialize
        # our roster.
        self.add_event_handler("session_start", self.start)

        # The message event is triggered whenever a message
        # stanza is received. Be aware that this includes
        # MUC messages and error messages.
        self.add_event_handler("message", self.message)

    async def start(self, event):
        """
        Process the session_start event.

        Broadcasts the initial presence, requests the roster and
        makes sure the two background loops (check_updates and
        send_updates) are running.

        NOTE(review): the indentation of the original file was lost,
        so the place where the loops were scheduled could not be
        recovered; scheduling them here is a deliberate choice.

        Arguments:
            event -- An empty dictionary. The session_start
                     event does not provide any additional
                     data.
        """
        self.send_presence()
        await self.get_roster()
        self._start_feed_tasks()

    def _start_feed_tasks(self):
        """Schedule the background loops unless they are already running."""
        # session_start can fire again after a reconnect; do not
        # stack a second pair of loops on top of a live one.
        if any(not task.done() for task in self._feed_tasks):
            return
        self._feed_tasks = [
            asyncio.ensure_future(self.check_updates()),
            asyncio.ensure_future(self.send_updates()),
        ]

    def message(self, msg):
        """
        Process incoming message stanzas. Be aware that this also
        includes MUC messages and error messages.

        Only 'chat' and 'normal' messages are handled. The body is
        whitespace-normalised and matched against the commands
        'update', 'recent updates <n>', 'list feeds',
        'add feed <url>', 'remove feed <id>' and 'status <id>'.
        Messages that match no command are ignored.

        Arguments:
            msg -- The received message stanza. See the documentation
                   for stanza objects and the Message stanza to see
                   how it may be used.
        """
        if msg['type'] not in ('chat', 'normal'):
            return

        message = " ".join(msg['body'].split())
        sender = msg['from'].bare
        # Stays None when no command matched, so nothing is replied.
        action = None

        if message.startswith('update'):
            print("sending to: " + sender)
            news = initdb(sender, False, download_updates)
            if news:
                today = str(date.today())
                news.insert(0, 'News fetched on: ' + today)
                for new in news:
                    msg.reply(new).send()
                action = "End of News update"
            else:
                action = "No News update"
        elif message.startswith('recent updates '):
            # NOTE(review): last_entries is not defined anywhere in
            # this file, so this command raises NameError until it is
            # implemented.
            # The prefix is 15 characters long, trailing space included.
            action = initdb(sender, message[15:], last_entries)
        elif message.startswith('list feeds'):
            action = initdb(sender, False, list_subscriptions)
        elif message.startswith('add feed '):
            action = initdb(sender, message[9:], add_feed)
        elif message.startswith('remove feed '):
            action = initdb(sender, message[12:], remove_feed)
        elif message.startswith('status '):
            action = initdb(sender, message[7:], toggle_status)

        if action is not None:
            msg.reply(action).send()

    def _known_jids(self):
        """
        List the JIDs that own a database file.

        Prints NO_DATABASE_HELP and returns an empty list when the
        database directory does not exist.

        :return: list of JIDs (database file names without ".db")
        """
        db_dir = get_default_dbdir()
        if not os.path.isdir(db_dir):
            print(self.NO_DATABASE_HELP)
            return []
        # Skip anything that is not a database, e.g. the "-journal"
        # files SQLite may create next to it.
        return [name[:-3] for name in os.listdir(db_dir)
                if name.endswith('.db')]

    async def check_updates(self):
        """
        Download fresh entries of every known JID, forever.

        One round runs every FEED_CHECK_INTERVAL seconds.
        """
        while True:
            for jid in self._known_jids():
                # NOTE: initdb/download_updates are synchronous calls.
                initdb(jid, False, download_updates)
            await asyncio.sleep(self.FEED_CHECK_INTERVAL)

    async def send_updates(self):
        """
        Deliver unread entries, forever.

        Every FEED_SEND_INTERVAL seconds at most one unread entry per
        JID is fetched with get_unread and sent as a chat message.
        """
        while True:
            for jid in self._known_jids():
                new = initdb(jid, False, get_unread)
                if new:
                    msg = self.make_message(mto=jid,
                                            mbody=new,
                                            mtype='chat')
                    msg.send()
            await asyncio.sleep(self.FEED_SEND_INTERVAL)
|
||||
|
||||
# Function from buku
|
||||
# https://github.com/jarun/buku
|
||||
# Arun Prakash Jana (jarun)
|
||||
# Dmitry Marakasov (AMDmi3)
|
||||
def get_default_dbdir():
    """Determine the directory path where the database files are stored.

    Lookup order (a variable that is unset or empty is skipped):

    1. ``$XDG_DATA_HOME``
    2. ``$HOME/.local/share``
    3. ``%APPDATA%`` (only when the platform is Windows)
    4. the current directory

    Returns
    -------
    str
        Path to the database directory. For the first three cases a
        ``slixfeed`` sub-directory is appended; the current-directory
        fallback is returned as is.
    """
    data_home = os.environ.get('XDG_DATA_HOME')
    if not data_home:
        home = os.environ.get('HOME')
        if home:
            data_home = os.path.join(home, '.local', 'share')
        elif sys.platform == 'win32':
            data_home = os.environ.get('APPDATA')

    if not data_home:
        return os.path.abspath('.')

    return os.path.join(data_home, 'slixfeed')
|
||||
|
||||
|
||||
# TODO Perhaps this needs to be executed
|
||||
# just once per program execution
|
||||
def initdb(jid, message, callback):
    """
    Open the database of a JID, make sure its tables exist and
    run a callback on it
    :param jid: JID; its database file is named "<jid>.db"
    :param message: argument handed to the callback, or a false
                    value when the callback takes none
    :param callback: function called as callback(conn, message)
                     or, without message, as callback(conn)
    :return: the value returned by the callback, or None when the
             database connection could not be created
    """
    db_dir = get_default_dbdir()
    # Also creates missing parent directories.
    os.makedirs(db_dir, exist_ok=True)
    # An absolute path avoids changing the working directory of
    # the whole process.
    db_file = os.path.join(db_dir, "{}.db".format(jid))

    feeds_table_sql = """
        CREATE TABLE IF NOT EXISTS feeds (
            id integer PRIMARY KEY,
            name text,
            address text NOT NULL,
            status integer,
            updated text
        ); """

    entries_table_sql = """
        CREATE TABLE IF NOT EXISTS entries (
            id integer PRIMARY KEY,
            title text NOT NULL,
            summary text NOT NULL,
            link text NOT NULL,
            source text,
            read integer
        ); """

    # create a database connection
    conn = create_connection(db_file)
    if conn is None:
        print("Error! cannot create the database connection.")
        return None

    try:
        # create tables
        create_table(conn, feeds_table_sql)
        create_table(conn, entries_table_sql)

        if message:
            return callback(conn, message)
        return callback(conn)
    finally:
        # One connection is opened per call; release it again.
        conn.close()
|
||||
|
||||
|
||||
def create_connection(db_file):
    """
    Open a connection to the SQLite database stored in db_file
    :param db_file: database file
    :return: Connection object, or None when connecting failed
             (the error is printed)
    """
    try:
        return sqlite3.connect(db_file)
    except Error as problem:
        print(problem)
    return None
|
||||
|
||||
|
||||
def create_table(conn, create_table_sql):
    """
    Run a CREATE TABLE statement; an SQLite error is printed
    instead of being raised
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return:
    """
    try:
        conn.cursor().execute(create_table_sql)
    except Error as problem:
        print(problem)
|
||||
|
||||
|
||||
# def setup_info(jid):
|
||||
# def start_process(jid):
|
||||
def download_updates(conn):
    """
    Fetch every subscribed feed and store its new entries
    For each feed the stored entries are trimmed with remove_entry,
    entries not yet known to check_entry are added as unread, and
    the feed's update date is set when something was added.
    :param conn: Connection object
    :return:
    """
    with conn:
        # Read all addresses first: the statements and commits
        # below run on the same connection as this query.
        sources = [row[0] for row in get_subscriptions(conn)]
        for source in sources:
            try:
                feed = feedparser.parse(source)
            except (IncompleteReadError, IncompleteRead, error.URLError) as e:
                print(e)
                continue
            print("# " + source)
            if feed.bozo:
                print('feed.bozo')
                print(source)
            if not feed.entries:
                # Nothing was received (e.g. the download failed);
                # keep what is stored instead of trimming it away.
                continue
            # TODO Place these couple of lines back down
            # NOTE Need to correct the SQL statement to do so
            length = len(feed.entries)
            remove_entry(conn, source, length)
            added = False
            for item in feed.entries:
                # Feeds may omit title or link.
                title = item.get('title') or '*** No title ***'
                link = item.get('link') or source
                if check_entry(conn, title, link):
                    continue
                if 'summary' in item:
                    # Remove HTML tags
                    summary = BeautifulSoup(item['summary'], "lxml").text
                    # TODO Limit text length
                else:
                    summary = '*** No summary ***'
                add_entry(conn, (title, summary, link, source, 0))
                added = True
            if added:
                set_date(conn, source)
|
||||
#make_message
|
||||
# message = title + '\n\n' + summary + '\n\nLink: ' + link
|
||||
# print(message)
|
||||
# news.append(message)
|
||||
# print(len(news))
|
||||
# return news
|
||||
|
||||
|
||||
def check_feed(conn, url):
    """
    Look up a feed by its address
    :param conn: Connection object
    :param url: address of the feed
    :return: row holding the feed id, or None when no feed has
             this address
    """
    query = "SELECT id FROM feeds WHERE address = ?"
    return conn.cursor().execute(query, (url,)).fetchone()
|
||||
|
||||
|
||||
def add_feed(conn, url):
    """
    Add a new feed into the feeds table
    The feed is downloaded once to learn its title and stored
    with status 1. A feed that is already subscribed is left
    untouched.
    :param conn: Connection object
    :param url: address of the feed
    :return: string to be sent back to the user
    """
    if check_feed(conn, url):
        return 'News source "{}" is already in subscriptions list'.format(url)

    # A feed without a title is stored with name NULL.
    title = feedparser.parse(url).get('feed', {}).get('title')
    feed = (title, url, 1)
    cur = conn.cursor()
    sql = """ INSERT INTO feeds(name,address,status)
              VALUES(?,?,?) """
    cur.execute(sql, feed)
    conn.commit()

    source = title if title else url
    return 'News source "{}" has been added to subscriptions list'.format(source)
|
||||
|
||||
|
||||
def remove_feed(conn, id):
    """
    Delete a feed, and the entries that came from it, by feed id
    :param conn: Connection object
    :param id: id of the feed
    :return: string to be sent back to the user
    """
    # You have chose to remove feed (title, url) from your feed list.
    # Enter "delete" to confirm removal.
    cur = conn.cursor()
    sql = 'SELECT address FROM feeds WHERE id = ?'
    row = cur.execute(sql, (id,)).fetchone()
    if row is None:
        # Unknown id: there is nothing to delete.
        return 'No news source with ID {} was found'.format(id)
    url = row[0]
    sql = 'DELETE FROM entries WHERE source = ? '
    cur.execute(sql, (url,))
    sql = 'DELETE FROM feeds WHERE id = ?'
    cur.execute(sql, (id,))
    conn.commit()
    return 'News source "{}" has been removed from subscriptions list'.format(url)
|
||||
|
||||
|
||||
def get_unread(conn):
    """
    Fetch one unread entry and mark it as read
    :param conn: Connection object
    :return: string "title, summary, Link: link" separated by
             blank lines, or False when no entry is unread
    """
    cur = conn.cursor()
    # Oldest unread entry first.
    sql = ("SELECT id, title, summary, link FROM entries "
           "WHERE read = 0 ORDER BY id ASC LIMIT 1")
    row = cur.execute(sql).fetchone()
    if row is None:
        return False
    entry_id, title, summary, link = row
    entry = '{}\n\n{}\n\nLink: {}'.format(title, summary, link)
    # mark_as_read commits the change itself.
    mark_as_read(conn, entry_id)
    return entry
|
||||
|
||||
|
||||
def mark_as_read(conn, id):
    """
    Flag an entry as read and blank its summary
    :param conn: Connection object
    :param id: id of the entry
    :return:
    """
    statement = "UPDATE entries SET summary = '', read = 1 WHERE id = ?"
    conn.cursor().execute(statement, (id,))
    conn.commit()
|
||||
|
||||
|
||||
# TODO test
|
||||
def toggle_status(conn, id):
    """
    Toggle the status of a feed between 1 and 0
    A status other than 1 (including none at all) becomes 1.
    :param conn: Connection object
    :param id: id of the feed
    :return: string to be sent back to the user
    """
    cur = conn.cursor()
    sql = "SELECT status FROM feeds WHERE id = ?"
    # Read the stored value; comparing the cursor itself to 1
    # is never true.
    row = cur.execute(sql, (id,)).fetchone()
    if row is None:
        return 'No news source with ID {} was found'.format(id)
    status = 0 if row[0] == 1 else 1
    sql = "UPDATE feeds SET status = :status WHERE id = :id"
    cur.execute(sql, {"status": status, "id": id})
    conn.commit()
    return 'News source status has changed to {}'.format(status)
|
||||
|
||||
|
||||
def set_date(conn, url):
    """
    Set last update date of feed to today
    :param conn: Connection object
    :param url: url of the feed
    :return:
    """
    # Store the ISO text (YYYY-MM-DD) explicitly rather than
    # handing a date object to sqlite3.
    today = date.today().isoformat()
    cur = conn.cursor()
    sql = "UPDATE feeds SET updated = :today WHERE address = :url"
    cur.execute(sql, {"today": today, "url": url})
    conn.commit()
|
||||
|
||||
|
||||
def get_subscriptions(conn):
    """
    Query the addresses of all feeds
    :param conn: Connection object
    :return: cursor yielding one (address,) row per feed
    """
    return conn.cursor().execute("SELECT address FROM feeds")
|
||||
|
||||
|
||||
def list_subscriptions(conn):
    """
    Build a readable list of all feeds
    :param conn: Connection object
    :return: string with name, address, last update and id of
             every feed
    """
    rows = conn.cursor().execute(
        "SELECT name, address, updated, id FROM feeds")
    item = '\n {} \n {} \n Last updated: {} \n ID: {} \n'
    pieces = ['List of subscriptions: \n']
    pieces.extend(item.format(*(str(value) for value in row))
                  for row in rows)
    return ''.join(pieces)
|
||||
|
||||
|
||||
def check_entry(conn, title, link):
    """
    Look up an entry by its title and link
    :param conn: Connection object
    :param title: title of the entry
    :param link: link of the entry
    :return: row holding the entry id, or None when no entry
             matches both values
    """
    wanted = {"title": title, "link": link}
    query = "SELECT id FROM entries WHERE title = :title and link = :link"
    return conn.cursor().execute(query, wanted).fetchone()
|
||||
|
||||
|
||||
def add_entry(conn, entry):
    """
    Insert one entry into the entries table and commit
    :param conn: Connection object
    :param entry: values for title, summary, link, source and
                  read, in this order
    :return:
    """
    insert = """ INSERT INTO entries(title,summary,link,source,read)
                 VALUES(?,?,?,?,?) """
    conn.cursor().execute(insert, entry)
    conn.commit()
|
||||
|
||||
|
||||
def remove_entry(conn, source, length):
    """
    Maintain list of entries
    When more entries of a source are stored than the feed
    returned, delete the oldest ones so that only the returned
    amount is kept. Nothing is deleted otherwise.
    :param conn: Connection object
    :param source: address of the feed the entries came from
    :param length: number of entries returned by the feed
    :return:
    """
    cur = conn.cursor()
    # FIXED
    # Dino empty titles are not counted https://dino.im/index.xml
    # SOLVED
    # Add text if is empty
    # title = '*** No title ***' if not entry.title else entry.title
    sql = "SELECT count(id) FROM entries WHERE source = ?"
    count = cur.execute(sql, (source,)).fetchone()[0]
    limit = count - length
    # Must be strictly positive: SQLite treats a negative LIMIT
    # as "no limit", which would delete every entry of the source.
    if limit > 0:
        sql = "DELETE FROM entries WHERE id IN (SELECT id FROM entries WHERE source = :source ORDER BY id ASC LIMIT :limit)"
        cur.execute(sql, {"source": source, "limit": limit})
        conn.commit()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command line interface; the class docstring doubles as the
    # program description.
    parser = ArgumentParser(description=EchoBot.__doc__)

    # Verbosity switches: both write to "loglevel", INFO by default.
    for short_flag, long_flag, level, text in (
            ("-q", "--quiet", logging.ERROR, "set logging to ERROR"),
            ("-d", "--debug", logging.DEBUG, "set logging to DEBUG")):
        parser.add_argument(short_flag, long_flag, help=text,
                            action="store_const", dest="loglevel",
                            const=level, default=logging.INFO)

    # Account credentials.
    parser.add_argument("-j", "--jid", dest="jid",
                        help="JID to use")
    parser.add_argument("-p", "--password", dest="password",
                        help="password to use")

    args = parser.parse_args()

    logging.basicConfig(level=args.loglevel,
                        format='%(levelname)-8s %(message)s')

    # Ask interactively for whatever was not given on the command line.
    if args.jid is None:
        args.jid = input("Username: ")
    if args.password is None:
        args.password = getpass("Password: ")

    # Create the bot and register its plugins. Plugins may depend on
    # each other, but the registration order does not matter.
    xmpp = EchoBot(args.jid, args.password)
    for plugin in (
            'xep_0030',  # Service Discovery
            'xep_0004',  # Data Forms
            'xep_0060',  # PubSub
            'xep_0199',  # XMPP Ping
    ):
        xmpp.register_plugin(plugin)

    # Connect to the XMPP server and start processing XMPP stanzas.
    xmpp.connect()
    xmpp.process()
|
Loading…
Reference in a new issue