Only operate on sources of HTTP code 200
Without this check, all news items of a feed that returns 403 could be removed, which would cause all of its news items to be resent, including those the user has already seen.
parent 52fad12660
commit 10735697ce

1 changed file with 52 additions and 30 deletions

slixfeed.py (82 changed lines: +52 -30)
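In essence, the change consults the HTTP status returned for a source before touching its stored entries. Below is a minimal sketch of that guard, not the project's exact code: prune_and_store is a hypothetical stand-in for the remove_entry/add_entry sequence in download_updates, and the (payload, status) shape of res is assumed from the res[1] == 200 test in the diff.

# Sketch only; simplified from the commit below.
# Assumption: download_feed() returns a (payload, status_code) pair,
# as the res[1] == 200 test in the diff suggests.

async def process_source(source, res, prune_and_store):
    """Touch the entry cache only when the feed was fetched successfully."""
    if res[1] != 200:
        # A 403 (or any failed fetch) yields no entries. Pruning the
        # cache here would make every stored item look new again, and
        # the whole feed would be resent once it recovers.
        return
    # The source answered 200, so its current entries are known.
    await prune_and_store(source)

Note that the diff places not only the remove_entry call but the whole entry-processing loop under the if res[1] == 200: guard, so nothing is removed or re-added after a failed fetch.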
@@ -1,5 +1,11 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+
+# vars and their meanings:
+# cur = Cursor (SQL)
+# jid = Jabber ID (XMPP)
+# res = response (HTTP)
+
 import os
 from argparse import ArgumentParser
 from asyncio.exceptions import IncompleteReadError
@@ -167,7 +173,7 @@ def print_help():
     msg = ("Slixfeed - News syndication bot for Jabber/XMPP \n"
            "\n"
            "DESCRIPTION: \n"
-           " Slixfeed is an aggregator bot for online news feeds. \n"
+           " Slixfeed is a news aggregator bot for online news feeds. \n"
            "\n"
            "BASIC USAGE: \n"
            " enable \n"
@@ -179,7 +185,7 @@ def print_help():
            "\n"
            "EDIT OPTIONS: \n"
            " feed add URL \n"
-           " Add URL to the subscriptions list. \n"
+           " Add URL to subscription list. \n"
            " feed remove ID \n"
            " Remove feed from subscription list. \n"
            " feed status ID \n"
@@ -191,6 +197,16 @@ def print_help():
            " feed recent N \n"
            " List recent N news items (up to 50 items). \n"
            "\n"
+           "BACKUP OPTIONS: \n"
+           " export opml \n"
+           " Send an OPML file with your feeds. \n"
+           " backup news html\n"
+           " Send an HTML formatted file of your news items. \n"
+           " backup news md \n"
+           " Send a Markdown file of your news items. \n"
+           " backup news text \n"
+           " Send a Plain Text file of your news items. \n"
+           "\n"
            "DOCUMENTATION: \n"
            " Slixfeed \n"
            " https://gitgud.io/sjehuda/slixfeed \n"
@@ -328,36 +344,42 @@ async def download_updates(db_file):
             return
         # TODO Place these couple of lines back down
         # NOTE Need to correct the SQL statement to do so
-        entries = feed.entries
-        length = len(entries)
-        async with DBLOCK:
-            with conn:
-                await remove_entry(conn, source, length)
+        # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
+
+        if res[1] == 200:
+            # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
+            # TODO Place these couple of lines back down
+            # NOTE Need to correct the SQL statement to do so
+            entries = feed.entries
+            length = len(entries)
+            async with DBLOCK:
+                with conn:
+                    await remove_entry(conn, source, length)
 
-        for entry in entries:
-            if entry.has_key("title"):
-                title = entry.title
-            else:
-                title = feed["feed"]["title"]
-            link = source if not entry.link else entry.link
-            with conn:
-                exist = await check_entry(conn, title, link)
+            for entry in entries:
+                if entry.has_key("title"):
+                    title = entry.title
+                else:
+                    title = feed["feed"]["title"]
+                link = source if not entry.link else entry.link
+                with conn:
+                    exist = await check_entry(conn, title, link)
 
-            if not exist:
-                if entry.has_key("summary"):
-                    summary = entry.summary
-                    # Remove HTML tags
-                    summary = BeautifulSoup(summary, "lxml").text
-                    # TODO Limit text length
-                    summary = summary.replace("\n\n", "\n")[:300] + " ⃨"
-                else:
-                    summary = '*** No summary ***'
-                    #print('~~~~~~summary not in entry')
-                entry = (title, summary, link, source, 0);
-                async with DBLOCK:
-                    with conn:
-                        await add_entry(conn, entry)
-                        await set_date(conn, source)
+                if not exist:
+                    if entry.has_key("summary"):
+                        summary = entry.summary
+                        # Remove HTML tags
+                        summary = BeautifulSoup(summary, "lxml").text
+                        # TODO Limit text length
+                        summary = summary.replace("\n\n", "\n")[:300] + " ⃨"
+                    else:
+                        summary = '*** No summary ***'
+                        #print('~~~~~~summary not in entry')
+                    entry = (title, summary, link, source, 0);
+                    async with DBLOCK:
+                        with conn:
+                            await add_entry(conn, entry)
+                            await set_date(conn, source)
 
 
 async def download_feed(url):
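The hunk ends at the signature of download_feed, so the return shape of res is not visible in this commit; the res[1] == 200 checks imply a (payload, status) pair. A hypothetical reading using aiohttp, an assumption rather than the project's confirmed implementation:

# Hypothetical reconstruction, not taken from the repository: one way
# download_feed could produce the (payload, status) pair that the
# res[1] == 200 guard above expects.
import aiohttp

async def download_feed(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # Index 0: the feed document, later given to feedparser.parse;
            # index 1: the HTTP status code checked by the caller.
            return await response.text(), response.status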