forked from sch/Slixfeed
Fix new entry selection (Thank you lorenzo and roughnecks);
Fix handling of RDF documents which caused a halt during feed scan;
Ignore items without a link (Thank you Kris);
Set entry identifier to the MD5 hash of the link (Thank you TheCoffeMaker).
This commit is contained in:
parent 93ea8a9fab
commit 5babb02cf8

7 changed files with 484 additions and 297 deletions
@@ -2226,9 +2226,33 @@ async def add_entries_and_update_feed_state(db_file, feed_id, new_entries):
                 """
                 INSERT
                 INTO entries_properties(
-                feed_id, identifier, link, title, title_type, summary_text, summary_lang, summary_type, summary_base, category, comments, published, updated)
+                feed_id,
+                identifier,
+                link,
+                title,
+                title_type,
+                summary_text,
+                summary_lang,
+                summary_type,
+                summary_base,
+                category,
+                comments,
+                published,
+                updated)
                 VALUES(
-                :feed_id, :identifier, :link, :title, :title_type, :summary_text, :summary_lang, :summary_type, :summary_base, :category, :comments, :published, :updated)
+                :feed_id,
+                :identifier,
+                :link,
+                :title,
+                :title_type,
+                :summary_text,
+                :summary_lang,
+                :summary_type,
+                :summary_base,
+                :category,
+                :comments,
+                :published,
+                :updated)
                 """
                 )
             entry_properties = new_entry['entry_properties']
@@ -2847,6 +2871,35 @@ def get_entries_of_feed(db_file, feed_id):
         return items


+def get_entries_id_of_feed(db_file, feed_id):
+    """
+    Get entry IDs of given feed.
+
+    Parameters
+    ----------
+    db_file : str
+        Path to database file.
+    feed_id : str
+        Feed Id.
+    """
+    function_name = sys._getframe().f_code.co_name
+    logger.debug('{} db_file: {} feed_id: {}'
+                 .format(function_name, db_file, feed_id))
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = (
+            """
+            SELECT id
+            FROM entries_properties
+            WHERE feed_id = ?
+            ORDER BY published DESC
+            """
+            )
+        par = (feed_id,)
+        items = cur.execute(sql, par).fetchall()
+        return items
+
+
 # TODO What is this function for? 2024-01-02
 # def get_feeds(db_file):
 # """
@@ -3231,9 +3284,9 @@ def check_entry_exist(db_file, feed_id, identifier=None, title=None, link=None,
     function_name = sys._getframe().f_code.co_name
     logger.debug('{}: db_file: {} feed_id: {}'
                  .format(function_name, db_file, feed_id))
+    exist = False
     with create_connection(db_file) as conn:
         cur = conn.cursor()
-        exist = False
         if identifier:
             sql = (
                 """
@@ -3291,6 +3344,76 @@ def check_entry_exist(db_file, feed_id, identifier=None, title=None, link=None,
     return exist


+def get_entry_id_by_identifier(db_file, identifier):
+    """
+    Get entry ID by its identifier.
+
+    Parameters
+    ----------
+    db_file : str
+        Path to database file.
+    identifier : str
+        Entry identifier.
+
+    Returns
+    -------
+    result : tuple
+        Entry ID or None.
+    """
+    function_name = sys._getframe().f_code.co_name
+    logger.debug('{}: db_file: {} identifier: {}'
+                 .format(function_name, db_file, identifier))
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = (
+            """
+            SELECT id
+            FROM entries_properties
+            WHERE identifier = :identifier
+            """
+            )
+        par = {
+            "identifier": identifier
+            }
+        result = cur.execute(sql, par).fetchone()
+        return result
+
+
+def get_entry_identifier(db_file, ix):
+    """
+    Get identifier by its entry ID.
+
+    Parameters
+    ----------
+    db_file : str
+        Path to database file.
+    ix : str
+        Entry ID.
+
+    Returns
+    -------
+    result : tuple
+        Identifier or None.
+    """
+    function_name = sys._getframe().f_code.co_name
+    logger.debug('{}: db_file: {} ix: {}'
+                 .format(function_name, db_file, ix))
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = (
+            """
+            SELECT identifier
+            FROM entries_properties
+            WHERE id = :ix
+            """
+            )
+        par = {
+            "ix": ix
+            }
+        result = cur.execute(sql, par).fetchone()
+        return result
+
+
 async def set_setting_value(db_file, key_value):
     """
     Set setting value.
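Note: the helpers added above are thin named-placeholder queries over entries_properties. A minimal standalone sketch of the same pattern against an in-memory database (table layout abridged, sample row invented for illustration) also shows why callers throughout this commit index the results with [0]:

    import sqlite3

    conn = sqlite3.connect(':memory:')
    cur = conn.cursor()
    cur.execute('CREATE TABLE entries_properties ('
                'id INTEGER PRIMARY KEY, feed_id INTEGER, '
                'identifier TEXT, published TEXT)')
    cur.execute("INSERT INTO entries_properties (feed_id, identifier, published) "
                "VALUES (1, 'abc123', '2024-01-01')")

    # get_entry_id_by_identifier: fetchone() yields a tuple such as (1,) or None,
    # hence the row[0] indexing used by the callers in this commit.
    row = cur.execute('SELECT id FROM entries_properties '
                      'WHERE identifier = :identifier',
                      {'identifier': 'abc123'}).fetchone()
    entry_id = row[0] if row else None

    # get_entries_id_of_feed: fetchall() yields a list of one-element tuples.
    ids = cur.execute('SELECT id FROM entries_properties WHERE feed_id = ? '
                      'ORDER BY published DESC', (1,)).fetchall()
    print(entry_id, ids)  # 1 [(1,)]
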
@@ -32,8 +32,8 @@ from slixfeed.config import Config
 import slixfeed.fetch as fetch
 from slixfeed.log import Logger
 import slixfeed.sqlite as sqlite
-from slixfeed.utilities import DateAndTime, Url
-from slixfeed.utilities import Html, MD
+from slixfeed.utilities import DateAndTime, String, Url
+from slixfeed.utilities import Html, MD, String, Utilities
 from slixmpp.xmlstream import ET
 import sys
 from urllib.parse import urlsplit
@@ -274,8 +274,7 @@ class Feed:
         while True:
             feed_id = sqlite.get_feed_id(db_file, url)
             if not feed_id:
-                exist_identifier = sqlite.check_identifier_exist(db_file, identifier)
-                if not exist_identifier:
+                if not sqlite.check_identifier_exist(db_file, identifier):
                     result = await fetch.http(url)
                     message = result['message']
                     status_code = result['status_code']
@@ -336,8 +335,17 @@ class Feed:
                         db_file, feed_id, feed_properties)
                     feed_id = sqlite.get_feed_id(db_file, url)
                     feed_id = feed_id[0]
-                    new_entries = Feed.get_properties_of_entries(
-                        jid_bare, db_file, url, feed_id, feed)
+                    new_entries = []
+                    for entry in feed.entries:
+                        if entry.has_key("link"):
+                            entry_link = Url.join_url(url, entry.link)
+                            entry_link = Url.trim_url(entry_link)
+                            entry_identifier = String.md5_hash(entry_link)
+                            if not sqlite.get_entry_id_by_identifier(
+                                db_file, entry_identifier):
+                                new_entry = Feed.get_properties_of_entry(
+                                    url, entry_identifier, entry)
+                                new_entries.extend([new_entry])
                     if new_entries:
                         await sqlite.add_entries_and_update_feed_state(
                             db_file, feed_id, new_entries)
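Note: this loop is the heart of the new entry selection: join and trim the link, hash it, and only keep entries whose hash is not yet stored. A self-contained sketch of the same flow, with a set standing in for sqlite.get_entry_id_by_identifier and urljoin approximating Url.join_url (Url.trim_url omitted); feedparser is the library the project already uses:

    import hashlib
    from urllib.parse import urljoin

    import feedparser

    def select_new_entries(feed_url, document, known_identifiers):
        """Collect entries whose MD5(link) identifier is not yet known."""
        new_entries = []
        for entry in feedparser.parse(document).entries:
            if not entry.get('link'):
                continue  # ignore items without a link
            link = urljoin(feed_url, entry.link)
            identifier = hashlib.md5(link.encode()).hexdigest()
            if identifier not in known_identifiers:
                known_identifiers.add(identifier)
                new_entries.append({'identifier': identifier, 'link': link})
        return new_entries
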
@@ -387,8 +395,7 @@ class Feed:
                          'identifier' : None}
                 break
             else:
-                ix = exist_identifier[1]
-                identifier = exist_identifier[2]
+                ix = sqlite.get_entry_id_by_identifier(db_file, identifier)
                 message = ('Identifier "{}" is already allocated.'
                            .format(identifier))
                 result_final = {'link' : url,
@@ -517,14 +524,14 @@ class Feed:
     # NOTE This function is not being utilized
     async def download_feed(self, db_file, feed_url):
         """
-        Get feed content.
+        Process feed content.

         Parameters
         ----------
         db_file : str
             Path to database file.
-        url : str, optional
-            URL.
+        feed_url : str
+            URL of feed.
         """
         function_name = sys._getframe().f_code.co_name
         logger.debug('{}: db_file: {} url: {}'
@@ -554,7 +561,15 @@ class Feed:
         feed_encoding = feed.encoding if feed.has_key('encoding') else ''
         feed_language = feed.feed.language if feed.feed.has_key('language') else ''
         feed_icon = feed.feed.icon if feed.feed.has_key('icon') else ''
-        feed_image = feed.feed.image.href if feed.feed.has_key('image') else ''
+        # (Pdb) feed.feed.image
+        # {}
+        # (Pdb) feed.version
+        # 'rss10'
+        # (Pdb) feed.feed.image
+        # {'links': [{'rel': 'alternate', 'type': 'text/html'}]}
+        # (Pdb) feed.version
+        # ''
+        feed_image = feed.feed.image.href if feed.feed.has_key('image') and feed.feed.image.has_key('href') else ''
         feed_logo = feed.feed.logo if feed.feed.has_key('logo') else ''
         feed_ttl = feed.feed.ttl if feed.feed.has_key('ttl') else ''

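Note: the pdb transcript kept in the hunk is the heart of the RDF fix: some RSS 1.0 documents yield a feed.feed.image mapping with no href, so the old single-key guard crashed the scan. A minimal reproduction of the guard, building the mapping directly with feedparser's FeedParserDict (assuming feedparser 6.x, where that class lives in feedparser.util):

    from feedparser.util import FeedParserDict

    # Mimics the transcript above: an RDF feed produced an image dict
    # that has 'links' but no 'href'.
    feed_meta = FeedParserDict(image=FeedParserDict(
        links=[{'rel': 'alternate', 'type': 'text/html'}]))

    # Old form: feed_meta.image.href raises AttributeError and halts the scan.
    feed_image = (feed_meta.image.href
                  if feed_meta.has_key('image') and feed_meta.image.has_key('href')
                  else '')
    print(repr(feed_image))  # ''
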
@@ -576,25 +591,22 @@ class Feed:

     # TODO get all active feeds of active accounts and scan the feed with the earliest scanned time
     # TODO Rename function name (idea: scan_and_populate)
-    def get_properties_of_entries(jid_bare, db_file, feed_url, feed_id, feed):
+    def get_properties_of_entry(feed_url, entry_identifier, entry):
         """
-        Get new entries.
+        Process entry content.

         Parameters
         ----------
-        db_file : str
-            Path to database file.
-        url : str, optional
-            URL.
+        feed_url : str
+            URL of feed.
+        entry :
+            Object of entry.
         """
-        # print('MID', feed_url, jid_bare, 'get_properties_of_entries')
         function_name = sys._getframe().f_code.co_name
-        logger.debug('{}: feed_id: {} url: {}'
-                     .format(function_name, feed_id, feed_url))
+        logger.debug('{} feed_url: {}'
+                     .format(function_name, feed_url))

-        new_entries = []
-        for entry in feed.entries:
-            logger.debug('{}: entry: {}'.format(function_name, entry.link))
+        read_status = 0
         if entry.has_key("published"):
             entry_published = entry.published
             entry_published = DateAndTime.rfc2822_to_iso8601(entry_published)
@@ -614,14 +626,7 @@ class Feed:
         # title = feed["feed"]["title"]
         # title = "{}: *{}*".format(feed["feed"]["title"], entry.title)
         entry_title = entry.title if entry.has_key("title") else entry_published
-        entry_id = entry.id if entry.has_key("id") else entry_link
-        exist = sqlite.check_entry_exist(db_file, feed_id,
-                                         identifier=entry_id,
-                                         title=entry_title,
-                                         link=entry_link,
-                                         published=entry_published)
-        if not exist:
-            read_status = 0
+        # entry_id = entry.id if entry.has_key("id") else entry_link
         # # Filter
         # pathname = urlsplit(link).path
         # string = (
@@ -788,7 +793,7 @@ class Feed:
         ###########################################################

         entry_properties = {
-            "identifier": entry_id,
+            "identifier": entry_identifier,
             "link": entry_link,
             "href": entry_href,
             "title": entry_title,
@@ -802,22 +807,20 @@ class Feed:
             "rating": entry_rating,
             "published": entry_published,
             "updated": entry_updated,
-            "read_status": read_status
-            }
+            "read_status": read_status}

-        new_entries.extend([{
+        new_entry = {
             "entry_properties" : entry_properties,
             "entry_authors" : entry_authors,
             "entry_contributors" : entry_contributors,
             "entry_contents" : entry_contents,
             "entry_links" : entry_links,
-            "entry_tags" : entry_tags
-            }])
+            "entry_tags" : entry_tags}
         # await sqlite.add_entry(
         #     db_file, title, link, entry_id,
         #     url, date, read_status)
         # await sqlite.set_date(db_file, url)
-        return new_entries
+        return new_entry


 """
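Note: for reference, a sketch of the record shape that get_properties_of_entry now returns and that add_entries_and_update_feed_state consumes; the INSERT at the top of this commit binds a subset of the entry_properties keys as named placeholders. Field values here are invented for illustration:

    new_entry = {
        'entry_properties': {
            'identifier': '0' * 32,   # MD5 hex digest of the entry link
            'link': 'https://example.org/posts/1',
            'href': 'https://example.org/posts/1',
            'title': 'Example post',
            'published': '2024-01-02T00:00:00',
            'updated': '',
            'rating': '',
            'read_status': 0,
            # plus title_type, summary_text, summary_lang, summary_type,
            # summary_base, category and comments
        },
        'entry_authors': [],
        'entry_contributors': [],
        'entry_contents': [],
        'entry_links': [],
        'entry_tags': [],
    }
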
@@ -1277,7 +1280,6 @@ class FeedTask:
             urls = sqlite.get_active_feeds_url(db_file)
             for url in urls:
                 url = url[0]
-                print('start scan\nurl {}\ndatabase {}'.format(url, db_file))
                 # print('STA',url)

                 # # Skip Reddit
@@ -1291,6 +1293,19 @@ class FeedTask:
                 feed_id = sqlite.get_feed_id(db_file, url)
                 feed_id = feed_id[0]
                 if not result['error']:
+                    identifier = sqlite.get_feed_identifier(db_file, feed_id)
+                    identifier = identifier[0]
+                    if not identifier:
+                        counter = 0
+                        while True:
+                            identifier = String.generate_identifier(url, counter)
+                            if sqlite.check_identifier_exist(db_file, identifier):
+                                counter += 1
+                            else:
+                                break
+                        await sqlite.update_feed_identifier(db_file, feed_id, identifier)
+                        # identifier = sqlite.get_feed_identifier(db_file, feed_id)
+                        # identifier = identifier[0]
                     await sqlite.update_feed_status(db_file, feed_id, status_code)
                     document = result['content']
                     feed = parse(document)
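Note: the identifier back-fill added here is the hostname-plus-counter scheme also introduced as String.generate_identifier further down. A standalone sketch of the allocation loop, with a set standing in for sqlite.check_identifier_exist and urlsplit approximating Url.get_hostname:

    from urllib.parse import urlsplit

    def generate_identifier(url, counter):
        hostname = urlsplit(url).hostname.replace('.', '-')
        return '{}:{}'.format(hostname, counter)

    def allocate_identifier(url, taken):
        # Bump the counter until the identifier is free.
        counter = 0
        while True:
            identifier = generate_identifier(url, counter)
            if identifier in taken:
                counter += 1
            else:
                break
        taken.add(identifier)
        return identifier

    taken = {'example-org:0'}
    print(allocate_identifier('https://example.org/feed.xml', taken))
    # example-org:1
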
@@ -1300,17 +1315,54 @@ class FeedTask:
                         db_file, feed_id, feed)
                     await sqlite.update_feed_properties(
                         db_file, feed_id, feed_properties)
-                    new_entries = Feed.get_properties_of_entries(
-                        jid_bare, db_file, url, feed_id, feed)
+                    new_entries = []
+                    for entry in feed.entries:
+                        if entry.has_key("link"):
+                            # link = complete_url(source, entry.link)
+                            entry_link = Url.join_url(url, entry.link)
+                            entry_link = Url.trim_url(entry_link)
+                            entry_identifier = String.md5_hash(entry_link)
+                            # if 'f-droid.org' in url:
+                            #     breakpoint()
+                            #     print(entry.link)
+                            #     print(entry_identifier)
+                            # Check if an entry identifier exists
+                            if not sqlite.get_entry_id_by_identifier(
+                                db_file, entry_identifier):
+                                new_entry = Feed.get_properties_of_entry(
+                                    url, entry_identifier, entry)
+                                # new_entries.append(new_entry)
+                                new_entries.extend([new_entry])
+                                print(url)
                     if new_entries:
                         await sqlite.add_entries_and_update_feed_state(db_file, feed_id, new_entries)
                     limit = Config.get_setting_value(self.settings, jid_bare, 'archive')
-                    ixs = sqlite.get_invalid_entries(db_file, url, feed)
-                    await sqlite.process_invalid_entries(db_file, ixs)
+                    ixs = sqlite.get_entries_id_of_feed(db_file, feed_id)
+                    ixs_invalid = {}
+                    for ix in ixs:
+                        ix = ix[0]
+                        read_status = sqlite.is_entry_read(db_file, ix)
+                        read_status = read_status[0]
+                        entry_identifier_local = sqlite.get_entry_identifier(db_file, ix)
+                        entry_identifier_local = entry_identifier_local[0]
+                        valid = False
+                        for entry in feed.entries:
+                            if entry.has_key("link"):
+                                entry_link = Url.join_url(url, entry.link)
+                                entry_link = Url.trim_url(entry_link)
+                                entry_identifier_external = Utilities.hash_url_to_md5(
+                                    entry_link)
+                                if entry_identifier_local == entry_identifier_external:
+                                    valid = True
+                                    continue
+                        if not valid: ixs_invalid[ix] = read_status
+                    if len(ixs_invalid):
+                        print('erasing {}/{}'.format(len(ixs_invalid), len(feed.entries)))
+                        await sqlite.process_invalid_entries(db_file, ixs_invalid)
+                    # TODO return number of archived entries and add if statement to run archive maintenance function
                     await sqlite.maintain_archive(db_file, limit)
                     # await sqlite.process_invalid_entries(db_file, ixs)
-                    print('end scan\nurl {}\ndatabase {}'.format(url, db_file))
-                    await asyncio.sleep(50)
+                    # await asyncio.sleep(50)

                 val = Config.get_setting_value(self.settings, jid_bare, 'check')
                 await asyncio.sleep(60 * float(val))
                 # Schedule to call this function again in 90 minutes
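Note: the second half of this hunk prunes stored entries whose MD5 identifier no longer matches any entry in the fetched document. The same mark-and-sweep can be sketched with plain data structures standing in for the database rows; hashing the fetched links once into a set also avoids the quadratic inner rescan of feed.entries:

    import hashlib

    def md5_hash(url):
        return hashlib.md5(url.encode()).hexdigest()

    # ix -> (identifier, read_status), standing in for the sqlite rows.
    stored = {
        1: (md5_hash('https://example.org/a'), 1),
        2: (md5_hash('https://example.org/gone'), 0),
    }
    fetched_links = ['https://example.org/a', 'https://example.org/b']
    fetched_identifiers = {md5_hash(link) for link in fetched_links}

    # Sweep: rows with no matching fetched identifier are collected,
    # keeping their read status, for process_invalid_entries.
    ixs_invalid = {}
    for ix, (identifier, read_status) in stored.items():
        if identifier not in fetched_identifiers:
            ixs_invalid[ix] = read_status

    print(ixs_invalid)  # {2: 0}
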
@@ -40,14 +40,13 @@ TODO
 """

 from datetime import datetime
-from email.utils import parseaddr
 from dateutil.parser import parse
-from email.utils import parsedate, parsedate_to_datetime
+from email.utils import parseaddr, parsedate, parsedate_to_datetime
 import hashlib
+from lxml import etree, html
 import os
 import random
 import slixfeed.config as config
-from lxml import etree, html
 import slixfeed.dt as dt
 import slixfeed.fetch as fetch
 from slixfeed.log import Logger
@@ -681,9 +680,31 @@ class Url:



+class String:
+
+
+    def generate_identifier(url, counter):
+        hostname = Url.get_hostname(url)
+        hostname = hostname.replace('.','-')
+        identifier = hostname + ':' + str(counter)
+        return identifier
+
+
+    # string_to_md5_hash
+    # NOTE Warning: Entry might not have a link
+    # TODO Handle situation error
+    def md5_hash(url):
+        url_encoded = url.encode()
+        url_hashed = hashlib.md5(url_encoded)
+        url_digest = url_hashed.hexdigest()
+        return url_digest
+
+
+
 class Utilities:


+    # string_to_md5_hash
     # NOTE Warning: Entry might not have a link
     # TODO Handle situation error
     def hash_url_to_md5(url):
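Note: a short usage sketch of the new String helpers. Both are defined without self and so are called directly on the class; the expected identifier assumes Url.get_hostname returns the bare hostname:

    from slixfeed.utilities import String

    print(String.generate_identifier('https://example.org/feed.xml', 0))
    # example-org:0
    print(String.md5_hash('https://example.org/posts/1'))
    # the 32-character hex digest of the URL bytes
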
@@ -1,2 +1,2 @@
-__version__ = '0.1.82'
-__version_info__ = (0, 1, 82)
+__version__ = '0.1.83'
+__version_info__ = (0, 1, 83)
@@ -48,7 +48,7 @@ import slixfeed.fetch as fetch
 from slixfeed.log import Logger
 import slixfeed.sqlite as sqlite
 from slixfeed.syndication import Feed, FeedDiscovery, FeedTask, Opml
-from slixfeed.utilities import DateAndTime, Html, Task, Url, Utilities
+from slixfeed.utilities import DateAndTime, Html, String, Task, Url, Utilities
 from slixfeed.version import __version__
 from slixfeed.xmpp.bookmark import XmppBookmark
 from slixfeed.xmpp.chat import XmppChat, XmppChatTask
@@ -1776,12 +1776,10 @@ class XmppClient(slixmpp.ClientXMPP):
                 session['prev'] = None
             # elif not identifier:
             #     counter = 0
-            #     hostname = Url.get_hostname(url)
-            #     identifier = hostname + ':' + str(counter)
             #     while True:
+            #         identifier = String.generate_identifier(url, counter)
             #         if sqlite.check_identifier_exist(db_file, identifier):
             #             counter += 1
-            #             identifier = hostname + ':' + str(counter)
             #         else:
             #             break
             # Several URLs to subscribe
@@ -1793,12 +1791,10 @@ class XmppClient(slixmpp.ClientXMPP):
             exist_count = 0
             for url in urls:
                 counter = 0
-                hostname = Url.get_hostname(url)
-                identifier = hostname + ':' + str(counter)
                 while True:
+                    identifier = String.generate_identifier(url, counter)
                     if sqlite.check_identifier_exist(db_file, identifier):
                         counter += 1
-                        identifier = hostname + ':' + str(counter)
                     else:
                         break
                 result = await Feed.add_feed(self, jid_bare, db_file, url,
@@ -1826,12 +1822,10 @@ class XmppClient(slixmpp.ClientXMPP):
             if isinstance(url, list):
                 url = url[0]
             counter = 0
-            hostname = Url.get_hostname(url)
-            identifier = hostname + ':' + str(counter)
             while True:
+                identifier = String.generate_identifier(url, counter)
                 if sqlite.check_identifier_exist(db_file, identifier):
                     counter += 1
-                    identifier = hostname + ':' + str(counter)
                 else:
                     break
             result = await Feed.add_feed(self, jid_bare, db_file, url,
@@ -9,7 +9,7 @@ import slixfeed.fetch as fetch
 from slixfeed.log import Logger
 import slixfeed.sqlite as sqlite
 from slixfeed.syndication import Feed, FeedDiscovery, Opml
-from slixfeed.utilities import DateAndTime, Documentation, Url, Utilities
+from slixfeed.utilities import DateAndTime, Documentation, String, Url, Utilities
 from slixfeed.version import __version__
 from slixfeed.xmpp.bookmark import XmppBookmark
 from slixfeed.xmpp.muc import XmppMuc
@@ -119,18 +119,15 @@ class XmppCommands:
         if url.startswith('http'):
             if not title:
                 title = Url.get_hostname(url)
-            counter = 0
-            hostname = Url.get_hostname(url)
-            hostname = hostname.replace('.','-')
-            identifier = hostname + ':' + str(counter)
-            while True:
-                if sqlite.check_identifier_exist(db_file, identifier):
-                    counter += 1
-                    identifier = hostname + ':' + str(counter)
-                else:
-                    break
             exist = sqlite.get_feed_id_and_name(db_file, url)
             if not exist:
+                counter = 0
+                while True:
+                    identifier = String.generate_identifier(url, counter)
+                    if sqlite.check_identifier_exist(db_file, identifier):
+                        counter += 1
+                    else:
+                        break
                 await sqlite.insert_feed(db_file, url, title,
                                          identifier)
                 feed_id = sqlite.get_feed_id(db_file, url)
@@ -157,8 +154,17 @@ class XmppCommands:
                                                 feed_properties)
             feed_id = sqlite.get_feed_id(db_file, url)
             feed_id = feed_id[0]
-            new_entries = Feed.get_properties_of_entries(
-                jid_bare, db_file, url, feed_id, feed)
+            new_entries = []
+            for entry in feed.entries:
+                if entry.has_key("link"):
+                    entry_link = Url.join_url(url, entry.link)
+                    entry_link = Url.trim_url(entry_link)
+                    entry_identifier = String.md5_hash(entry_link)
+                    if not sqlite.get_entry_id_by_identifier(
+                        db_file, entry_identifier):
+                        new_entry = Feed.get_properties_of_entry(
+                            url, entry_identifier, entry)
+                        new_entries.extend([new_entry])
             if new_entries:
                 await sqlite.add_entries_and_update_feed_state(
                     db_file, feed_id, new_entries)
@@ -390,14 +396,11 @@ class XmppCommands:
                 identifier = info[2]
             else:
                 counter = 0
-                hostname = Url.get_hostname(url)
-                hostname = hostname.replace('.','-')
-                identifier = hostname + ':' + str(counter)
                 while True:
+                    identifier = String.generate_identifier(url, counter)
                     if sqlite.check_identifier_exist(
                         db_file, identifier):
                         counter += 1
-                        identifier = hostname + ':' + str(counter)
                     else:
                         break
         # task.clean_tasks_xmpp_chat(self, jid_bare, ['status'])
@@ -479,13 +482,10 @@ class XmppCommands:
         url = Url.feed_to_http(url)
         url = (await Url.replace_hostname(url, 'feed')) or url
         counter = 0
-        hostname = Url.get_hostname(url)
-        hostname = hostname.replace('.','-')
-        identifier = hostname + ':' + str(counter)
         while True:
+            identifier = String.generate_identifier(url, counter)
            if sqlite.check_identifier_exist(db_file, identifier):
                counter += 1
-                identifier = hostname + ':' + str(counter)
            else:
                break
        # try:
@@ -16,7 +16,7 @@ from slixfeed.config import Config
 from slixfeed.log import Logger
 import slixfeed.sqlite as sqlite
 from slixfeed.syndication import Feed
-from slixfeed.utilities import Url, Utilities
+from slixfeed.utilities import String, Url, Utilities
 from slixfeed.xmpp.iq import XmppIQ
 import sys

@@ -336,13 +336,10 @@ class XmppPubsubAction:
         node_id = node_id[0]
         if not node_id:
             counter = 0
-            hostname = Url.get_hostname(url)
-            hostname = hostname.replace('.','-')
-            identifier = hostname + ':' + str(counter)
             while True:
+                identifier = String.generate_identifier(url, counter)
                 if sqlite.check_identifier_exist(db_file, identifier):
                     counter += 1
-                    identifier = hostname + ':' + str(counter)
                 else:
                     break
             await sqlite.update_feed_identifier(db_file, feed_id, identifier)