/* Copyright Martin Dosch

Licensed under the "MIT License" */

package main

import (
	"encoding/json"
	"hash/fnv"
	"log"
	"os"
	"os/user"
	"strconv"
	"strings"
	"time"

	"github.com/jaytaylor/html2text"
	"github.com/mmcdole/gofeed"
)

// Get new articles for specified feed.
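// A call might look like this (hypothetical values):
//
//	msg, err := getArticles("https://example.com/feed.xml", 3, false,
//		[]string{"> "}, []string{"NSFW"})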
func getArticles(feedURL string, max int, noExcerpt bool, filter []string, filterMessage []string) (string, error) {
	type feedCache struct {
		LastChange string
	}
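	// The cache file stores this struct as a small JSON object, e.g.
	// {"LastChange": "2018-07-13T22:53:22+02:00"} (RFC3339).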

	var output, cachePath string
	var last time.Time
	var lastUpdate feedCache
	var file *os.File
	var updateTime time.Time

	// Get the system's user cache path.
	osCacheDir := os.Getenv("XDG_CACHE_HOME")
	if osCacheDir != "" {
		// Create cachePath if it doesn't exist yet.
		cachePath = osCacheDir + "/feed-to-muc/"
		if _, err := os.Stat(cachePath); os.IsNotExist(err) {
			err = os.MkdirAll(cachePath, 0o700)
			if err != nil {
				log.Fatal("Error: Can't create cache path:", err)
			}
		}
	} else { // Get the current user.
		curUser, err := user.Current()
		if err != nil {
			log.Fatal("Error: Can't get current user:", err)
			return "", err
		}
		// Get the home directory.
		home := curUser.HomeDir
		if home == "" {
			log.Fatal("Error: No home directory available.")
			return "", err
		}

		// Create cachePath if it doesn't exist yet.
		cachePath = home + "/.cache/feed-to-muc/"
		if _, err := os.Stat(cachePath); os.IsNotExist(err) {
			err = os.MkdirAll(cachePath, 0o700)
			if err != nil {
				log.Fatal("Error: Can't create cache path:", err)
			}
		}
	}

	// Create a hash as identifier for the feed.
	// The identifier will be used as filename for caching the update time.
	h := fnv.New32a()
	_, err := h.Write([]byte(feedURL))
	if err != nil {
		log.Fatal("Error: Can't create hash for", feedURL+":", err)
	}
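	// Note: hash.Hash.Write is documented to never return an error,
	// so the check above is purely defensive.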

	if _, err := os.Stat(cachePath); os.IsNotExist(err) {
		err = os.MkdirAll(cachePath, 0o700)
		if err != nil {
			log.Fatal("Error: Can't create cache path:", err)
		}
	}
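
	// The cache file name is the decimal FNV-1a hash of the feed URL.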
	cacheFile := cachePath + strconv.Itoa(int(h.Sum32()))

	if _, err := os.Stat(cacheFile); os.IsNotExist(err) {
		file, err = os.Create(cacheFile)
		if err != nil {
			log.Fatal("Error: Can't create cache file:", err)
		}

		last = time.Now()
		lastUpdate.LastChange = last.Format(time.RFC3339)

		lastUpdateJSON, _ := json.MarshalIndent(lastUpdate, "", " ")
		_, err = file.Write(lastUpdateJSON)
		if err != nil {
			file.Close()
			log.Fatal("Error: Can't write last update time stamp to cache file:", err)
		}
	} else {
		file, err = os.OpenFile(cacheFile, os.O_RDWR, 0o600)
		if err != nil {
			file.Close()
			log.Fatal("Error: Can't open cache file:", err)
		}

		decoder := json.NewDecoder(file)
		lastUpdate := feedCache{}
		if err := decoder.Decode(&lastUpdate); err != nil {
			file.Close()
			log.Fatal("Error: Can't decode last update's time stamp:", err)
		}

		last, err = time.Parse(time.RFC3339, lastUpdate.LastChange)
		if err != nil {
			file.Close()
			log.Fatal("Error: Can't parse last update's time stamp:", err)
		}
	}
	file.Close()

	fp := gofeed.NewParser()
	feed, err := fp.ParseURL(feedURL)
	if err != nil {
		// Don't return an error, but log a message as the
		// bot should not crash when the feed is not available.
		log.Println(feedURL, ": Feed not available.")
		return "", nil
	}
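
	// A feed with no items at all would make feed.Items[0] below panic,
	// so treat an empty feed like an unavailable one.
	if len(feed.Items) == 0 {
		return "", nil
	}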

	// If no publish date is offered, try the update date.
	// If neither is offered, give up.
	if feed.Items[0].PublishedParsed == nil {
		if feed.Items[0].UpdatedParsed == nil {
			return "", err
		}
		// If the cached timestamp is newer than the one of
		// the latest article, return.
		if last.After(*feed.Items[0].UpdatedParsed) {
			return "", err
		}
	} else if last.After(*feed.Items[0].PublishedParsed) {
		// If the cached timestamp is newer than the one of
		// the latest article, return.
		return "", err
	}

	// Check the last n (defined in config) articles for new ones.
	for i := max - 1; i >= 0; i-- {
		// Skip article i if the feed doesn't contain that many articles.
		if len(feed.Items) < i+1 {
			continue
		}
		article := *feed.Items[i]
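
		// Note: an article that provides neither a publish nor an
		// update timestamp would panic below; feeds are assumed to
		// provide at least one of the two.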
		if article.PublishedParsed == nil {
			updateTime = *article.UpdatedParsed
		} else {
			updateTime = *article.PublishedParsed
		}

		// If the cached timestamp is not older than the article's, skip it.
		// Note: Checking for the cached timestamp being newer, instead of
		// not older, led to duplicate messages for the same article.
		// Probably a corner case when the time is identical.
		if !last.Before(updateTime) {
			continue
		}

		last = updateTime
		lastUpdate.LastChange = updateTime.Format(time.RFC3339)

		// Remove the file with the cached timestamp and create it
		// again with the updated timestamp.
		// ToDo: Replace timestamp without deleting.
		err = os.Remove(cacheFile)
		if err != nil {
			log.Fatal("Error: Can't delete cache file:", err)
		}

		file, err = os.Create(cacheFile)
		if err != nil {
			log.Fatal("Error: Can't create cache file:", err)
		}
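
		// json.MarshalIndent can't fail on this plain string struct,
		// so its error is ignored.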
		lastUpdateJSON, _ := json.MarshalIndent(lastUpdate, "", " ")
		_, err = file.Write(lastUpdateJSON)
		if err != nil {
			file.Close()
			log.Fatal("Error: Can't write last update time stamp to cache file:", err)
		}
		file.Close()

		// Remove redirects and tracking parameters from the URL.
		cleanURL, _ := removeTracking(article.Link)

		// Only send title and link if option noExcerpt is set,
		// otherwise add the description.
		if noExcerpt {
			// Skip the article if the feed title contains one of the
			// strings configured in FilterMessage.
			filterStrike := false
			for _, filterString := range filterMessage {
				if strings.Contains(feed.Title, filterString) {
					filterStrike = true
				}
			}
			if filterStrike {
				continue
			}

			output = output + feed.Title + ": *" + article.Title + "*\n" +
				cleanURL
		} else {
			var description string

			// Some feeds don't provide a description; in that case use
			// the content, otherwise use the shorter description.
			if article.Description != "" {
				// Strip HTML as we want plain text.
				description, err = html2text.FromString(article.Description)
				if err != nil {
					return "", err
				}
			} else {
				// Strip HTML as we want plain text.
				description, err = html2text.FromString(article.Content)
				if err != nil {
					return "", err
				}
			}

			// Skip the article if its description or the feed title
			// contains one of the strings configured in FilterMessage.
			filterStrike := false
			for _, filterString := range filterMessage {
				if strings.Contains(description, filterString) ||
					strings.Contains(feed.Title, filterString) {
					filterStrike = true
				}
			}
			if filterStrike {
				continue
			}

			// Remove lines only consisting of "> "; thank you, reddit.
			description = strings.ReplaceAll(description, "> \n", "")

			// Split the article description/content into single lines.
			lines := strings.Split(description, "\n")
			// Empty the article description/content.
			description = ""
			// Get the number of lines in the description/content.
			descriptionLength := len(lines)
			for i, line := range lines {
				// Drop empty lines to save space.
				if line != "" {
					// Drop lines starting with one of the defined filters.
					filterStrike := false
					for _, filterString := range filter {
						if strings.HasPrefix(line, filterString) {
							filterStrike = true
						}
					}
					if !filterStrike {
						description += line
						// Add a newline, except after the last line.
						if i < descriptionLength-1 {
							description += "\n"
						}
					}
				}
			}

			// Disabled: collapse double newlines so the message looks
			// less bloated, by splitting the description into fragments
			// and rejoining them with single newlines.
			// fragments := strings.Split(description, "\n\n")
			// description = ""
			// for _, line := range fragments {
			// 	if line != "" {
			// 		description = description + line + "\n"
			// 	}
			// }
2018-09-03 20:42:19 +02:00
|
|
|
// Only append article link if it is not yet contained in description (e.g. read more: URL).
|
2019-06-14 11:43:06 +02:00
|
|
|
if strings.Contains(description, article.Link) {
|
2018-09-03 20:42:19 +02:00
|
|
|
// Replace article link with URL cleaned from redirects and trackers.
|
2024-10-05 15:20:57 +02:00
|
|
|
description = strings.ReplaceAll(description, article.Link, cleanURL)
|
2018-09-03 20:42:19 +02:00
|
|
|
output = output + feed.Title + ": *" + article.Title + "*\n\n" + description
|
|
|
|
} else {
|
2019-05-31 12:36:30 +02:00
|
|
|
output = output + feed.Title + ": *" + article.Title + "*\n\n" + description + "\n" + cleanURL
|
2018-09-03 20:42:19 +02:00
|
|
|
}
|
2018-07-13 22:53:22 +02:00
|
|
|
}
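
		// Separate articles with a horizontal rule; the newest
		// article (i == 0) gets no trailing rule.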
		if i > 0 {
			output += "\n\n---\n\n"
		}
	}

	return output, err
}