2018-07-13 22:53:22 +02:00
|
|
|
/* Copyright 2018 Martin Dosch
|
|
|
|
Licensed under the "MIT License" */
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
|
|
|
"hash/fnv"
|
|
|
|
"log"
|
|
|
|
"os"
|
2018-07-15 11:03:19 +02:00
|
|
|
"os/user"
|
2018-07-13 22:53:22 +02:00
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
2019-08-30 09:51:38 +02:00
|
|
|
"github.com/jaytaylor/html2text"
|
2019-08-30 12:30:30 +02:00
|
|
|
"github.com/mmcdole/gofeed"
|
2018-07-13 22:53:22 +02:00
|
|
|
)
|
|
|
|
|
2018-08-02 09:41:21 +02:00
|
|
|
// Get new articles for specified feed.
|
2018-09-03 20:42:19 +02:00
|
|
|
func getArticles(feedURL string, max int, noExcerpt bool) (string, error) {
|
2018-07-13 22:53:22 +02:00
|
|
|
|
|
|
|
type feedCache struct {
|
|
|
|
LastChange string
|
|
|
|
}
|
|
|
|
|
2018-07-15 11:03:19 +02:00
|
|
|
var output, cachePath string
|
2018-07-13 22:53:22 +02:00
|
|
|
var last time.Time
|
|
|
|
var lastUpdate feedCache
|
|
|
|
var file *os.File
|
|
|
|
var updateTime time.Time
|
|
|
|
|
2018-07-15 11:03:19 +02:00
|
|
|
// Get systems user cache path.
|
|
|
|
osCacheDir := os.Getenv("$XDG_CACHE_HOME")
|
|
|
|
if osCacheDir != "" {
|
|
|
|
// Create configPath if not yet existing.
|
|
|
|
cachePath = osCacheDir + "/feed-to-muc/"
|
|
|
|
if _, err := os.Stat(cachePath); os.IsNotExist(err) {
|
|
|
|
err = os.MkdirAll(cachePath, 0700)
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't create cache path:", err)
|
2018-07-15 11:03:19 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} else { // Get the current user.
|
|
|
|
curUser, err := user.Current()
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't get current user:", err)
|
2018-07-15 11:03:19 +02:00
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
// Get home directory.
|
|
|
|
home := curUser.HomeDir
|
|
|
|
|
|
|
|
if home == "" {
|
|
|
|
log.Fatal("Error: No home directory available.")
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create cachePath if not yet existing.
|
|
|
|
cachePath = home + "/.cache/feed-to-muc/"
|
|
|
|
if _, err := os.Stat(cachePath); os.IsNotExist(err) {
|
|
|
|
err = os.MkdirAll(cachePath, 0700)
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't create cache path:", err)
|
2018-07-15 11:03:19 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2018-07-13 22:53:22 +02:00
|
|
|
// Create a hash as identifier for the feed.
|
|
|
|
// The identifier will be used as filename for caching the update time.
|
|
|
|
h := fnv.New32a()
|
2019-08-30 12:30:30 +02:00
|
|
|
_, err := h.Write([]byte(feedURL))
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal("Error: Can't create hash for", feedURL+":", err)
|
|
|
|
}
|
2018-07-13 22:53:22 +02:00
|
|
|
if _, err := os.Stat(cachePath); os.IsNotExist(err) {
|
|
|
|
err = os.MkdirAll(cachePath, 0700)
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't create hash identifier for cache file:", err)
|
2018-07-13 22:53:22 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cacheFile := cachePath + strconv.Itoa(int(h.Sum32()))
|
|
|
|
|
|
|
|
if _, err := os.Stat(cacheFile); os.IsNotExist(err) {
|
|
|
|
file, err = os.Create(cacheFile)
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't create cache file:", err)
|
2018-07-13 22:53:22 +02:00
|
|
|
}
|
|
|
|
defer file.Close()
|
|
|
|
|
|
|
|
last = time.Now()
|
|
|
|
|
|
|
|
lastUpdate.LastChange = last.Format(time.RFC3339)
|
|
|
|
|
|
|
|
lastUpdateJSON, _ := json.MarshalIndent(lastUpdate, "", " ")
|
|
|
|
_, err = file.Write(lastUpdateJSON)
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't write last update time stamp to cache file:", err)
|
2018-07-13 22:53:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
file, err = os.OpenFile(cacheFile, os.O_RDWR, 0600)
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't open cache file:", err)
|
2018-07-13 22:53:22 +02:00
|
|
|
}
|
|
|
|
defer file.Close()
|
|
|
|
|
|
|
|
decoder := json.NewDecoder(file)
|
|
|
|
lastUpdate := feedCache{}
|
|
|
|
if err := decoder.Decode(&lastUpdate); err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't decode laste updates time stamp:", err)
|
2018-07-13 22:53:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
last, err = time.Parse(time.RFC3339, string(lastUpdate.LastChange))
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't parse last updates time stamp:", err)
|
2018-07-13 22:53:22 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fp := gofeed.NewParser()
|
|
|
|
feed, err := fp.ParseURL(feedURL)
|
|
|
|
if err != nil {
|
2019-02-01 14:58:12 +01:00
|
|
|
// Don't return an error, but log a message as the
|
|
|
|
// bot should not crash when the feed is not available.
|
|
|
|
log.Println(feedURL, ": Feed not available.")
|
|
|
|
return "", nil
|
2018-07-13 22:53:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// If no publish date is offered try update date.
|
|
|
|
// If both is not offered give up.
|
|
|
|
if feed.Items[0].PublishedParsed == nil {
|
|
|
|
if feed.Items[0].UpdatedParsed == nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
// If cached timestamp is newer than the one of
|
|
|
|
// the last article return.
|
|
|
|
if last.After(*feed.Items[0].UpdatedParsed) {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// If cached timestamp is newer than the one of
|
|
|
|
// the last article return.
|
|
|
|
if last.After(*feed.Items[0].PublishedParsed) {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check last n (defined in config) articles for new ones.
|
|
|
|
for i := max - 1; i >= 0; i-- {
|
|
|
|
// Stop processing for article i if there are not so
|
|
|
|
// many articles in the feed.
|
|
|
|
if len(feed.Items) < i+1 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
article := *feed.Items[i]
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
if article.PublishedParsed == nil {
|
|
|
|
updateTime = *article.UpdatedParsed
|
|
|
|
} else {
|
|
|
|
updateTime = *article.PublishedParsed
|
|
|
|
}
|
|
|
|
|
|
|
|
// If cached timestamp is not older than the article stop processing.
|
|
|
|
// Note: Checking for cached timestamp being newer, instead of not older
|
|
|
|
// lead to duplicate messages for the same article. Probably a corner
|
|
|
|
// case when the time is identical.
|
2019-08-30 12:30:30 +02:00
|
|
|
if !last.Before(updateTime) {
|
2018-07-13 22:53:22 +02:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2018-08-03 13:33:57 +02:00
|
|
|
last = updateTime
|
|
|
|
lastUpdate.LastChange = updateTime.Format(time.RFC3339)
|
2018-07-13 22:53:22 +02:00
|
|
|
|
2018-08-03 13:33:57 +02:00
|
|
|
// Remove file with cached timestamp and create it
|
|
|
|
// again with updated timestamp.
|
|
|
|
// ToDo: Replace timestamp without deleting.
|
|
|
|
err = os.Remove(cacheFile)
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't delete cache file:", err)
|
2018-08-03 13:33:57 +02:00
|
|
|
}
|
2018-07-13 22:53:22 +02:00
|
|
|
|
2018-08-03 13:33:57 +02:00
|
|
|
file, err = os.Create(cacheFile)
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't create cache file:", err)
|
2018-08-03 13:33:57 +02:00
|
|
|
}
|
|
|
|
defer file.Close()
|
|
|
|
|
|
|
|
lastUpdateJSON, _ := json.MarshalIndent(lastUpdate, "", " ")
|
|
|
|
_, err = file.Write(lastUpdateJSON)
|
|
|
|
if err != nil {
|
2019-05-31 09:06:36 +02:00
|
|
|
log.Fatal("Error: Can't write last update time stamp to cache file:", err)
|
2018-07-13 22:53:22 +02:00
|
|
|
}
|
|
|
|
|
2018-08-03 11:02:31 +02:00
|
|
|
// Remove redirects and tracking parameters from URL.
|
|
|
|
cleanURL, _ := removeTracking(article.Link)
|
2018-12-14 14:18:46 +01:00
|
|
|
// Don't process mastodon messages that are a reply
|
2018-12-18 11:08:21 +01:00
|
|
|
// Strip HTML as we want to get plain text.
|
|
|
|
mastodonContent := strings.Replace(article.Description, `</span><span class="ellipsis">`, "", -1)
|
|
|
|
mastodonContent = strings.Replace(mastodonContent, `</span><span class="invisible">`, "", -1)
|
2018-12-20 10:03:07 +01:00
|
|
|
mastodonContent = strings.Replace(mastodonContent, `</span><span class="">`, "", -1)
|
2018-12-18 11:08:21 +01:00
|
|
|
mastodonContent = strings.Replace(mastodonContent, `</span>`, "", -1)
|
|
|
|
mastodonContent = strings.Replace(mastodonContent, `<span>`, "", -1)
|
2021-12-13 10:30:00 +01:00
|
|
|
mastodonContent, err = html2text.FromString(mastodonContent, html2text.Options{OmitLinks: true, TextOnly: true})
|
2018-12-18 11:08:21 +01:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2018-09-03 20:42:19 +02:00
|
|
|
|
2020-06-30 16:02:55 +02:00
|
|
|
// If the content is empty after html2text (e.g. consisting of an image only)
|
|
|
|
// then stop processing
|
|
|
|
if strings.Replace(mastodonContent, " ", "", -1) == "" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2019-02-01 14:39:25 +01:00
|
|
|
output = output + mastodonContent + "\n\n" + cleanURL
|
2018-07-13 22:53:22 +02:00
|
|
|
|
2018-12-18 11:08:21 +01:00
|
|
|
if i > 0 {
|
|
|
|
output = output + "\n\n---\n\n"
|
2018-07-13 22:53:22 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return output, err
|
|
|
|
}
|