feed-to-muc/getarticles.go

228 lines
5.5 KiB
Go

/* Copyright 2018 Martin Dosch
Licensed under the "MIT License" */
package main
import (
"encoding/json"
"hash/fnv"
"log"
"os"
"os/user"
"strconv"
"strings"
"time"
"github.com/mmcdole/gofeed"
"jaytaylor.com/html2text"
)
// getArticles fetches the feed at feedURL and builds a message containing
// every article that is newer than the timestamp cached for that feed.
//
// At most the first max items of the feed are inspected. They are processed
// from the highest index down to index 0, so for a feed that lists its newest
// entry first the result is ordered oldest to newest. Articles are separated
// by a "---" line. If noExcerpt is true only the feed title, the article title
// and the link are emitted; otherwise the article description, converted from
// HTML to plain text, is included as well.
//
// The timestamp of the last reported article is stored as JSON in a per-feed
// file inside $XDG_CACHE_HOME/feed-to-muc/ (or ~/.cache/feed-to-muc/ when
// XDG_CACHE_HOME is unset). On the first run for a feed the current time is
// stored, so articles already present in the feed are not reported.
//
// An empty string and a nil error are returned when the feed cannot be
// fetched, contains no items, or has nothing new. A non-nil error is returned
// only when converting an article description to plain text fails. Failures
// while accessing the cache terminate the program via log.Fatal.
func getArticles(feedURL string, max int, noExcerpt bool) (string, error) {
	type feedCache struct {
		LastChange string
	}

	// Locate the cache directory. The variable name is passed to os.Getenv
	// without a leading "$"; with it the lookup never matches.
	var cachePath string
	if osCacheDir := os.Getenv("XDG_CACHE_HOME"); osCacheDir != "" {
		cachePath = osCacheDir + "/feed-to-muc/"
	} else {
		curUser, err := user.Current()
		if err != nil {
			log.Fatal("Error: ", err)
		}
		if curUser.HomeDir == "" {
			log.Fatal("Error: No home directory available.")
		}
		cachePath = curUser.HomeDir + "/.cache/feed-to-muc/"
	}
	// MkdirAll succeeds without doing anything if the directory exists.
	if err := os.MkdirAll(cachePath, 0700); err != nil {
		log.Fatal("Error: ", err)
	}

	// A hash of the feed URL is used as the file name of the cache file.
	// The int conversion is kept so that existing cache files are still found.
	h := fnv.New32a()
	h.Write([]byte(feedURL))
	cacheFile := cachePath + strconv.Itoa(int(h.Sum32()))

	// saveTimestamp overwrites the cache file with t. The file is truncated
	// in place and closed right away, so no handle stays open while the
	// remaining articles are processed.
	saveTimestamp := func(t time.Time) {
		data, err := json.MarshalIndent(feedCache{LastChange: t.Format(time.RFC3339)}, "", " ")
		if err != nil {
			log.Fatal("Error: ", err)
		}
		file, err := os.OpenFile(cacheFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
		if err != nil {
			log.Fatal("Error: ", err)
		}
		if _, err := file.Write(data); err != nil {
			file.Close()
			log.Fatal("Error: ", err)
		}
		if err := file.Close(); err != nil {
			log.Fatal("Error: ", err)
		}
	}

	// Load the cached timestamp, or initialise the cache with the current
	// time if this feed has not been seen before.
	var last time.Time
	if _, err := os.Stat(cacheFile); os.IsNotExist(err) {
		last = time.Now()
		saveTimestamp(last)
	} else {
		file, err := os.Open(cacheFile)
		if err != nil {
			log.Fatal("Error: ", err)
		}
		var cached feedCache
		err = json.NewDecoder(file).Decode(&cached)
		file.Close()
		if err != nil {
			log.Fatal("Error: ", err)
		}
		last, err = time.Parse(time.RFC3339, cached.LastChange)
		if err != nil {
			log.Fatal("Error: ", err)
		}
	}

	feed, err := gofeed.NewParser().ParseURL(feedURL)
	if err != nil {
		// Don't return an error, but log a message as the
		// bot should not crash when the feed is not available.
		log.Println(feedURL, ": Feed not available.")
		return "", nil
	}
	// A feed without items has nothing to report; indexing it would panic.
	if len(feed.Items) == 0 {
		return "", nil
	}

	// articleTime returns the publish date of item, falling back to its
	// update date. The result is nil if the item carries neither.
	articleTime := func(item *gofeed.Item) *time.Time {
		if item.PublishedParsed != nil {
			return item.PublishedParsed
		}
		return item.UpdatedParsed
	}

	// Give up early if the first item has no usable date or if the cached
	// timestamp is newer than it.
	newest := articleTime(feed.Items[0])
	if newest == nil || last.After(*newest) {
		return "", nil
	}

	// Never look at more items than the feed actually contains.
	start := max
	if len(feed.Items) < start {
		start = len(feed.Items)
	}

	var messages []string
	for i := start - 1; i >= 0; i-- {
		article := feed.Items[i]

		// Skip articles without any timestamp instead of dereferencing nil.
		updateTime := articleTime(article)
		if updateTime == nil {
			continue
		}
		// Skip the article unless it is strictly newer than the cached
		// timestamp. Treating an identical time as new led to duplicate
		// messages for the same article.
		if !last.Before(*updateTime) {
			continue
		}
		last = *updateTime
		saveTimestamp(last)

		// Remove redirects and tracking parameters from URL.
		// NOTE(review): the second return value of removeTracking is
		// discarded, as before; confirm what cleanURL holds on failure.
		cleanURL, _ := removeTracking(article.Link)

		message := feed.Title + ": *" + article.Title + "*\n"
		if noExcerpt {
			// Only title and link.
			message += cleanURL
		} else {
			// Strip HTML as we want to get plain text.
			description, err := html2text.FromString(article.Description)
			if err != nil {
				return "", err
			}
			if strings.Contains(description, article.Link) {
				// The description already contains the link (e.g.
				// "read more: URL"): swap it for the cleaned URL
				// instead of appending the link a second time.
				description = strings.Replace(description, article.Link, cleanURL, -1)
				message += "\n" + description
			} else {
				message += "\n" + description + "\n" + cleanURL
			}
		}
		messages = append(messages, message)
	}

	// Joining puts the separator only between emitted articles, so the
	// output never ends with a dangling "---".
	return strings.Join(messages, "\n\n---\n\n"), nil
}