feed-to-muc/getarticles.go
mdosch efe8f1a549 Made some changes to remove multiple linebreaks and (hopefully) made help output look more nice.
Revert "Revert "(Hopefully) made help output look more nice.""

This reverts commit b73f39ef48.
2019-05-26 21:03:02 +02:00

238 lines
6 KiB
Go

/* Copyright 2018 Martin Dosch
Licensed under the "MIT License" */
package main
import (
"encoding/json"
"hash/fnv"
"log"
"os"
"os/user"
"strconv"
"strings"
"time"
"github.com/mmcdole/gofeed"
"jaytaylor.com/html2text"
)
// getArticles fetches the feed at feedURL and returns one chat message that
// describes every article newer than the timestamp cached for that feed.
//
// Only the first max items of the feed are considered; they are processed
// from the highest index down to index 0. With noExcerpt set, each entry
// consists of the feed title, the article title and the cleaned link;
// otherwise the article description, converted to plain text with blank lines
// removed, is included as well. Entries are separated by "\n\n---\n\n".
//
// The timestamp of the last reported article is stored as JSON in a per-feed
// file below $XDG_CACHE_HOME/feed-to-muc/ or, if that variable is unset,
// below ~/.cache/feed-to-muc/. The file name is the FNV-1a hash of feedURL.
// On the first run for a feed the current time is stored, so only articles
// published afterwards are reported.
//
// An unreachable or unparsable feed is logged and reported as ("", nil), and
// so is a feed without items or without usable timestamps. A failure to
// convert a description to plain text is returned as an error. Failures to
// read or write the cache terminate the program through log.Fatal.
func getArticles(feedURL string, max int, noExcerpt bool) (string, error) {
	type feedCache struct {
		LastChange string
	}

	var cachePath string
	var last time.Time
	var lastUpdate feedCache

	// Locate the cache directory. os.Getenv expects the bare variable name;
	// with a leading "$" the lookup never matches.
	if osCacheDir := os.Getenv("XDG_CACHE_HOME"); osCacheDir != "" {
		cachePath = osCacheDir + "/feed-to-muc/"
	} else {
		curUser, err := user.Current()
		if err != nil {
			log.Fatal("Error: ", err)
		}
		home := curUser.HomeDir
		if home == "" {
			log.Fatal("Error: No home directory available.")
		}
		cachePath = home + "/.cache/feed-to-muc/"
	}
	// MkdirAll is a no-op if the directory already exists.
	if err := os.MkdirAll(cachePath, 0700); err != nil {
		log.Fatal("Error: ", err)
	}

	// Create a hash as identifier for the feed.
	// The identifier is used as file name for caching the update time.
	h := fnv.New32a()
	h.Write([]byte(feedURL)) // hash.Hash documents that Write never returns an error.
	cacheFile := cachePath + strconv.Itoa(int(h.Sum32()))

	// writeCache stores t as the new cached timestamp. os.Create truncates an
	// existing file, and the handle is closed right away instead of being
	// deferred, so no descriptors pile up while looping over articles.
	writeCache := func(t time.Time) {
		lastUpdate.LastChange = t.Format(time.RFC3339)
		data, err := json.MarshalIndent(lastUpdate, "", " ")
		if err != nil {
			log.Fatal("Error: ", err)
		}
		f, err := os.Create(cacheFile)
		if err != nil {
			log.Fatal("Error: ", err)
		}
		if _, err := f.Write(data); err != nil {
			f.Close()
			log.Fatal("Error: ", err)
		}
		if err := f.Close(); err != nil {
			log.Fatal("Error: ", err)
		}
	}

	if _, err := os.Stat(cacheFile); os.IsNotExist(err) {
		// First run for this feed: remember "now" so that only articles
		// published from here on are reported.
		last = time.Now()
		writeCache(last)
	} else {
		f, err := os.Open(cacheFile)
		if err != nil {
			log.Fatal("Error: ", err)
		}
		err = json.NewDecoder(f).Decode(&lastUpdate)
		f.Close()
		if err != nil {
			log.Fatal("Error: ", err)
		}
		last, err = time.Parse(time.RFC3339, lastUpdate.LastChange)
		if err != nil {
			log.Fatal("Error: ", err)
		}
	}

	feed, err := gofeed.NewParser().ParseURL(feedURL)
	if err != nil {
		// Don't return an error, but log a message as the
		// bot should not crash when the feed is not available.
		log.Println(feedURL, ": Feed not available.")
		return "", nil
	}
	// A feed without items has nothing to report (and must not be indexed).
	if len(feed.Items) == 0 {
		return "", nil
	}

	// If no publish date is offered try the update date. If neither is
	// offered, or the cached timestamp is newer than the first item, give up.
	newest := feed.Items[0].PublishedParsed
	if newest == nil {
		newest = feed.Items[0].UpdatedParsed
	}
	if newest == nil || last.After(*newest) {
		return "", nil
	}

	// Check the last n (defined in config) articles for new ones.
	var entries []string
	for i := max - 1; i >= 0; i-- {
		// Skip index i if there are not so many articles in the feed.
		if i >= len(feed.Items) {
			continue
		}
		article := feed.Items[i]

		var updateTime time.Time
		switch {
		case article.PublishedParsed != nil:
			updateTime = *article.PublishedParsed
		case article.UpdatedParsed != nil:
			updateTime = *article.UpdatedParsed
		default:
			// Without any timestamp it cannot be decided whether the
			// article is new, so it is skipped.
			continue
		}

		// Skip the article unless the cached timestamp is strictly older.
		// Note: Checking for the cached timestamp being newer, instead of
		// not older, led to duplicate messages for the same article when
		// both times were identical.
		if !last.Before(updateTime) {
			continue
		}
		last = updateTime
		writeCache(updateTime)

		// Remove redirects and tracking parameters from the URL. The second
		// result of removeTracking is ignored as before; if cleaning yields
		// nothing usable the original link is sent instead.
		cleanURL, _ := removeTracking(article.Link)
		if cleanURL == "" {
			cleanURL = article.Link
		}

		entry := feed.Title + ": *" + article.Title + "*\n"
		// Only send title and link if option noExcerpt is set,
		// otherwise add the description.
		if noExcerpt {
			entry += cleanURL
		} else {
			// Strip HTML as we want to get plain text.
			description, err := html2text.FromString(article.Description)
			if err != nil {
				return "", err
			}
			// To make the message look less bloated, drop blank lines and
			// join the remaining ones with exactly one newline each.
			var b strings.Builder
			for _, line := range strings.Split(description, "\n") {
				if strings.TrimSpace(line) == "" {
					continue
				}
				b.WriteString(line)
				b.WriteString("\n")
			}
			description = b.String()

			// Only append the article link if it is not yet contained in the
			// description (e.g. "read more: URL"). The empty-link guard
			// matters: replacing "" would insert the URL between every rune.
			if article.Link != "" && strings.Contains(description, article.Link) {
				// Replace the link with the URL cleaned from redirects and trackers.
				description = strings.Replace(description, article.Link, cleanURL, -1)
				entry += "\n" + description
			} else {
				entry += "\n" + description + cleanURL
			}
		}
		entries = append(entries, entry)
	}

	// Joining avoids a dangling separator when a newer item was skipped.
	return strings.Join(entries, "\n\n---\n\n"), nil
}