feed-to-muc/getarticles.go

/* Copyright 2018 Martin Dosch
Licensed under the "MIT License" */

package main

import (
	"encoding/json"
	"errors"
	"hash/fnv"
	"log"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/mmcdole/gofeed"
	"jaytaylor.com/html2text"
)
// getArticles fetches the feed at feedURL, compares it against the cached
// timestamp of the last run under cachePath, and returns up to max new
// articles as a single plain-text string (empty if there is nothing new).
func getArticles(feedURL string, max int, cachePath string) (string, error) {
	type feedCache struct {
		LastChange string
	}
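
	// A sketch of the on-disk cache format produced by json.MarshalIndent
	// below (the timestamp value is an example):
	//
	//	{
	//	 "LastChange": "2018-07-13T22:53:22+02:00"
	//	}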

	var output string
	var last time.Time
	var lastUpdate feedCache
	var file *os.File
	var updateTime time.Time

	// Create a hash as identifier for the feed.
	// The identifier will be used as filename for caching the update time.
	// ToDo: cachePath should probably be moved to ~/.cache/feed-to-muc
	h := fnv.New32a()
	h.Write([]byte(feedURL))
	cachePath = cachePath + "cache/"
	if _, err := os.Stat(cachePath); os.IsNotExist(err) {
		err = os.MkdirAll(cachePath, 0700)
		if err != nil {
			log.Fatal("Error: ", err)
		}
	}
	cacheFile := cachePath + strconv.Itoa(int(h.Sum32()))
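	// Hypothetical example: with a cachePath of "/home/user/feed-to-muc/",
	// the timestamp for a feed ends up in
	// "/home/user/feed-to-muc/cache/<decimal FNV-32a sum of the feed URL>".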
	if _, err := os.Stat(cacheFile); os.IsNotExist(err) {
		// No cache file yet: store the current time so that only articles
		// published after the first run are reported.
		file, err = os.Create(cacheFile)
		if err != nil {
			log.Fatal("Error: ", err)
		}
		defer file.Close()
		last = time.Now()
		lastUpdate.LastChange = last.Format(time.RFC3339)
		lastUpdateJSON, err := json.MarshalIndent(lastUpdate, "", " ")
		if err != nil {
			log.Fatal("Error: ", err)
		}
		_, err = file.Write(lastUpdateJSON)
		if err != nil {
			log.Fatal("Error: ", err)
		}
	} else {
		file, err = os.OpenFile(cacheFile, os.O_RDWR, 0600)
		if err != nil {
			log.Fatal("Error: ", err)
		}
		defer file.Close()
		decoder := json.NewDecoder(file)
		if err := decoder.Decode(&lastUpdate); err != nil {
			log.Fatal("Error: ", err)
		}
		last, err = time.Parse(time.RFC3339, lastUpdate.LastChange)
		if err != nil {
			log.Fatal("Error: ", err)
		}
	}
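
	// At this point last holds the cached timestamp: the newest article
	// already reported, or the time of the first run.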
	fp := gofeed.NewParser()
	feed, err := fp.ParseURL(feedURL)
	if err != nil {
		return "", err
	}
	// Nothing to do for an empty feed (this also guards the index below).
	if len(feed.Items) == 0 {
		return "", nil
	}
	// If no publish date is offered, try the update date. If neither is
	// offered, give up. This assumes the feed lists its newest article first.
	if feed.Items[0].PublishedParsed == nil {
		if feed.Items[0].UpdatedParsed == nil {
			return "", errors.New("feed provides neither publish nor update dates")
		}
		// If the cached timestamp is newer than the one of
		// the newest article there is nothing to report.
		if last.After(*feed.Items[0].UpdatedParsed) {
			return "", nil
		}
	} else {
		// If the cached timestamp is newer than the one of
		// the newest article there is nothing to report.
		if last.After(*feed.Items[0].PublishedParsed) {
			return "", nil
		}
	}
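
	// The loop below walks from the oldest candidate (index max-1) to the
	// newest (index 0), so new articles are appended oldest first.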
	// Check the last n (defined in the config) articles for new ones.
	for i := max - 1; i >= 0; i-- {
		// Skip index i if the feed doesn't contain that many articles.
		if len(feed.Items) < i+1 {
			continue
		}
		article := *feed.Items[i]
		if article.PublishedParsed == nil {
			// Skip articles that carry neither date to avoid
			// dereferencing a nil timestamp.
			if article.UpdatedParsed == nil {
				continue
			}
			updateTime = *article.UpdatedParsed
		} else {
			updateTime = *article.PublishedParsed
		}
		// If the cached timestamp is not older than the article's, skip it.
		// Note: Checking for the cached timestamp being newer, instead of
		// not older, led to duplicate messages for the same article,
		// probably a corner case when the times are identical.
		if !last.Before(updateTime) {
			continue
		}
		if i == 0 {
			last = updateTime
			lastUpdate.LastChange = updateTime.Format(time.RFC3339)
			// Remove the file with the cached timestamp and create it
			// again with the updated timestamp.
			// ToDo: Replace the timestamp without deleting.
			err = os.Remove(cacheFile)
			if err != nil {
				log.Fatal("Error: ", err)
			}
			file, err = os.Create(cacheFile)
			if err != nil {
				log.Fatal("Error: ", err)
			}
			defer file.Close()
			lastUpdateJSON, err := json.MarshalIndent(lastUpdate, "", " ")
			if err != nil {
				log.Fatal("Error: ", err)
			}
			_, err = file.Write(lastUpdateJSON)
			if err != nil {
				log.Fatal("Error: ", err)
			}
		}
		// Strip the HTML as we want plain text.
		description, err := html2text.FromString(article.Description)
		if err != nil {
			return "", err
		}
		// Only append the link separately if the description
		// doesn't already contain it.
		if strings.Contains(description, article.Link) {
			output = output + feed.Title + ": *" + article.Title + "*\n\n" + description
		} else {
			output = output + feed.Title + ": *" + article.Title + "*\n\n" + description + "\n" + article.Link
		}
		// Separate multiple new articles with a horizontal rule.
		if i > 0 {
			output = output + "\n\n---\n\n"
		}
	}
	return output, nil
}
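
// Example usage (a hypothetical sketch, not part of the original program;
// the feed URL, limit, and cache path are made up for illustration):
//
//	msg, err := getArticles("https://example.com/feed.xml", 5, "/home/user/feed-to-muc/")
//	if err != nil {
//		log.Fatal("Error: ", err)
//	}
//	if msg != "" {
//		// Forward msg to the MUC here; an empty string means
//		// no new articles since the last run.
//	}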