/* Copyright 2018 Martin Dosch Licensed under the "MIT License" */ package main import ( "encoding/json" "hash/fnv" "log" "os" "os/user" "strconv" "strings" "time" "github.com/jaytaylor/html2text" "github.com/mmcdole/gofeed" ) // Get new articles for specified feed. func getArticles(feedURL string, max int, noExcerpt bool) (string, error) { type feedCache struct { LastChange string } var output, cachePath string var last time.Time var lastUpdate feedCache var file *os.File var updateTime time.Time // Get systems user cache path. osCacheDir := os.Getenv("$XDG_CACHE_HOME") if osCacheDir != "" { // Create configPath if not yet existing. cachePath = osCacheDir + "/feed-to-muc/" if _, err := os.Stat(cachePath); os.IsNotExist(err) { err = os.MkdirAll(cachePath, 0700) if err != nil { log.Fatal("Error: Can't create cache path:", err) } } } else { // Get the current user. curUser, err := user.Current() if err != nil { log.Fatal("Error: Can't get current user:", err) return "", err } // Get home directory. home := curUser.HomeDir if home == "" { log.Fatal("Error: No home directory available.") return "", err } // Create cachePath if not yet existing. cachePath = home + "/.cache/feed-to-muc/" if _, err := os.Stat(cachePath); os.IsNotExist(err) { err = os.MkdirAll(cachePath, 0700) if err != nil { log.Fatal("Error: Can't create cache path:", err) } } } // Create a hash as identifier for the feed. // The identifier will be used as filename for caching the update time. h := fnv.New32a() _, err := h.Write([]byte(feedURL)) if err != nil { log.Fatal("Error: Can't create hash for", feedURL+":", err) } if _, err := os.Stat(cachePath); os.IsNotExist(err) { err = os.MkdirAll(cachePath, 0700) if err != nil { log.Fatal("Error: Can't create hash identifier for cache file:", err) } } cacheFile := cachePath + strconv.Itoa(int(h.Sum32())) if _, err := os.Stat(cacheFile); os.IsNotExist(err) { file, err = os.Create(cacheFile) if err != nil { log.Fatal("Error: Can't create cache file:", err) } defer file.Close() last = time.Now() lastUpdate.LastChange = last.Format(time.RFC3339) lastUpdateJSON, _ := json.MarshalIndent(lastUpdate, "", " ") _, err = file.Write(lastUpdateJSON) if err != nil { log.Fatal("Error: Can't write last update time stamp to cache file:", err) } } else { file, err = os.OpenFile(cacheFile, os.O_RDWR, 0600) if err != nil { log.Fatal("Error: Can't open cache file:", err) } defer file.Close() decoder := json.NewDecoder(file) lastUpdate := feedCache{} if err := decoder.Decode(&lastUpdate); err != nil { log.Fatal("Error: Can't decode laste updates time stamp:", err) } last, err = time.Parse(time.RFC3339, string(lastUpdate.LastChange)) if err != nil { log.Fatal("Error: Can't parse last updates time stamp:", err) } } fp := gofeed.NewParser() feed, err := fp.ParseURL(feedURL) if err != nil { // Don't return an error, but log a message as the // bot should not crash when the feed is not available. log.Println(feedURL, ": Feed not available.") return "", nil } // If no publish date is offered try update date. // If both is not offered give up. if feed.Items[0].PublishedParsed == nil { if feed.Items[0].UpdatedParsed == nil { return "", err } // If cached timestamp is newer than the one of // the last article return. if last.After(*feed.Items[0].UpdatedParsed) { return "", err } } else { // If cached timestamp is newer than the one of // the last article return. if last.After(*feed.Items[0].PublishedParsed) { return "", err } } // Check last n (defined in config) articles for new ones. for i := max - 1; i >= 0; i-- { // Stop processing for article i if there are not so // many articles in the feed. if len(feed.Items) < i+1 { continue } article := *feed.Items[i] if err != nil { return "", err } if article.PublishedParsed == nil { updateTime = *article.UpdatedParsed } else { updateTime = *article.PublishedParsed } // If cached timestamp is not older than the article stop processing. // Note: Checking for cached timestamp being newer, instead of not older // lead to duplicate messages for the same article. Probably a corner // case when the time is identical. if !last.Before(updateTime) { continue } last = updateTime lastUpdate.LastChange = updateTime.Format(time.RFC3339) // Remove file with cached timestamp and create it // again with updated timestamp. // ToDo: Replace timestamp without deleting. err = os.Remove(cacheFile) if err != nil { log.Fatal("Error: Can't delete cache file:", err) } file, err = os.Create(cacheFile) if err != nil { log.Fatal("Error: Can't create cache file:", err) } defer file.Close() lastUpdateJSON, _ := json.MarshalIndent(lastUpdate, "", " ") _, err = file.Write(lastUpdateJSON) if err != nil { log.Fatal("Error: Can't write last update time stamp to cache file:", err) } // Remove redirects and tracking parameters from URL. cleanURL, _ := removeTracking(article.Link) // Don't process mastodon messages that are a reply // Strip HTML as we want to get plain text. mastodonContent := strings.Replace(article.Description, ``, "", -1) mastodonContent = strings.Replace(mastodonContent, ``, "", -1) mastodonContent = strings.Replace(mastodonContent, ``, "", -1) mastodonContent = strings.Replace(mastodonContent, ``, "", -1) mastodonContent, err = html2text.FromString(mastodonContent, html2text.Options{OmitLinks: true, TextOnly: true}) if err != nil { return "", err } // If the content is empty after html2text (e.g. consisting of an image only) // then stop processing if strings.Replace(mastodonContent, " ", "", -1) == "" { continue } output = output + mastodonContent + "\n\n" + cleanURL if i > 0 { output = output + "\n\n---\n\n" } } return output, err }