/* Copyright 2018 Martin Dosch Licensed under the "MIT License" */ package main import ( "encoding/json" "hash/fnv" "log" "os" "os/user" "strconv" "strings" "time" "github.com/mmcdole/gofeed" "jaytaylor.com/html2text" ) // Get new articles for specified feed. func getArticles(feedURL string, max int, noExcerpt bool) (string, error) { type feedCache struct { LastChange string } var output, cachePath string var last time.Time var lastUpdate feedCache var file *os.File var updateTime time.Time // Get systems user cache path. osCacheDir := os.Getenv("$XDG_CACHE_HOME") if osCacheDir != "" { // Create configPath if not yet existing. cachePath = osCacheDir + "/feed-to-muc/" if _, err := os.Stat(cachePath); os.IsNotExist(err) { err = os.MkdirAll(cachePath, 0700) if err != nil { log.Fatal("Error: ", err) } } } else { // Get the current user. curUser, err := user.Current() if err != nil { log.Fatal("Error: ", err) return "", err } // Get home directory. home := curUser.HomeDir if home == "" { log.Fatal("Error: No home directory available.") return "", err } // Create cachePath if not yet existing. cachePath = home + "/.cache/feed-to-muc/" if _, err := os.Stat(cachePath); os.IsNotExist(err) { err = os.MkdirAll(cachePath, 0700) if err != nil { log.Fatal("Error: ", err) } } } // Create a hash as identifier for the feed. // The identifier will be used as filename for caching the update time. h := fnv.New32a() h.Write([]byte(feedURL)) if _, err := os.Stat(cachePath); os.IsNotExist(err) { err = os.MkdirAll(cachePath, 0700) if err != nil { log.Fatal("Error: ", err) } } cacheFile := cachePath + strconv.Itoa(int(h.Sum32())) if _, err := os.Stat(cacheFile); os.IsNotExist(err) { file, err = os.Create(cacheFile) if err != nil { log.Fatal("Error: ", err) } defer file.Close() last = time.Now() lastUpdate.LastChange = last.Format(time.RFC3339) lastUpdateJSON, _ := json.MarshalIndent(lastUpdate, "", " ") _, err = file.Write(lastUpdateJSON) if err != nil { log.Fatal("Error: ", err) } } else { file, err = os.OpenFile(cacheFile, os.O_RDWR, 0600) if err != nil { log.Fatal("Error: ", err) } defer file.Close() decoder := json.NewDecoder(file) lastUpdate := feedCache{} if err := decoder.Decode(&lastUpdate); err != nil { log.Fatal("Error: ", err) } last, err = time.Parse(time.RFC3339, string(lastUpdate.LastChange)) if err != nil { log.Fatal("Error: ", err) } } fp := gofeed.NewParser() feed, err := fp.ParseURL(feedURL) if err != nil { // Don't return an error, but log a message as the // bot should not crash when the feed is not available. log.Println(feedURL, ": Feed not available.") return "", nil } // If no publish date is offered try update date. // If both is not offered give up. if feed.Items[0].PublishedParsed == nil { if feed.Items[0].UpdatedParsed == nil { return "", err } // If cached timestamp is newer than the one of // the last article return. if last.After(*feed.Items[0].UpdatedParsed) { return "", err } } else { // If cached timestamp is newer than the one of // the last article return. if last.After(*feed.Items[0].PublishedParsed) { return "", err } } // Check last n (defined in config) articles for new ones. for i := max - 1; i >= 0; i-- { // Stop processing for article i if there are not so // many articles in the feed. if len(feed.Items) < i+1 { continue } article := *feed.Items[i] if err != nil { return "", err } if article.PublishedParsed == nil { updateTime = *article.UpdatedParsed } else { updateTime = *article.PublishedParsed } // If cached timestamp is not older than the article stop processing. // Note: Checking for cached timestamp being newer, instead of not older // lead to duplicate messages for the same article. Probably a corner // case when the time is identical. if last.Before(updateTime) == false { continue } last = updateTime lastUpdate.LastChange = updateTime.Format(time.RFC3339) // Remove file with cached timestamp and create it // again with updated timestamp. // ToDo: Replace timestamp without deleting. err = os.Remove(cacheFile) if err != nil { log.Fatal("Error: ", err) } file, err = os.Create(cacheFile) if err != nil { log.Fatal("Error: ", err) } defer file.Close() lastUpdateJSON, _ := json.MarshalIndent(lastUpdate, "", " ") _, err = file.Write(lastUpdateJSON) if err != nil { log.Fatal("Error: ", err) } // Remove redirects and tracking parameters from URL. cleanURL, _ := removeTracking(article.Link) // Only send title and link if option noExcerpt is set, // otherwise add the description. if noExcerpt == true { output = output + feed.Title + ": *" + article.Title + "*\n" + cleanURL } else { var description string // Some feeds don't provide a description, let's use the content // in this case, otherwise use the shorter description. if article.Description != "" { // Strip HTML as we want to get plain text. description, err = html2text.FromString(article.Description) if err != nil { return "", err } } else { // Strip HTML as we want to get plain text. description, err = html2text.FromString(article.Content) if err != nil { return "", err } } // Remove lines only consisting of "> "; thank you reddit. description = strings.Replace(description, "> \n", "", -1) // To make the message look not so bloated we remove double newlines. // Split the article description/content into fragments between double newlines. fragments := strings.Split(description, "\n\n") // Empty article description/content description = "" // Fill article description/content with the fragments separated by one newline. for _, line := range fragments { // Only if the only content is not empty. if line != "" { description = description + line + "\n" } } // Only append article link if it is not yet contained in description (e.g. read more: URL). if strings.Contains(description, article.Link) == true { // Replace article link with URL cleaned from redirects and trackers. description = strings.Replace(description, article.Link, cleanURL, -1) output = output + feed.Title + ": *" + article.Title + "*\n\n" + description } else { output = output + feed.Title + ": *" + article.Title + "*\n\n" + description + cleanURL } } if i > 0 { output = output + "\n\n---\n\n" } } return output, err }