/* Copyright Martin Dosch
   Licensed under the "MIT License" */

package main

import (
	"encoding/json"
	"hash/fnv"
	"log"
	"os"
	"os/user"
	"strconv"
	"strings"
	"time"

	"github.com/jaytaylor/html2text"
	"github.com/mmcdole/gofeed"
)

// Get new articles for the specified feed.
func getArticles(feedURL string, max int, noExcerpt bool, filter []string,
	filterMessage []string) (string, error) {
	type feedCache struct {
		LastChange string
	}

	var output, cachePath string
	var last time.Time
	var lastUpdate feedCache
	var file *os.File
	var updateTime time.Time

	// Get the user's cache path from the environment.
	osCacheDir := os.Getenv("XDG_CACHE_HOME")
	if osCacheDir != "" {
		// Create cachePath if not yet existing.
		cachePath = osCacheDir + "/feed-to-muc/"
		if _, err := os.Stat(cachePath); os.IsNotExist(err) {
			err = os.MkdirAll(cachePath, 0o700)
			if err != nil {
				log.Fatal("Error: Can't create cache path:", err)
			}
		}
	} else {
		// Fall back to ~/.cache for the current user.
		curUser, err := user.Current()
		if err != nil {
			log.Fatal("Error: Can't get current user:", err)
		}
		// Get the home directory.
		home := curUser.HomeDir
		if home == "" {
			log.Fatal("Error: No home directory available.")
		}
		// Create cachePath if not yet existing.
		cachePath = home + "/.cache/feed-to-muc/"
		if _, err := os.Stat(cachePath); os.IsNotExist(err) {
			err = os.MkdirAll(cachePath, 0o700)
			if err != nil {
				log.Fatal("Error: Can't create cache path:", err)
			}
		}
	}

	// Create a hash as identifier for the feed.
	// The identifier will be used as file name for caching the update time.
	h := fnv.New32a()
	_, err := h.Write([]byte(feedURL))
	if err != nil {
		log.Fatal("Error: Can't create hash for", feedURL+":", err)
	}
	if _, err := os.Stat(cachePath); os.IsNotExist(err) {
		err = os.MkdirAll(cachePath, 0o700)
		if err != nil {
			log.Fatal("Error: Can't create cache path:", err)
		}
	}

	cacheFile := cachePath + strconv.Itoa(int(h.Sum32()))

	if _, err := os.Stat(cacheFile); os.IsNotExist(err) {
		// No cache file yet: create one and store the current time.
		file, err = os.Create(cacheFile)
		if err != nil {
			log.Fatal("Error: Can't create cache file:", err)
		}
		defer file.Close()
		last = time.Now()
		lastUpdate.LastChange = last.Format(time.RFC3339)
		lastUpdateJSON, _ := json.MarshalIndent(lastUpdate, "", " ")
		_, err = file.Write(lastUpdateJSON)
		if err != nil {
			log.Fatal("Error: Can't write last update time stamp to cache file:", err)
		}
	} else {
		// Read the cached time stamp of the last update.
		file, err = os.OpenFile(cacheFile, os.O_RDWR, 0o600)
		if err != nil {
			log.Fatal("Error: Can't open cache file:", err)
		}
		defer file.Close()
		decoder := json.NewDecoder(file)
		cachedUpdate := feedCache{}
		if err := decoder.Decode(&cachedUpdate); err != nil {
			log.Fatal("Error: Can't decode last update's time stamp:", err)
		}
		last, err = time.Parse(time.RFC3339, cachedUpdate.LastChange)
		if err != nil {
			log.Fatal("Error: Can't parse last update's time stamp:", err)
		}
	}
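	// Note: the cache file holds a single JSON object as produced by
	// json.MarshalIndent above; the time stamp value below is only an
	// illustrative example:
	//
	//	{
	//	 "LastChange": "2024-05-01T10:30:00Z"
	//	}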
	fp := gofeed.NewParser()
	feed, err := fp.ParseURL(feedURL)
	if err != nil {
		// Don't return an error but only log a message, as the
		// bot should not crash when the feed is not available.
		log.Println(feedURL, ": Feed not available.")
		return "", nil
	}

	// Give up if the feed contains no articles at all.
	if len(feed.Items) == 0 {
		return "", nil
	}

	// If no publish date is offered, try the update date.
	// If neither is offered, give up.
	if feed.Items[0].PublishedParsed == nil {
		if feed.Items[0].UpdatedParsed == nil {
			return "", nil
		}
		// If the cached time stamp is newer than the one of
		// the latest article, there is nothing new to report.
		if last.After(*feed.Items[0].UpdatedParsed) {
			return "", nil
		}
	} else {
		// If the cached time stamp is newer than the one of
		// the latest article, there is nothing new to report.
		if last.After(*feed.Items[0].PublishedParsed) {
			return "", nil
		}
	}

	// Check the last n (defined in the config) articles for new ones.
	for i := max - 1; i >= 0; i-- {
		// Skip index i if the feed does not contain that many articles.
		if len(feed.Items) < i+1 {
			continue
		}

		article := *feed.Items[i]

		// Prefer the publish date, fall back to the update date, and
		// skip the article if neither is available.
		if article.PublishedParsed == nil {
			if article.UpdatedParsed == nil {
				continue
			}
			updateTime = *article.UpdatedParsed
		} else {
			updateTime = *article.PublishedParsed
		}

		// If the cached time stamp is not older than the article's, skip it.
		// Note: Checking for the cached time stamp being newer, instead of
		// not older, led to duplicate messages for the same article. Probably
		// a corner case when the times are identical.
		if !last.Before(updateTime) {
			continue
		}

		last = updateTime
		lastUpdate.LastChange = updateTime.Format(time.RFC3339)
		// Remove the file with the cached time stamp and create it
		// again with the updated time stamp.
		// ToDo: Replace the time stamp without deleting the file.
		err = os.Remove(cacheFile)
		if err != nil {
			log.Fatal("Error: Can't delete cache file:", err)
		}
		file, err = os.Create(cacheFile)
		if err != nil {
			log.Fatal("Error: Can't create cache file:", err)
		}
		defer file.Close()
		lastUpdateJSON, _ := json.MarshalIndent(lastUpdate, "", " ")
		_, err = file.Write(lastUpdateJSON)
		if err != nil {
			log.Fatal("Error: Can't write last update time stamp to cache file:", err)
		}

		// Remove redirects and tracking parameters from the URL.
		cleanURL, _ := removeTracking(article.Link)

		// Only send title and link if the option noExcerpt is set,
		// otherwise also add the description.
		if noExcerpt {
			// Skip the article if the feed title contains one of the
			// strings configured in FilterMessage.
			filterStrike := false
			for _, filterString := range filterMessage {
				if strings.Contains(feed.Title, filterString) {
					filterStrike = true
				}
			}
			if filterStrike {
				continue
			}
			output = output + feed.Title + ": *" + article.Title + "*\n" + cleanURL
		} else {
			var description string
			// Some feeds don't provide a description; use the content in
			// that case, otherwise use the shorter description.
			if article.Description != "" {
				// Strip HTML as we want plain text.
				description, err = html2text.FromString(article.Description)
				if err != nil {
					return "", err
				}
			} else {
				// Strip HTML as we want plain text.
				description, err = html2text.FromString(article.Content)
				if err != nil {
					return "", err
				}
			}

			// Skip the article if the description or the feed title contains
			// one of the strings configured in FilterMessage.
			filterStrike := false
			for _, filterString := range filterMessage {
				if strings.Contains(description, filterString) ||
					strings.Contains(feed.Title, filterString) {
					filterStrike = true
				}
			}
			if filterStrike {
				continue
			}

			// Remove lines only consisting of "> "; thank you, Reddit.
			description = strings.ReplaceAll(description, "> \n", "")

			// Split the article description/content into single lines.
			lines := strings.Split(description, "\n")
			// Empty the article description/content.
			description = ""
			// Get the number of lines in the description/content.
			descriptionLength := len(lines)
			for n, line := range lines {
				// Remove empty lines to save space.
				if line != "" {
					// Remove lines starting with one of the defined filters.
					filterStrike := false
					for _, filterString := range filter {
						if strings.HasPrefix(line, filterString) {
							filterStrike = true
						}
					}
					if !filterStrike {
						description = description + line
						// Add a newline, except after the last line.
						if n < descriptionLength-1 {
							description = description + "\n"
						}
					}
				}
			}
			// To make the message look less bloated we could collapse
			// double newlines, e.g.:
			// Split the description/content into fragments between double newlines.
			// fragments := strings.Split(description, "\n\n")
			// Empty the article description/content.
			// description = ""
			// Fill it with the fragments separated by a single newline.
			// for _, line := range fragments {
			// 	// Only append non-empty fragments.
			// 	if line != "" {
			// 		description = description + line + "\n"
			// 	}
			// }

			// Only append the article link if it is not already contained
			// in the description (e.g. "read more: URL").
			if strings.Contains(description, article.Link) {
				// Replace the article link with the URL cleaned from
				// redirects and trackers.
				description = strings.ReplaceAll(description, article.Link, cleanURL)
				output = output + feed.Title + ": *" + article.Title + "*\n\n" + description
			} else {
				output = output + feed.Title + ": *" + article.Title + "*\n\n" +
					description + "\n" + cleanURL
			}
		}

		// Separate articles with a horizontal rule, except after the last one.
		if i > 0 {
			output = output + "\n\n---\n\n"
		}
	}

	return output, err
}
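// Illustrative usage (not part of the original source, all values are
// made-up examples): getArticles is called with the feed URL, the maximum
// number of articles to check, the NoExcerpt flag, and the filter lists
// from the configuration.
//
//	msg, err := getArticles("https://example.com/feed.xml", 5, false,
//		[]string{"Read more"}, []string{"[sponsored]"})
//	if err != nil {
//		log.Println("Error: Can't process feed:", err)
//	} else if msg != "" {
//		// Send msg to the MUC.
//	}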