Added a function to remove some commonly used tracking parameters from article URLs.

This commit is contained in:
Martin Dosch 2018-08-03 11:02:31 +02:00
parent 03a3fd5b8e
commit 4577ec34c1
2 changed files with 41 additions and 1 deletions

View file

@ -193,16 +193,22 @@ func getArticles(feedURL string, max int) (string, error) {
} }
} }
// Remove redirects and tracking parameters from URL.
cleanURL, _ := removeTracking(article.Link)
// Strip HTML as we want to get plain text. // Strip HTML as we want to get plain text.
description, err := html2text.FromString(article.Description) description, err := html2text.FromString(article.Description)
if err != nil { if err != nil {
return "", err return "", err
} }
// Only append article link if it is not yet contained in description (e.g. read more: URL).
if strings.Contains(description, article.Link) == true { if strings.Contains(description, article.Link) == true {
// Replace article link with URL cleaned from redirects and trackers.
description = strings.Replace(description, article.Link, cleanURL, -1)
output = output + feed.Title + ": *" + article.Title + "*\n\n" + description output = output + feed.Title + ": *" + article.Title + "*\n\n" + description
} else { } else {
output = output + feed.Title + ": *" + article.Title + "*\n\n" + description + "\n" + article.Link output = output + feed.Title + ": *" + article.Title + "*\n\n" + description + "\n" + cleanURL
} }
if i > 0 { if i > 0 {

34
removetracking.go Normal file
View file

@ -0,0 +1,34 @@
/* Copyright 2018 Martin Dosch
Licensed under the "MIT License" */
package main
import (
	"net/http"
	"strings"
	"time"
)
// removeTracking resolves redirects for the given URL and strips common
// tracking markers from the final address.
//
// It performs a GET request so that forwarding services such as
// feedproxy.google.com are followed to the real article location, then cuts
// the resulting URL at the first occurrence of each known tracking marker.
//
// On a request failure the unmodified input is returned together with the
// error, so callers that ignore the error still get a usable link.
func removeTracking(input string) (output string, err error) {
	// Bound the request so a stalled server cannot block feed processing
	// forever. A client with a nil Transport still shares the default
	// transport's connection pool, so building it per call is cheap.
	client := http.Client{Timeout: 30 * time.Second}

	// Perform a GET request to get rid of 301 forwarding.
	resp, err := client.Get(input)
	if err != nil {
		return input, err
	}
	// Only the final request URL is needed; release the connection without
	// reading the (possibly large) article body.
	defer resp.Body.Close()

	return stripTracking(resp.Request.URL.String()), nil
}

// stripTracking cuts rawURL at the first occurrence of each known tracking
// marker, applied in order, and returns what remains.
func stripTracking(rawURL string) string {
	markers := []string{
		"?utm_", // generic campaign tracking parameters
		"?wt_",  // Webtrekk tracking parameters
		"#ref=", // referer tracking used by some feeds
	}
	for _, marker := range markers {
		if i := strings.Index(rawURL, marker); i >= 0 {
			rawURL = rawURL[:i]
		}
	}
	return rawURL
}