From 4577ec34c103b0a7c40b0606602aa2f7918a403d Mon Sep 17 00:00:00 2001
From: Martin Dosch
Date: Fri, 3 Aug 2018 11:02:31 +0200
Subject: [PATCH] Added function to remove some commonly used tracking.

---
 getarticles.go    |  8 +++++-
 removetracking.go | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 removetracking.go

diff --git a/getarticles.go b/getarticles.go
index 7d039e1..f8d4949 100644
--- a/getarticles.go
+++ b/getarticles.go
@@ -193,16 +193,22 @@ func getArticles(feedURL string, max int) (string, error) {
 			}
 		}
 
+		// Remove redirects and tracking parameters from URL.
+		cleanURL, _ := removeTracking(article.Link)
+
 		// Strip HTML as we want to get plain text.
 		description, err := html2text.FromString(article.Description)
 		if err != nil {
 			return "", err
 		}
+
 		// Only append article link if it is not yet contained in description (e.g. read more: URL).
 		if strings.Contains(description, article.Link) == true {
+			// Replace article link with URL cleaned from redirects and trackers.
+			description = strings.Replace(description, article.Link, cleanURL, -1)
 			output = output + feed.Title + ": *" + article.Title + "*\n\n" + description
 		} else {
-			output = output + feed.Title + ": *" + article.Title + "*\n\n" + description + "\n" + article.Link
+			output = output + feed.Title + ": *" + article.Title + "*\n\n" + description + "\n" + cleanURL
 		}
 
 		if i > 0 {
diff --git a/removetracking.go b/removetracking.go
new file mode 100644
index 0000000..a3179a3
--- /dev/null
+++ b/removetracking.go
@@ -0,0 +1,66 @@
+/* Copyright 2018 Martin Dosch
+Licensed under the "MIT License" */
+
+package main
+
+import (
+	"net/http"
+	"strings"
+	"time"
+)
+
+// trackingClient resolves redirects for removeTracking. Unlike
+// http.Get it has a timeout, so an unresponsive host cannot block
+// feed processing forever.
+var trackingClient = &http.Client{Timeout: 30 * time.Second}
+
+// removeTracking follows the redirects of input (e.g. through
+// services like feedproxy.google.com) and returns the final URL
+// with commonly used tracking parameters removed.
+//
+// If the request fails, input is returned unchanged together with
+// the error, so callers may fall back to the returned URL.
+func removeTracking(input string) (output string, err error) {
+	resp, err := trackingClient.Get(input)
+	if err != nil {
+		return input, err
+	}
+	// Only the final URL is needed; close the body so the
+	// connection is not leaked.
+	defer resp.Body.Close()
+
+	return stripTrackingParams(resp.Request.URL.String()), nil
+}
+
+// stripTrackingParams removes query parameters whose names start
+// with "utm_" or "wt_" (Webtrekk) as well as a fragment starting
+// with "ref=" from rawURL. All other parameters keep their order
+// and encoding.
+func stripTrackingParams(rawURL string) string {
+	fragment := ""
+	if i := strings.Index(rawURL, "#"); i >= 0 {
+		rawURL, fragment = rawURL[:i], rawURL[i:]
+	}
+	// "#ref=" is used by some feeds to track the referer.
+	if strings.HasPrefix(fragment, "#ref=") {
+		fragment = ""
+	}
+
+	i := strings.Index(rawURL, "?")
+	if i < 0 {
+		return rawURL + fragment
+	}
+	base, query := rawURL[:i], rawURL[i+1:]
+
+	var kept []string
+	for _, param := range strings.Split(query, "&") {
+		if strings.HasPrefix(param, "utm_") || strings.HasPrefix(param, "wt_") {
+			continue
+		}
+		kept = append(kept, param)
+	}
+	if len(kept) == 0 {
+		return base + fragment
+	}
+	return base + "?" + strings.Join(kept, "&") + fragment
+}