mirror of
https://salsa.debian.org/mdosch/feed-to-muc.git
synced 2024-11-22 14:08:39 +01:00
Added function to remove some commonly used tracking.
This commit is contained in:
parent
03a3fd5b8e
commit
4577ec34c1
2 changed files with 41 additions and 1 deletions
|
@ -193,16 +193,22 @@ func getArticles(feedURL string, max int) (string, error) {
|
|||
}
|
||||
}
|
||||
|
||||
// Remove redirects and tracking parameters from URL.
|
||||
cleanURL, _ := removeTracking(article.Link)
|
||||
|
||||
// Strip HTML as we want to get plain text.
|
||||
description, err := html2text.FromString(article.Description)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Only append article link if it is not yet contained in description (e.g. read more: URL).
|
||||
if strings.Contains(description, article.Link) == true {
|
||||
// Replace article link with URL cleaned from redirects and trackers.
|
||||
description = strings.Replace(description, article.Link, cleanURL, -1)
|
||||
output = output + feed.Title + ": *" + article.Title + "*\n\n" + description
|
||||
} else {
|
||||
output = output + feed.Title + ": *" + article.Title + "*\n\n" + description + "\n" + article.Link
|
||||
output = output + feed.Title + ": *" + article.Title + "*\n\n" + description + "\n" + cleanURL
|
||||
}
|
||||
|
||||
if i > 0 {
|
||||
|
|
34
removetracking.go
Normal file
34
removetracking.go
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* Copyright 2018 Martin Dosch
|
||||
Licensed under the "MIT License" */
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// removeTracking resolves input by performing an HTTP GET (following any
// redirects) and returns the final request URL with common tracking
// suffixes removed.
//
// Everything from the first literal occurrence of "?utm_", "?wt_"
// (Webtrekk) or "#ref=" onwards is cut off. The match is purely textual:
// a tracking parameter that follows another query parameter (for example
// "?id=1&utm_source=x") is not removed, and any parameters following a
// matched marker are dropped together with it.
//
// If the request fails, the unmodified input is returned together with the
// error, so callers that ignore the error still get a usable link.
func removeTracking(input string) (output string, err error) {
	// Perform a GET request to get rid of 301 forwarding through
	// services like feedproxy.google.com.
	resp, err := http.Get(input)
	if err != nil {
		return input, err
	}
	// Only the final URL is of interest; the body is never read, but it
	// must be closed so the underlying connection is released.
	defer resp.Body.Close()

	output = resp.Request.URL.String()

	// Markers are applied in order; each one truncates the URL at its
	// first occurrence.
	for _, marker := range []string{"?utm_", "?wt_", "#ref="} {
		output = strings.Split(output, marker)[0]
	}

	return output, nil
}
|
Loading…
Reference in a new issue