feed-to-muc/vendor/github.com/mmcdole/gofeed/parser.go

190 lines
4.3 KiB
Go
Raw Normal View History

2019-02-20 20:23:48 +01:00
package gofeed
import (
"bytes"
2020-05-14 16:07:09 +02:00
"context"
2019-02-20 20:23:48 +01:00
"errors"
"fmt"
"io"
"net/http"
"strings"
"github.com/mmcdole/gofeed/atom"
2020-10-24 12:37:32 +02:00
"github.com/mmcdole/gofeed/json"
2019-02-20 20:23:48 +01:00
"github.com/mmcdole/gofeed/rss"
)
2020-05-14 16:07:09 +02:00
// ErrFeedTypeNotDetected is the sentinel error returned by Parser.Parse when
// the detected feed type is none of Atom, RSS, or JSON.
var ErrFeedTypeNotDetected = errors.New("Failed to detect feed type")
2019-02-20 20:23:48 +01:00
// HTTPError represents an HTTP error returned by a server.
// ParseURLWithContext returns it when the response status code is
// outside the 200-299 range.
type HTTPError struct {
// StatusCode is the numeric status code copied from the response.
StatusCode int
// Status is the status text copied from the response; it is the part
// of the error that Error() reports.
Status string
}
// Error implements the error interface. The message is the fixed prefix
// "http error: " followed by the Status text of the response.
func (e HTTPError) Error() string {
	const format = "http error: %s"
	return fmt.Sprintf(format, e.Status)
}
// Parser is a universal feed parser that detects
// a given feed type, parsers it, and translates it
// to the universal feed type.
type Parser struct {
AtomTranslator Translator
RSSTranslator Translator
2020-10-24 12:37:32 +02:00
JSONTranslator Translator
2021-12-13 10:35:45 +01:00
UserAgent string
2019-02-20 20:23:48 +01:00
Client *http.Client
rp *rss.Parser
ap *atom.Parser
2020-10-24 12:37:32 +02:00
jp *json.Parser
2019-02-20 20:23:48 +01:00
}
// NewParser creates a universal feed parser.
func NewParser() *Parser {
fp := Parser{
2021-12-13 10:35:45 +01:00
rp: &rss.Parser{},
ap: &atom.Parser{},
jp: &json.Parser{},
UserAgent: "Gofeed/1.0",
2019-02-20 20:23:48 +01:00
}
return &fp
}
2020-10-24 12:37:32 +02:00
// Parse parses a RSS or Atom or JSON feed into
2019-02-20 20:23:48 +01:00
// the universal gofeed.Feed. It takes an
2020-10-24 12:37:32 +02:00
// io.Reader which should return the xml/json content.
2019-02-20 20:23:48 +01:00
func (f *Parser) Parse(feed io.Reader) (*Feed, error) {
// Wrap the feed io.Reader in a io.TeeReader
// so we can capture all the bytes read by the
// DetectFeedType function and construct a new
// reader with those bytes intact for when we
// attempt to parse the feeds.
var buf bytes.Buffer
tee := io.TeeReader(feed, &buf)
feedType := DetectFeedType(tee)
// Glue the read bytes from the detect function
// back into a new reader
r := io.MultiReader(&buf, feed)
switch feedType {
case FeedTypeAtom:
return f.parseAtomFeed(r)
case FeedTypeRSS:
return f.parseRSSFeed(r)
2020-10-24 12:37:32 +02:00
case FeedTypeJSON:
return f.parseJSONFeed(r)
2019-02-20 20:23:48 +01:00
}
2020-05-14 16:07:09 +02:00
return nil, ErrFeedTypeNotDetected
2019-02-20 20:23:48 +01:00
}
// ParseURL fetches the contents of a given url and
// attempts to parse the response into the universal feed type.
func (f *Parser) ParseURL(feedURL string) (feed *Feed, err error) {
2020-05-14 16:07:09 +02:00
return f.ParseURLWithContext(feedURL, context.Background())
}
// ParseURLWithContext fetches contents of a given url and
// attempts to parse the response into the universal feed type.
// Request could be canceled or timeout via given context
func (f *Parser) ParseURLWithContext(feedURL string, ctx context.Context) (feed *Feed, err error) {
2019-02-20 20:23:48 +01:00
client := f.httpClient()
2020-05-14 16:07:09 +02:00
req, err := http.NewRequest("GET", feedURL, nil)
if err != nil {
return nil, err
}
req = req.WithContext(ctx)
2021-12-13 10:35:45 +01:00
req.Header.Set("User-Agent", f.UserAgent)
2020-05-14 16:07:09 +02:00
resp, err := client.Do(req)
2019-02-20 20:23:48 +01:00
if err != nil {
return nil, err
}
if resp != nil {
defer func() {
ce := resp.Body.Close()
if ce != nil {
err = ce
}
}()
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return nil, HTTPError{
StatusCode: resp.StatusCode,
Status: resp.Status,
}
}
return f.Parse(resp.Body)
}
// ParseString parses a feed held in a string into the universal feed type.
// The string is wrapped in a reader and handed to Parse.
func (f *Parser) ParseString(feed string) (*Feed, error) {
	src := strings.NewReader(feed)
	return f.Parse(src)
}
// parseAtomFeed parses feed with the Atom parser and, when that succeeds,
// converts the result with the translator returned by atomTrans.
func (f *Parser) parseAtomFeed(feed io.Reader) (*Feed, error) {
	parsed, err := f.ap.Parse(feed)
	if err == nil {
		return f.atomTrans().Translate(parsed)
	}
	return nil, err
}
// parseRSSFeed parses feed with the RSS parser and, when that succeeds,
// converts the result with the translator returned by rssTrans.
func (f *Parser) parseRSSFeed(feed io.Reader) (*Feed, error) {
	parsed, err := f.rp.Parse(feed)
	if err == nil {
		return f.rssTrans().Translate(parsed)
	}
	return nil, err
}
2020-10-24 12:37:32 +02:00
// parseJSONFeed parses feed with the JSON parser and, when that succeeds,
// converts the result with the translator returned by jsonTrans.
func (f *Parser) parseJSONFeed(feed io.Reader) (*Feed, error) {
	parsed, err := f.jp.Parse(feed)
	if err == nil {
		return f.jsonTrans().Translate(parsed)
	}
	return nil, err
}
2019-02-20 20:23:48 +01:00
// atomTrans returns the configured Atom translator, installing a
// DefaultAtomTranslator on the parser first if none has been set.
func (f *Parser) atomTrans() Translator {
	if f.AtomTranslator == nil {
		f.AtomTranslator = &DefaultAtomTranslator{}
	}
	return f.AtomTranslator
}
// rssTrans returns the configured RSS translator, installing a
// DefaultRSSTranslator on the parser first if none has been set.
func (f *Parser) rssTrans() Translator {
	if f.RSSTranslator == nil {
		f.RSSTranslator = &DefaultRSSTranslator{}
	}
	return f.RSSTranslator
}
2020-10-24 12:37:32 +02:00
// jsonTrans returns the configured JSON translator, installing a
// DefaultJSONTranslator on the parser first if none has been set.
func (f *Parser) jsonTrans() Translator {
	if f.JSONTranslator == nil {
		f.JSONTranslator = &DefaultJSONTranslator{}
	}
	return f.JSONTranslator
}
2019-02-20 20:23:48 +01:00
// httpClient returns the HTTP client used for fetching feeds. When the
// Client field is nil, a zero-value http.Client (no Timeout configured) is
// created, stored on the parser, and returned.
func (f *Parser) httpClient() *http.Client {
	if f.Client == nil {
		f.Client = &http.Client{}
	}
	return f.Client
}