feed-to-muc/vendor/github.com/mmcdole/gofeed/detector.go

79 lines
1.6 KiB
Go
Raw Normal View History

2019-02-20 20:23:48 +01:00
package gofeed
import (
2020-10-24 12:37:32 +02:00
"bytes"
2019-02-20 20:23:48 +01:00
"io"
"strings"
2020-10-24 12:37:32 +02:00
jsoniter "github.com/json-iterator/go"
2019-02-20 20:23:48 +01:00
"github.com/mmcdole/gofeed/internal/shared"
2020-05-14 16:07:09 +02:00
xpp "github.com/mmcdole/goxpp"
2019-02-20 20:23:48 +01:00
)
// FeedType represents one of the possible feed
// types that we can detect.
type FeedType int

const (
	// FeedTypeUnknown represents a feed that could not have its
	// type determined. It is the zero value of FeedType.
	FeedTypeUnknown FeedType = iota
	// FeedTypeAtom represents an Atom feed.
	FeedTypeAtom
	// FeedTypeRSS represents an RSS feed.
	FeedTypeRSS
	// FeedTypeJSON represents a JSON feed.
	FeedTypeJSON
)
// DetectFeedType attempts to determine the type of feed
// by looking for specific xml elements unique to the
// various feed types.
func DetectFeedType(feed io.Reader) FeedType {
2020-10-24 12:37:32 +02:00
buffer := new(bytes.Buffer)
buffer.ReadFrom(feed)
2019-02-20 20:23:48 +01:00
2020-10-24 12:37:32 +02:00
// remove leading whitespace (if exists)
var firstChar byte
for {
ch, err := buffer.ReadByte()
if err != nil {
return FeedTypeUnknown
}
if ch != ' ' && ch != '\t' {
firstChar = ch
buffer.UnreadByte()
break
}
2019-02-20 20:23:48 +01:00
}
2020-10-24 12:37:32 +02:00
if firstChar == '<' {
// Check if it's an XML based feed
p := xpp.NewXMLPullParser(bytes.NewReader(buffer.Bytes()), false, shared.NewReaderLabel)
xmlBase := shared.XMLBase{}
_, err := xmlBase.FindRoot(p)
if err != nil {
return FeedTypeUnknown
}
name := strings.ToLower(p.Name)
switch name {
case "rdf":
return FeedTypeRSS
case "rss":
return FeedTypeRSS
case "feed":
return FeedTypeAtom
default:
return FeedTypeUnknown
}
} else if firstChar == '{' {
// Check if document is valid JSON
if jsoniter.Valid(buffer.Bytes()) {
return FeedTypeJSON
}
2019-02-20 20:23:48 +01:00
}
2020-10-24 12:37:32 +02:00
return FeedTypeUnknown
2019-02-20 20:23:48 +01:00
}