2019-02-20 20:23:48 +01:00
|
|
|
package gofeed
|
|
|
|
|
|
|
|
import (
|
2020-10-24 12:37:32 +02:00
|
|
|
"bytes"
|
2019-02-20 20:23:48 +01:00
|
|
|
"io"
|
|
|
|
"strings"
|
|
|
|
|
2020-10-24 12:37:32 +02:00
|
|
|
jsoniter "github.com/json-iterator/go"
|
2019-02-20 20:23:48 +01:00
|
|
|
"github.com/mmcdole/gofeed/internal/shared"
|
2020-05-14 16:07:09 +02:00
|
|
|
xpp "github.com/mmcdole/goxpp"
|
2019-02-20 20:23:48 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
// FeedType identifies the kind of feed document that detection
// recognised. It is an integer enumeration; its zero value is
// FeedTypeUnknown.
type FeedType int
|
|
|
|
|
|
|
|
const (
|
|
|
|
// FeedTypeUnknown represents a feed that could not have its
|
|
|
|
// type determiend.
|
|
|
|
FeedTypeUnknown FeedType = iota
|
|
|
|
// FeedTypeAtom repesents an Atom feed
|
|
|
|
FeedTypeAtom
|
|
|
|
// FeedTypeRSS represents an RSS feed
|
|
|
|
FeedTypeRSS
|
2020-10-24 12:37:32 +02:00
|
|
|
// FeedTypeJSON represents a JSON feed
|
|
|
|
FeedTypeJSON
|
2019-02-20 20:23:48 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
// DetectFeedType attempts to determine the type of feed
|
|
|
|
// by looking for specific xml elements unique to the
|
|
|
|
// various feed types.
|
|
|
|
func DetectFeedType(feed io.Reader) FeedType {
|
2020-10-24 12:37:32 +02:00
|
|
|
buffer := new(bytes.Buffer)
|
|
|
|
buffer.ReadFrom(feed)
|
2019-02-20 20:23:48 +01:00
|
|
|
|
2020-10-24 12:37:32 +02:00
|
|
|
var firstChar byte
|
2021-12-13 10:35:45 +01:00
|
|
|
loop: for {
|
2020-10-24 12:37:32 +02:00
|
|
|
ch, err := buffer.ReadByte()
|
|
|
|
if err != nil {
|
|
|
|
return FeedTypeUnknown
|
|
|
|
}
|
2021-12-13 10:35:45 +01:00
|
|
|
// ignore leading whitespace & byte order marks
|
|
|
|
switch ch {
|
|
|
|
case ' ', '\r', '\n', '\t':
|
|
|
|
case 0xFE, 0xFF, 0x00, 0xEF, 0xBB, 0xBF: // utf 8-16-32 bom
|
|
|
|
default:
|
2020-10-24 12:37:32 +02:00
|
|
|
firstChar = ch
|
|
|
|
buffer.UnreadByte()
|
2021-12-13 10:35:45 +01:00
|
|
|
break loop
|
2020-10-24 12:37:32 +02:00
|
|
|
}
|
2019-02-20 20:23:48 +01:00
|
|
|
}
|
|
|
|
|
2020-10-24 12:37:32 +02:00
|
|
|
if firstChar == '<' {
|
|
|
|
// Check if it's an XML based feed
|
|
|
|
p := xpp.NewXMLPullParser(bytes.NewReader(buffer.Bytes()), false, shared.NewReaderLabel)
|
|
|
|
|
|
|
|
xmlBase := shared.XMLBase{}
|
|
|
|
_, err := xmlBase.FindRoot(p)
|
|
|
|
if err != nil {
|
|
|
|
return FeedTypeUnknown
|
|
|
|
}
|
|
|
|
|
|
|
|
name := strings.ToLower(p.Name)
|
|
|
|
switch name {
|
|
|
|
case "rdf":
|
|
|
|
return FeedTypeRSS
|
|
|
|
case "rss":
|
|
|
|
return FeedTypeRSS
|
|
|
|
case "feed":
|
|
|
|
return FeedTypeAtom
|
|
|
|
default:
|
|
|
|
return FeedTypeUnknown
|
|
|
|
}
|
|
|
|
} else if firstChar == '{' {
|
|
|
|
// Check if document is valid JSON
|
|
|
|
if jsoniter.Valid(buffer.Bytes()) {
|
|
|
|
return FeedTypeJSON
|
|
|
|
}
|
2019-02-20 20:23:48 +01:00
|
|
|
}
|
2020-10-24 12:37:32 +02:00
|
|
|
return FeedTypeUnknown
|
2019-02-20 20:23:48 +01:00
|
|
|
}
|