package rss import ( "fmt" "io" "strings" ext "github.com/mmcdole/gofeed/extensions" "github.com/mmcdole/gofeed/internal/shared" xpp "github.com/mmcdole/goxpp" ) // Parser is a RSS Parser type Parser struct { base *shared.XMLBase } // Parse parses an xml feed into an rss.Feed func (rp *Parser) Parse(feed io.Reader) (*Feed, error) { p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) rp.base = &shared.XMLBase{} _, err := rp.base.FindRoot(p) if err != nil { return nil, err } return rp.parseRoot(p) } func (rp *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) { rssErr := p.Expect(xpp.StartTag, "rss") rdfErr := p.Expect(xpp.StartTag, "rdf") if rssErr != nil && rdfErr != nil { return nil, fmt.Errorf("%s or %s", rssErr.Error(), rdfErr.Error()) } // Items found in feed root var channel *Feed var textinput *TextInput var image *Image items := []*Item{} ver := rp.parseVersion(p) for { tok, err := rp.base.NextTag(p) if err != nil { return nil, err } if tok == xpp.EndTag { break } if tok == xpp.StartTag { // Skip any extensions found in the feed root. if shared.IsExtension(p) { p.Skip() continue } name := strings.ToLower(p.Name) if name == "channel" { channel, err = rp.parseChannel(p) if err != nil { return nil, err } } else if name == "item" { item, err := rp.parseItem(p) if err != nil { return nil, err } items = append(items, item) } else if name == "textinput" { textinput, err = rp.parseTextInput(p) if err != nil { return nil, err } } else if name == "image" { image, err = rp.parseImage(p) if err != nil { return nil, err } } else { p.Skip() } } } rssErr = p.Expect(xpp.EndTag, "rss") rdfErr = p.Expect(xpp.EndTag, "rdf") if rssErr != nil && rdfErr != nil { return nil, fmt.Errorf("%s or %s", rssErr.Error(), rdfErr.Error()) } if channel == nil { channel = &Feed{} channel.Items = []*Item{} } if len(items) > 0 { channel.Items = append(channel.Items, items...) } if textinput != nil { channel.TextInput = textinput } if image != nil { channel.Image = image } channel.Version = ver return channel, nil } func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) { if err = p.Expect(xpp.StartTag, "channel"); err != nil { return nil, err } rss = &Feed{} rss.Items = []*Item{} extensions := ext.Extensions{} categories := []*Category{} for { tok, err := rp.base.NextTag(p) if err != nil { return nil, err } if tok == xpp.EndTag { break } if tok == xpp.StartTag { name := strings.ToLower(p.Name) if shared.IsExtension(p) { ext, err := shared.ParseExtension(extensions, p) if err != nil { return nil, err } extensions = ext } else if name == "title" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.Title = result } else if name == "description" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.Description = result } else if name == "link" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.Link = result } else if name == "language" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.Language = result } else if name == "copyright" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.Copyright = result } else if name == "managingeditor" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.ManagingEditor = result } else if name == "webmaster" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.WebMaster = result } else if name == "pubdate" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.PubDate = result date, err := shared.ParseDate(result) if err == nil { utcDate := date.UTC() rss.PubDateParsed = &utcDate } } else if name == "lastbuilddate" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.LastBuildDate = result date, err := shared.ParseDate(result) if err == nil { utcDate := date.UTC() rss.LastBuildDateParsed = &utcDate } } else if name == "generator" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.Generator = result } else if name == "docs" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.Docs = result } else if name == "ttl" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.TTL = result } else if name == "rating" { result, err := shared.ParseText(p) if err != nil { return nil, err } rss.Rating = result } else if name == "skiphours" { result, err := rp.parseSkipHours(p) if err != nil { return nil, err } rss.SkipHours = result } else if name == "skipdays" { result, err := rp.parseSkipDays(p) if err != nil { return nil, err } rss.SkipDays = result } else if name == "item" { result, err := rp.parseItem(p) if err != nil { return nil, err } rss.Items = append(rss.Items, result) } else if name == "cloud" { result, err := rp.parseCloud(p) if err != nil { return nil, err } rss.Cloud = result } else if name == "category" { result, err := rp.parseCategory(p) if err != nil { return nil, err } categories = append(categories, result) } else if name == "image" { result, err := rp.parseImage(p) if err != nil { return nil, err } rss.Image = result } else if name == "textinput" { result, err := rp.parseTextInput(p) if err != nil { return nil, err } rss.TextInput = result } else { // Skip element as it isn't an extension and not // part of the spec p.Skip() } } } if err = p.Expect(xpp.EndTag, "channel"); err != nil { return nil, err } if len(categories) > 0 { rss.Categories = categories } if len(extensions) > 0 { rss.Extensions = extensions if itunes, ok := rss.Extensions["itunes"]; ok { rss.ITunesExt = ext.NewITunesFeedExtension(itunes) } if dc, ok := rss.Extensions["dc"]; ok { rss.DublinCoreExt = ext.NewDublinCoreExtension(dc) } } return rss, nil } func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) { if err = p.Expect(xpp.StartTag, "item"); err != nil { return nil, err } item = &Item{} extensions := ext.Extensions{} categories := []*Category{} for { tok, err := rp.base.NextTag(p) if err != nil { return nil, err } if tok == xpp.EndTag { break } if tok == xpp.StartTag { name := strings.ToLower(p.Name) if shared.IsExtension(p) { ext, err := shared.ParseExtension(extensions, p) if err != nil { return nil, err } item.Extensions = ext } else if name == "title" { result, err := shared.ParseText(p) if err != nil { return nil, err } item.Title = result } else if name == "description" { result, err := shared.ParseText(p) if err != nil { return nil, err } item.Description = result } else if name == "encoded" { space := strings.TrimSpace(p.Space) if prefix, ok := p.Spaces[space]; ok && prefix == "content" { result, err := shared.ParseText(p) if err != nil { return nil, err } item.Content = result } } else if name == "link" { result, err := shared.ParseText(p) if err != nil { return nil, err } item.Link = result } else if name == "author" { result, err := shared.ParseText(p) if err != nil { return nil, err } item.Author = result } else if name == "comments" { result, err := shared.ParseText(p) if err != nil { return nil, err } item.Comments = result } else if name == "pubdate" { result, err := shared.ParseText(p) if err != nil { return nil, err } item.PubDate = result date, err := shared.ParseDate(result) if err == nil { utcDate := date.UTC() item.PubDateParsed = &utcDate } } else if name == "source" { result, err := rp.parseSource(p) if err != nil { return nil, err } item.Source = result } else if name == "enclosure" { result, err := rp.parseEnclosure(p) if err != nil { return nil, err } item.Enclosure = result } else if name == "guid" { result, err := rp.parseGUID(p) if err != nil { return nil, err } item.GUID = result } else if name == "category" { result, err := rp.parseCategory(p) if err != nil { return nil, err } categories = append(categories, result) } else { result, err := shared.ParseText(p) if err != nil { continue } if item.Custom == nil { item.Custom = make(map[string]string, 0) } item.Custom[name] = result } } } if len(categories) > 0 { item.Categories = categories } if len(extensions) > 0 { item.Extensions = extensions if itunes, ok := item.Extensions["itunes"]; ok { item.ITunesExt = ext.NewITunesItemExtension(itunes) } if dc, ok := item.Extensions["dc"]; ok { item.DublinCoreExt = ext.NewDublinCoreExtension(dc) } } if err = p.Expect(xpp.EndTag, "item"); err != nil { return nil, err } return item, nil } func (rp *Parser) parseSource(p *xpp.XMLPullParser) (source *Source, err error) { if err = p.Expect(xpp.StartTag, "source"); err != nil { return nil, err } source = &Source{} source.URL = p.Attribute("url") result, err := shared.ParseText(p) if err != nil { return source, err } source.Title = result if err = p.Expect(xpp.EndTag, "source"); err != nil { return nil, err } return source, nil } func (rp *Parser) parseEnclosure(p *xpp.XMLPullParser) (enclosure *Enclosure, err error) { if err = p.Expect(xpp.StartTag, "enclosure"); err != nil { return nil, err } enclosure = &Enclosure{} enclosure.URL = p.Attribute("url") enclosure.Length = p.Attribute("length") enclosure.Type = p.Attribute("type") // Ignore any enclosure text _, err = p.NextText() if err != nil { return enclosure, err } if err = p.Expect(xpp.EndTag, "enclosure"); err != nil { return nil, err } return enclosure, nil } func (rp *Parser) parseImage(p *xpp.XMLPullParser) (image *Image, err error) { if err = p.Expect(xpp.StartTag, "image"); err != nil { return nil, err } image = &Image{} for { tok, err := rp.base.NextTag(p) if err != nil { return image, err } if tok == xpp.EndTag { break } if tok == xpp.StartTag { name := strings.ToLower(p.Name) if name == "url" { result, err := shared.ParseText(p) if err != nil { return nil, err } image.URL = result } else if name == "title" { result, err := shared.ParseText(p) if err != nil { return nil, err } image.Title = result } else if name == "link" { result, err := shared.ParseText(p) if err != nil { return nil, err } image.Link = result } else if name == "width" { result, err := shared.ParseText(p) if err != nil { return nil, err } image.Width = result } else if name == "height" { result, err := shared.ParseText(p) if err != nil { return nil, err } image.Height = result } else if name == "description" { result, err := shared.ParseText(p) if err != nil { return nil, err } image.Description = result } else { p.Skip() } } } if err = p.Expect(xpp.EndTag, "image"); err != nil { return nil, err } return image, nil } func (rp *Parser) parseGUID(p *xpp.XMLPullParser) (guid *GUID, err error) { if err = p.Expect(xpp.StartTag, "guid"); err != nil { return nil, err } guid = &GUID{} guid.IsPermalink = p.Attribute("isPermalink") result, err := shared.ParseText(p) if err != nil { return } guid.Value = result if err = p.Expect(xpp.EndTag, "guid"); err != nil { return nil, err } return guid, nil } func (rp *Parser) parseCategory(p *xpp.XMLPullParser) (cat *Category, err error) { if err = p.Expect(xpp.StartTag, "category"); err != nil { return nil, err } cat = &Category{} cat.Domain = p.Attribute("domain") result, err := shared.ParseText(p) if err != nil { return nil, err } cat.Value = result if err = p.Expect(xpp.EndTag, "category"); err != nil { return nil, err } return cat, nil } func (rp *Parser) parseTextInput(p *xpp.XMLPullParser) (*TextInput, error) { if err := p.Expect(xpp.StartTag, "textinput"); err != nil { return nil, err } ti := &TextInput{} for { tok, err := rp.base.NextTag(p) if err != nil { return nil, err } if tok == xpp.EndTag { break } if tok == xpp.StartTag { name := strings.ToLower(p.Name) if name == "title" { result, err := shared.ParseText(p) if err != nil { return nil, err } ti.Title = result } else if name == "description" { result, err := shared.ParseText(p) if err != nil { return nil, err } ti.Description = result } else if name == "name" { result, err := shared.ParseText(p) if err != nil { return nil, err } ti.Name = result } else if name == "link" { result, err := shared.ParseText(p) if err != nil { return nil, err } ti.Link = result } else { p.Skip() } } } if err := p.Expect(xpp.EndTag, "textinput"); err != nil { return nil, err } return ti, nil } func (rp *Parser) parseSkipHours(p *xpp.XMLPullParser) ([]string, error) { if err := p.Expect(xpp.StartTag, "skiphours"); err != nil { return nil, err } hours := []string{} for { tok, err := rp.base.NextTag(p) if err != nil { return nil, err } if tok == xpp.EndTag { break } if tok == xpp.StartTag { name := strings.ToLower(p.Name) if name == "hour" { result, err := shared.ParseText(p) if err != nil { return nil, err } hours = append(hours, result) } else { p.Skip() } } } if err := p.Expect(xpp.EndTag, "skiphours"); err != nil { return nil, err } return hours, nil } func (rp *Parser) parseSkipDays(p *xpp.XMLPullParser) ([]string, error) { if err := p.Expect(xpp.StartTag, "skipdays"); err != nil { return nil, err } days := []string{} for { tok, err := rp.base.NextTag(p) if err != nil { return nil, err } if tok == xpp.EndTag { break } if tok == xpp.StartTag { name := strings.ToLower(p.Name) if name == "day" { result, err := shared.ParseText(p) if err != nil { return nil, err } days = append(days, result) } else { p.Skip() } } } if err := p.Expect(xpp.EndTag, "skipdays"); err != nil { return nil, err } return days, nil } func (rp *Parser) parseCloud(p *xpp.XMLPullParser) (*Cloud, error) { if err := p.Expect(xpp.StartTag, "cloud"); err != nil { return nil, err } cloud := &Cloud{} cloud.Domain = p.Attribute("domain") cloud.Port = p.Attribute("port") cloud.Path = p.Attribute("path") cloud.RegisterProcedure = p.Attribute("registerProcedure") cloud.Protocol = p.Attribute("protocol") rp.base.NextTag(p) if err := p.Expect(xpp.EndTag, "cloud"); err != nil { return nil, err } return cloud, nil } func (rp *Parser) parseVersion(p *xpp.XMLPullParser) (ver string) { name := strings.ToLower(p.Name) if name == "rss" { ver = p.Attribute("version") } else if name == "rdf" { ns := p.Attribute("xmlns") if ns == "http://channel.netscape.com/rdf/simple/0.9/" || ns == "http://my.netscape.com/rdf/simple/0.9/" { ver = "0.9" } else if ns == "http://purl.org/rss/1.0/" { ver = "1.0" } } return }