diff --git a/vendor/github.com/mmcdole/gofeed/atom/parser.go b/vendor/github.com/mmcdole/gofeed/atom/parser.go new file mode 100644 index 0000000..000ace6 --- /dev/null +++ b/vendor/github.com/mmcdole/gofeed/atom/parser.go @@ -0,0 +1,761 @@ +package atom + +import ( + "encoding/base64" + "io" + "strings" + + "github.com/PuerkitoBio/goquery" + ext "github.com/mmcdole/gofeed/extensions" + "github.com/mmcdole/gofeed/internal/shared" + xpp "github.com/mmcdole/goxpp" +) + +var ( + // Atom elements which contain URIs + // https://tools.ietf.org/html/rfc4287 + uriElements = map[string]bool{ + "icon": true, + "id": true, + "logo": true, + "uri": true, + "url": true, // atom 0.3 + } + + // Atom attributes which contain URIs + // https://tools.ietf.org/html/rfc4287 + atomURIAttrs = map[string]bool{ + "href": true, + "scheme": true, + "src": true, + "uri": true, + } +) + +// Parser is an Atom Parser +type Parser struct { + base *shared.XMLBase +} + +// Parse parses an xml feed into an atom.Feed +func (ap *Parser) Parse(feed io.Reader) (*Feed, error) { + p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) + ap.base = &shared.XMLBase{URIAttrs: atomURIAttrs} + + _, err := ap.base.FindRoot(p) + if err != nil { + return nil, err + } + + return ap.parseRoot(p) +} + +func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) { + if err := p.Expect(xpp.StartTag, "feed"); err != nil { + return nil, err + } + + atom := &Feed{} + atom.Entries = []*Entry{} + atom.Version = ap.parseVersion(p) + atom.Language = ap.parseLanguage(p) + + contributors := []*Person{} + authors := []*Person{} + categories := []*Category{} + links := []*Link{} + extensions := ext.Extensions{} + + for { + tok, err := ap.base.NextTag(p) + if err != nil { + return nil, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + + name := strings.ToLower(p.Name) + + if shared.IsExtension(p) { + e, err := shared.ParseExtension(extensions, p) + if err != nil { + return nil, err + } + 
extensions = e + } else if name == "title" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + atom.Title = result + } else if name == "id" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + atom.ID = result + } else if name == "updated" || + name == "modified" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + atom.Updated = result + date, err := shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + atom.UpdatedParsed = &utcDate + } + } else if name == "subtitle" || + name == "tagline" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + atom.Subtitle = result + } else if name == "link" { + result, err := ap.parseLink(p) + if err != nil { + return nil, err + } + links = append(links, result) + } else if name == "generator" { + result, err := ap.parseGenerator(p) + if err != nil { + return nil, err + } + atom.Generator = result + } else if name == "icon" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + atom.Icon = result + } else if name == "logo" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + atom.Logo = result + } else if name == "rights" || + name == "copyright" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + atom.Rights = result + } else if name == "contributor" { + result, err := ap.parsePerson("contributor", p) + if err != nil { + return nil, err + } + contributors = append(contributors, result) + } else if name == "author" { + result, err := ap.parsePerson("author", p) + if err != nil { + return nil, err + } + authors = append(authors, result) + } else if name == "category" { + result, err := ap.parseCategory(p) + if err != nil { + return nil, err + } + categories = append(categories, result) + } else if name == "entry" { + result, err := ap.parseEntry(p) + if err != nil { + return nil, err + } + atom.Entries = append(atom.Entries, 
result) + } else { + err := p.Skip() + if err != nil { + return nil, err + } + } + } + } + + if len(categories) > 0 { + atom.Categories = categories + } + + if len(authors) > 0 { + atom.Authors = authors + } + + if len(contributors) > 0 { + atom.Contributors = contributors + } + + if len(links) > 0 { + atom.Links = links + } + + if len(extensions) > 0 { + atom.Extensions = extensions + } + + if err := p.Expect(xpp.EndTag, "feed"); err != nil { + return nil, err + } + + return atom, nil +} + +func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) { + if err := p.Expect(xpp.StartTag, "entry"); err != nil { + return nil, err + } + entry := &Entry{} + + contributors := []*Person{} + authors := []*Person{} + categories := []*Category{} + links := []*Link{} + extensions := ext.Extensions{} + + for { + tok, err := ap.base.NextTag(p) + if err != nil { + return nil, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + + name := strings.ToLower(p.Name) + + if shared.IsExtension(p) { + e, err := shared.ParseExtension(extensions, p) + if err != nil { + return nil, err + } + extensions = e + } else if name == "title" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + entry.Title = result + } else if name == "id" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + entry.ID = result + } else if name == "rights" || + name == "copyright" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + entry.Rights = result + } else if name == "summary" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + entry.Summary = result + } else if name == "source" { + result, err := ap.parseSource(p) + if err != nil { + return nil, err + } + entry.Source = result + } else if name == "updated" || + name == "modified" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + entry.Updated = result + date, err := 
shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + entry.UpdatedParsed = &utcDate + } + } else if name == "contributor" { + result, err := ap.parsePerson("contributor", p) + if err != nil { + return nil, err + } + contributors = append(contributors, result) + } else if name == "author" { + result, err := ap.parsePerson("author", p) + if err != nil { + return nil, err + } + authors = append(authors, result) + } else if name == "category" { + result, err := ap.parseCategory(p) + if err != nil { + return nil, err + } + categories = append(categories, result) + } else if name == "link" { + result, err := ap.parseLink(p) + if err != nil { + return nil, err + } + links = append(links, result) + } else if name == "published" || + name == "issued" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + entry.Published = result + date, err := shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + entry.PublishedParsed = &utcDate + } + } else if name == "content" { + result, err := ap.parseContent(p) + if err != nil { + return nil, err + } + entry.Content = result + } else { + err := p.Skip() + if err != nil { + return nil, err + } + } + } + } + + if len(categories) > 0 { + entry.Categories = categories + } + + if len(authors) > 0 { + entry.Authors = authors + } + + if len(links) > 0 { + entry.Links = links + } + + if len(contributors) > 0 { + entry.Contributors = contributors + } + + if len(extensions) > 0 { + entry.Extensions = extensions + } + + if err := p.Expect(xpp.EndTag, "entry"); err != nil { + return nil, err + } + + return entry, nil +} + +func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) { + + if err := p.Expect(xpp.StartTag, "source"); err != nil { + return nil, err + } + + source := &Source{} + + contributors := []*Person{} + authors := []*Person{} + categories := []*Category{} + links := []*Link{} + extensions := ext.Extensions{} + + for { + tok, err := ap.base.NextTag(p) + if err != 
nil { + return nil, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + + name := strings.ToLower(p.Name) + + if shared.IsExtension(p) { + e, err := shared.ParseExtension(extensions, p) + if err != nil { + return nil, err + } + extensions = e + } else if name == "title" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + source.Title = result + } else if name == "id" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + source.ID = result + } else if name == "updated" || + name == "modified" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + source.Updated = result + date, err := shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + source.UpdatedParsed = &utcDate + } + } else if name == "subtitle" || + name == "tagline" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + source.Subtitle = result + } else if name == "link" { + result, err := ap.parseLink(p) + if err != nil { + return nil, err + } + links = append(links, result) + } else if name == "generator" { + result, err := ap.parseGenerator(p) + if err != nil { + return nil, err + } + source.Generator = result + } else if name == "icon" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + source.Icon = result + } else if name == "logo" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + source.Logo = result + } else if name == "rights" || + name == "copyright" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + source.Rights = result + } else if name == "contributor" { + result, err := ap.parsePerson("contributor", p) + if err != nil { + return nil, err + } + contributors = append(contributors, result) + } else if name == "author" { + result, err := ap.parsePerson("author", p) + if err != nil { + return nil, err + } + authors = append(authors, result) + } else if name == 
"category" { + result, err := ap.parseCategory(p) + if err != nil { + return nil, err + } + categories = append(categories, result) + } else { + err := p.Skip() + if err != nil { + return nil, err + } + } + } + } + + if len(categories) > 0 { + source.Categories = categories + } + + if len(authors) > 0 { + source.Authors = authors + } + + if len(contributors) > 0 { + source.Contributors = contributors + } + + if len(links) > 0 { + source.Links = links + } + + if len(extensions) > 0 { + source.Extensions = extensions + } + + if err := p.Expect(xpp.EndTag, "source"); err != nil { + return nil, err + } + + return source, nil +} + +func (ap *Parser) parseContent(p *xpp.XMLPullParser) (*Content, error) { + c := &Content{} + c.Type = p.Attribute("type") + c.Src = p.Attribute("src") + + text, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + c.Value = text + + return c, nil +} + +func (ap *Parser) parsePerson(name string, p *xpp.XMLPullParser) (*Person, error) { + + if err := p.Expect(xpp.StartTag, name); err != nil { + return nil, err + } + + person := &Person{} + + for { + tok, err := ap.base.NextTag(p) + if err != nil { + return nil, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + + name := strings.ToLower(p.Name) + + if name == "name" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + person.Name = result + } else if name == "email" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + person.Email = result + } else if name == "uri" || + name == "url" || + name == "homepage" { + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + person.URI = result + } else { + err := p.Skip() + if err != nil { + return nil, err + } + } + } + } + + if err := p.Expect(xpp.EndTag, name); err != nil { + return nil, err + } + + return person, nil +} + +func (ap *Parser) parseLink(p *xpp.XMLPullParser) (*Link, error) { + if err := p.Expect(xpp.StartTag, 
"link"); err != nil { + return nil, err + } + + l := &Link{} + l.Href = p.Attribute("href") + l.Hreflang = p.Attribute("hreflang") + l.Type = p.Attribute("type") + l.Length = p.Attribute("length") + l.Title = p.Attribute("title") + l.Rel = p.Attribute("rel") + if l.Rel == "" { + l.Rel = "alternate" + } + + if err := p.Skip(); err != nil { + return nil, err + } + + if err := p.Expect(xpp.EndTag, "link"); err != nil { + return nil, err + } + return l, nil +} + +func (ap *Parser) parseCategory(p *xpp.XMLPullParser) (*Category, error) { + if err := p.Expect(xpp.StartTag, "category"); err != nil { + return nil, err + } + + c := &Category{} + c.Term = p.Attribute("term") + c.Scheme = p.Attribute("scheme") + c.Label = p.Attribute("label") + + if err := p.Skip(); err != nil { + return nil, err + } + + if err := p.Expect(xpp.EndTag, "category"); err != nil { + return nil, err + } + return c, nil +} + +func (ap *Parser) parseGenerator(p *xpp.XMLPullParser) (*Generator, error) { + + if err := p.Expect(xpp.StartTag, "generator"); err != nil { + return nil, err + } + + g := &Generator{} + + uri := p.Attribute("uri") // Atom 1.0 + url := p.Attribute("url") // Atom 0.3 + + if uri != "" { + g.URI = uri + } else if url != "" { + g.URI = url + } + + g.Version = p.Attribute("version") + + result, err := ap.parseAtomText(p) + if err != nil { + return nil, err + } + + g.Value = result + + if err := p.Expect(xpp.EndTag, "generator"); err != nil { + return nil, err + } + + return g, nil +} + +func (ap *Parser) parseAtomText(p *xpp.XMLPullParser) (string, error) { + + var text struct { + Type string `xml:"type,attr"` + Mode string `xml:"mode,attr"` + InnerXML string `xml:",innerxml"` + } + + err := p.DecodeElement(&text) + if err != nil { + return "", err + } + + result := text.InnerXML + result = strings.TrimSpace(result) + + lowerType := strings.ToLower(text.Type) + lowerMode := strings.ToLower(text.Mode) + + if strings.Contains(result, "" + +// ParseText is a helper function for 
parsing the text +// from the current element of the XMLPullParser. +// This function can handle parsing naked XML text from +// an element. +func ParseText(p *xpp.XMLPullParser) (string, error) { + var text struct { + Type string `xml:"type,attr"` + InnerXML string `xml:",innerxml"` + } + + err := p.DecodeElement(&text) + if err != nil { + return "", err + } + + result := text.InnerXML + result = strings.TrimSpace(result) + + if strings.Contains(result, CDATA_START) { + return StripCDATA(result), nil + } + + return DecodeEntities(result) +} + +// StripCDATA removes CDATA tags from the string +// content outside of CDATA tags is passed via DecodeEntities +func StripCDATA(str string) string { + buf := bytes.NewBuffer([]byte{}) + + curr := 0 + + for curr < len(str) { + + start := indexAt(str, CDATA_START, curr) + + if start == -1 { + dec, _ := DecodeEntities(str[curr:]) + buf.Write([]byte(dec)) + return buf.String() + } + + end := indexAt(str, CDATA_END, start) + + if end == -1 { + dec, _ := DecodeEntities(str[curr:]) + buf.Write([]byte(dec)) + return buf.String() + } + + buf.Write([]byte(str[start+len(CDATA_START) : end])) + + curr = curr + end + len(CDATA_END) + } + + return buf.String() +} + +// DecodeEntities decodes escaped XML entities +// in a string and returns the unescaped string +func DecodeEntities(str string) (string, error) { + data := []byte(str) + buf := bytes.NewBuffer([]byte{}) + + for len(data) > 0 { + // Find the next entity + idx := bytes.IndexByte(data, '&') + if idx == -1 { + buf.Write(data) + break + } + + // Write and skip everything before it + buf.Write(data[:idx]) + data = data[idx+1:] + + if len(data) == 0 { + return "", TruncatedEntity + } + + // Find the end of the entity + end := bytes.IndexByte(data, ';') + if end == -1 { + return "", TruncatedEntity + } + + if data[0] == '#' { + // Numerical character reference + var str string + base := 10 + + if len(data) > 1 && data[1] == 'x' { + str = string(data[2:end]) + base = 16 + } else { + 
str = string(data[1:end]) + } + + i, err := strconv.ParseUint(str, base, 32) + if err != nil { + return "", InvalidNumericReference + } + + buf.WriteRune(rune(i)) + } else { + // Predefined entity + name := string(data[:end]) + + var c byte + switch name { + case "lt": + c = '<' + case "gt": + c = '>' + case "quot": + c = '"' + case "apos": + c = '\'' + case "amp": + c = '&' + default: + return "", fmt.Errorf("unknown predefined "+ + "entity &%s;", name) + } + + buf.WriteByte(c) + } + + // Skip the entity + data = data[end+1:] + } + + return buf.String(), nil +} + +// ParseNameAddress parses name/email strings commonly +// found in RSS feeds of the format "Example Name (example@site.com)" +// and other variations of this format. +func ParseNameAddress(nameAddressText string) (name string, address string) { + if nameAddressText == "" { + return + } + + if emailNameRgx.MatchString(nameAddressText) { + result := emailNameRgx.FindStringSubmatch(nameAddressText) + address = result[1] + name = result[2] + } else if nameEmailRgx.MatchString(nameAddressText) { + result := nameEmailRgx.FindStringSubmatch(nameAddressText) + name = result[1] + address = result[2] + } else if nameOnlyRgx.MatchString(nameAddressText) { + result := nameOnlyRgx.FindStringSubmatch(nameAddressText) + name = result[1] + } else if emailOnlyRgx.MatchString(nameAddressText) { + result := emailOnlyRgx.FindStringSubmatch(nameAddressText) + address = result[1] + } + return +} + +func indexAt(str, substr string, start int) int { + idx := strings.Index(str[start:], substr) + if idx > -1 { + idx += start + } + return idx +} diff --git a/vendor/github.com/mmcdole/gofeed/rss/parser.go b/vendor/github.com/mmcdole/gofeed/rss/parser.go new file mode 100644 index 0000000..684d160 --- /dev/null +++ b/vendor/github.com/mmcdole/gofeed/rss/parser.go @@ -0,0 +1,770 @@ +package rss + +import ( + "fmt" + "io" + "strings" + + ext "github.com/mmcdole/gofeed/extensions" + "github.com/mmcdole/gofeed/internal/shared" + xpp 
"github.com/mmcdole/goxpp" +) + +// Parser is a RSS Parser +type Parser struct { + base *shared.XMLBase +} + +// Parse parses an xml feed into an rss.Feed +func (rp *Parser) Parse(feed io.Reader) (*Feed, error) { + p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) + rp.base = &shared.XMLBase{} + + _, err := rp.base.FindRoot(p) + if err != nil { + return nil, err + } + + return rp.parseRoot(p) +} + +func (rp *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) { + rssErr := p.Expect(xpp.StartTag, "rss") + rdfErr := p.Expect(xpp.StartTag, "rdf") + if rssErr != nil && rdfErr != nil { + return nil, fmt.Errorf("%s or %s", rssErr.Error(), rdfErr.Error()) + } + + // Items found in feed root + var channel *Feed + var textinput *TextInput + var image *Image + items := []*Item{} + + ver := rp.parseVersion(p) + + for { + tok, err := rp.base.NextTag(p) + if err != nil { + return nil, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + + // Skip any extensions found in the feed root. + if shared.IsExtension(p) { + p.Skip() + continue + } + + name := strings.ToLower(p.Name) + + if name == "channel" { + channel, err = rp.parseChannel(p) + if err != nil { + return nil, err + } + } else if name == "item" { + item, err := rp.parseItem(p) + if err != nil { + return nil, err + } + items = append(items, item) + } else if name == "textinput" { + textinput, err = rp.parseTextInput(p) + if err != nil { + return nil, err + } + } else if name == "image" { + image, err = rp.parseImage(p) + if err != nil { + return nil, err + } + } else { + p.Skip() + } + } + } + + rssErr = p.Expect(xpp.EndTag, "rss") + rdfErr = p.Expect(xpp.EndTag, "rdf") + if rssErr != nil && rdfErr != nil { + return nil, fmt.Errorf("%s or %s", rssErr.Error(), rdfErr.Error()) + } + + if channel == nil { + channel = &Feed{} + channel.Items = []*Item{} + } + + if len(items) > 0 { + channel.Items = append(channel.Items, items...) 
+ } + + if textinput != nil { + channel.TextInput = textinput + } + + if image != nil { + channel.Image = image + } + + channel.Version = ver + return channel, nil +} + +func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) { + + if err = p.Expect(xpp.StartTag, "channel"); err != nil { + return nil, err + } + + rss = &Feed{} + rss.Items = []*Item{} + + extensions := ext.Extensions{} + categories := []*Category{} + + for { + tok, err := rp.base.NextTag(p) + if err != nil { + return nil, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + + name := strings.ToLower(p.Name) + + if shared.IsExtension(p) { + ext, err := shared.ParseExtension(extensions, p) + if err != nil { + return nil, err + } + extensions = ext + } else if name == "title" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.Title = result + } else if name == "description" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.Description = result + } else if name == "link" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.Link = result + } else if name == "language" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.Language = result + } else if name == "copyright" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.Copyright = result + } else if name == "managingeditor" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.ManagingEditor = result + } else if name == "webmaster" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.WebMaster = result + } else if name == "pubdate" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.PubDate = result + date, err := shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + rss.PubDateParsed = &utcDate + } + } else if name == "lastbuilddate" { 
+ result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.LastBuildDate = result + date, err := shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + rss.LastBuildDateParsed = &utcDate + } + } else if name == "generator" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.Generator = result + } else if name == "docs" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.Docs = result + } else if name == "ttl" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.TTL = result + } else if name == "rating" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + rss.Rating = result + } else if name == "skiphours" { + result, err := rp.parseSkipHours(p) + if err != nil { + return nil, err + } + rss.SkipHours = result + } else if name == "skipdays" { + result, err := rp.parseSkipDays(p) + if err != nil { + return nil, err + } + rss.SkipDays = result + } else if name == "item" { + result, err := rp.parseItem(p) + if err != nil { + return nil, err + } + rss.Items = append(rss.Items, result) + } else if name == "cloud" { + result, err := rp.parseCloud(p) + if err != nil { + return nil, err + } + rss.Cloud = result + } else if name == "category" { + result, err := rp.parseCategory(p) + if err != nil { + return nil, err + } + categories = append(categories, result) + } else if name == "image" { + result, err := rp.parseImage(p) + if err != nil { + return nil, err + } + rss.Image = result + } else if name == "textinput" { + result, err := rp.parseTextInput(p) + if err != nil { + return nil, err + } + rss.TextInput = result + } else { + // Skip element as it isn't an extension and not + // part of the spec + p.Skip() + } + } + } + + if err = p.Expect(xpp.EndTag, "channel"); err != nil { + return nil, err + } + + if len(categories) > 0 { + rss.Categories = categories + } + + if len(extensions) > 0 { + rss.Extensions = 
extensions + + if itunes, ok := rss.Extensions["itunes"]; ok { + rss.ITunesExt = ext.NewITunesFeedExtension(itunes) + } + + if dc, ok := rss.Extensions["dc"]; ok { + rss.DublinCoreExt = ext.NewDublinCoreExtension(dc) + } + } + + return rss, nil +} + +func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) { + + if err = p.Expect(xpp.StartTag, "item"); err != nil { + return nil, err + } + + item = &Item{} + extensions := ext.Extensions{} + categories := []*Category{} + + for { + tok, err := rp.base.NextTag(p) + if err != nil { + return nil, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + + name := strings.ToLower(p.Name) + + if shared.IsExtension(p) { + ext, err := shared.ParseExtension(extensions, p) + if err != nil { + return nil, err + } + item.Extensions = ext + } else if name == "title" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + item.Title = result + } else if name == "description" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + item.Description = result + } else if name == "encoded" { + space := strings.TrimSpace(p.Space) + if prefix, ok := p.Spaces[space]; ok && prefix == "content" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + item.Content = result + } + } else if name == "link" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + item.Link = result + } else if name == "author" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + item.Author = result + } else if name == "comments" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + item.Comments = result + } else if name == "pubdate" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + item.PubDate = result + date, err := shared.ParseDate(result) + if err == nil { + utcDate := date.UTC() + item.PubDateParsed = &utcDate + } + } else if name == 
"source" { + result, err := rp.parseSource(p) + if err != nil { + return nil, err + } + item.Source = result + } else if name == "enclosure" { + result, err := rp.parseEnclosure(p) + if err != nil { + return nil, err + } + item.Enclosure = result + } else if name == "guid" { + result, err := rp.parseGUID(p) + if err != nil { + return nil, err + } + item.GUID = result + } else if name == "category" { + result, err := rp.parseCategory(p) + if err != nil { + return nil, err + } + categories = append(categories, result) + } else { + // Skip any elements not part of the item spec + p.Skip() + } + } + } + + if len(categories) > 0 { + item.Categories = categories + } + + if len(extensions) > 0 { + item.Extensions = extensions + + if itunes, ok := item.Extensions["itunes"]; ok { + item.ITunesExt = ext.NewITunesItemExtension(itunes) + } + + if dc, ok := item.Extensions["dc"]; ok { + item.DublinCoreExt = ext.NewDublinCoreExtension(dc) + } + } + + if err = p.Expect(xpp.EndTag, "item"); err != nil { + return nil, err + } + + return item, nil +} + +func (rp *Parser) parseSource(p *xpp.XMLPullParser) (source *Source, err error) { + if err = p.Expect(xpp.StartTag, "source"); err != nil { + return nil, err + } + + source = &Source{} + source.URL = p.Attribute("url") + + result, err := shared.ParseText(p) + if err != nil { + return source, err + } + source.Title = result + + if err = p.Expect(xpp.EndTag, "source"); err != nil { + return nil, err + } + return source, nil +} + +func (rp *Parser) parseEnclosure(p *xpp.XMLPullParser) (enclosure *Enclosure, err error) { + if err = p.Expect(xpp.StartTag, "enclosure"); err != nil { + return nil, err + } + + enclosure = &Enclosure{} + enclosure.URL = p.Attribute("url") + enclosure.Length = p.Attribute("length") + enclosure.Type = p.Attribute("type") + + // Ignore any enclosure text + _, err = p.NextText() + if err != nil { + return enclosure, err + } + + if err = p.Expect(xpp.EndTag, "enclosure"); err != nil { + return nil, err + } + + 
return enclosure, nil +} + +func (rp *Parser) parseImage(p *xpp.XMLPullParser) (image *Image, err error) { + if err = p.Expect(xpp.StartTag, "image"); err != nil { + return nil, err + } + + image = &Image{} + + for { + tok, err := rp.base.NextTag(p) + if err != nil { + return image, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + name := strings.ToLower(p.Name) + + if name == "url" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + image.URL = result + } else if name == "title" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + image.Title = result + } else if name == "link" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + image.Link = result + } else if name == "width" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + image.Width = result + } else if name == "height" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + image.Height = result + } else if name == "description" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + image.Description = result + } else { + p.Skip() + } + } + } + + if err = p.Expect(xpp.EndTag, "image"); err != nil { + return nil, err + } + + return image, nil +} + +func (rp *Parser) parseGUID(p *xpp.XMLPullParser) (guid *GUID, err error) { + if err = p.Expect(xpp.StartTag, "guid"); err != nil { + return nil, err + } + + guid = &GUID{} + guid.IsPermalink = p.Attribute("isPermalink") + + result, err := shared.ParseText(p) + if err != nil { + return + } + guid.Value = result + + if err = p.Expect(xpp.EndTag, "guid"); err != nil { + return nil, err + } + + return guid, nil +} + +func (rp *Parser) parseCategory(p *xpp.XMLPullParser) (cat *Category, err error) { + + if err = p.Expect(xpp.StartTag, "category"); err != nil { + return nil, err + } + + cat = &Category{} + cat.Domain = p.Attribute("domain") + + result, err := 
shared.ParseText(p) + if err != nil { + return nil, err + } + + cat.Value = result + + if err = p.Expect(xpp.EndTag, "category"); err != nil { + return nil, err + } + return cat, nil +} + +func (rp *Parser) parseTextInput(p *xpp.XMLPullParser) (*TextInput, error) { + if err := p.Expect(xpp.StartTag, "textinput"); err != nil { + return nil, err + } + + ti := &TextInput{} + + for { + tok, err := rp.base.NextTag(p) + if err != nil { + return nil, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + name := strings.ToLower(p.Name) + + if name == "title" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + ti.Title = result + } else if name == "description" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + ti.Description = result + } else if name == "name" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + ti.Name = result + } else if name == "link" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + ti.Link = result + } else { + p.Skip() + } + } + } + + if err := p.Expect(xpp.EndTag, "textinput"); err != nil { + return nil, err + } + + return ti, nil +} + +func (rp *Parser) parseSkipHours(p *xpp.XMLPullParser) ([]string, error) { + if err := p.Expect(xpp.StartTag, "skiphours"); err != nil { + return nil, err + } + + hours := []string{} + + for { + tok, err := rp.base.NextTag(p) + if err != nil { + return nil, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + name := strings.ToLower(p.Name) + if name == "hour" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + hours = append(hours, result) + } else { + p.Skip() + } + } + } + + if err := p.Expect(xpp.EndTag, "skiphours"); err != nil { + return nil, err + } + + return hours, nil +} + +func (rp *Parser) parseSkipDays(p *xpp.XMLPullParser) ([]string, error) { + if err := p.Expect(xpp.StartTag, "skipdays"); err != nil { + 
return nil, err + } + + days := []string{} + + for { + tok, err := rp.base.NextTag(p) + if err != nil { + return nil, err + } + + if tok == xpp.EndTag { + break + } + + if tok == xpp.StartTag { + name := strings.ToLower(p.Name) + if name == "day" { + result, err := shared.ParseText(p) + if err != nil { + return nil, err + } + days = append(days, result) + } else { + p.Skip() + } + } + } + + if err := p.Expect(xpp.EndTag, "skipdays"); err != nil { + return nil, err + } + + return days, nil +} + +func (rp *Parser) parseCloud(p *xpp.XMLPullParser) (*Cloud, error) { + if err := p.Expect(xpp.StartTag, "cloud"); err != nil { + return nil, err + } + + cloud := &Cloud{} + cloud.Domain = p.Attribute("domain") + cloud.Port = p.Attribute("port") + cloud.Path = p.Attribute("path") + cloud.RegisterProcedure = p.Attribute("registerProcedure") + cloud.Protocol = p.Attribute("protocol") + + rp.base.NextTag(p) + + if err := p.Expect(xpp.EndTag, "cloud"); err != nil { + return nil, err + } + + return cloud, nil +} + +func (rp *Parser) parseVersion(p *xpp.XMLPullParser) (ver string) { + name := strings.ToLower(p.Name) + if name == "rss" { + ver = p.Attribute("version") + } else if name == "rdf" { + ns := p.Attribute("xmlns") + if ns == "http://channel.netscape.com/rdf/simple/0.9/" || + ns == "http://my.netscape.com/rdf/simple/0.9/" { + ver = "0.9" + } else if ns == "http://purl.org/rss/1.0/" { + ver = "1.0" + } + } + return +} diff --git a/vendor/github.com/mmcdole/gofeed/translator.go b/vendor/github.com/mmcdole/gofeed/translator.go new file mode 100644 index 0000000..4756b4b --- /dev/null +++ b/vendor/github.com/mmcdole/gofeed/translator.go @@ -0,0 +1,686 @@ +package gofeed + +import ( + "fmt" + "strings" + "time" + + "github.com/mmcdole/gofeed/atom" + ext "github.com/mmcdole/gofeed/extensions" + "github.com/mmcdole/gofeed/internal/shared" + "github.com/mmcdole/gofeed/rss" +) + +// Translator converts a particular feed (atom.Feed or rss.Feed) +// into the generic Feed struct +type 
Translator interface { + Translate(feed interface{}) (*Feed, error) +} + +// DefaultRSSTranslator converts an rss.Feed struct +// into the generic Feed struct. +// +// This default implementation defines a set of +// mapping rules between rss.Feed -> Feed +// for each of the fields in Feed. +type DefaultRSSTranslator struct{} + +// Translate converts an RSS feed into the universal +// feed type. +func (t *DefaultRSSTranslator) Translate(feed interface{}) (*Feed, error) { + rss, found := feed.(*rss.Feed) + if !found { + return nil, fmt.Errorf("Feed did not match expected type of *rss.Feed") + } + + result := &Feed{} + result.Title = t.translateFeedTitle(rss) + result.Description = t.translateFeedDescription(rss) + result.Link = t.translateFeedLink(rss) + result.FeedLink = t.translateFeedFeedLink(rss) + result.Updated = t.translateFeedUpdated(rss) + result.UpdatedParsed = t.translateFeedUpdatedParsed(rss) + result.Published = t.translateFeedPublished(rss) + result.PublishedParsed = t.translateFeedPublishedParsed(rss) + result.Author = t.translateFeedAuthor(rss) + result.Language = t.translateFeedLanguage(rss) + result.Image = t.translateFeedImage(rss) + result.Copyright = t.translateFeedCopyright(rss) + result.Generator = t.translateFeedGenerator(rss) + result.Categories = t.translateFeedCategories(rss) + result.Items = t.translateFeedItems(rss) + result.ITunesExt = rss.ITunesExt + result.DublinCoreExt = rss.DublinCoreExt + result.Extensions = rss.Extensions + result.FeedVersion = rss.Version + result.FeedType = "rss" + return result, nil +} + +func (t *DefaultRSSTranslator) translateFeedItem(rssItem *rss.Item) (item *Item) { + item = &Item{} + item.Title = t.translateItemTitle(rssItem) + item.Description = t.translateItemDescription(rssItem) + item.Content = t.translateItemContent(rssItem) + item.Link = t.translateItemLink(rssItem) + item.Published = t.translateItemPublished(rssItem) + item.PublishedParsed = t.translateItemPublishedParsed(rssItem) + item.Author = 
t.translateItemAuthor(rssItem) + item.GUID = t.translateItemGUID(rssItem) + item.Image = t.translateItemImage(rssItem) + item.Categories = t.translateItemCategories(rssItem) + item.Enclosures = t.translateItemEnclosures(rssItem) + item.DublinCoreExt = rssItem.DublinCoreExt + item.ITunesExt = rssItem.ITunesExt + item.Extensions = rssItem.Extensions + return +} + +func (t *DefaultRSSTranslator) translateFeedTitle(rss *rss.Feed) (title string) { + if rss.Title != "" { + title = rss.Title + } else if rss.DublinCoreExt != nil && rss.DublinCoreExt.Title != nil { + title = t.firstEntry(rss.DublinCoreExt.Title) + } + return +} + +func (t *DefaultRSSTranslator) translateFeedDescription(rss *rss.Feed) (desc string) { + return rss.Description +} + +func (t *DefaultRSSTranslator) translateFeedLink(rss *rss.Feed) (link string) { + if rss.Link != "" { + link = rss.Link + } else if rss.ITunesExt != nil && rss.ITunesExt.Subtitle != "" { + link = rss.ITunesExt.Subtitle + } + return +} + +func (t *DefaultRSSTranslator) translateFeedFeedLink(rss *rss.Feed) (link string) { + atomExtensions := t.extensionsForKeys([]string{"atom", "atom10", "atom03"}, rss.Extensions) + for _, ex := range atomExtensions { + if links, ok := ex["link"]; ok { + for _, l := range links { + if l.Attrs["Rel"] == "self" { + link = l.Value + } + } + } + } + return +} + +func (t *DefaultRSSTranslator) translateFeedUpdated(rss *rss.Feed) (updated string) { + if rss.LastBuildDate != "" { + updated = rss.LastBuildDate + } else if rss.DublinCoreExt != nil && rss.DublinCoreExt.Date != nil { + updated = t.firstEntry(rss.DublinCoreExt.Date) + } + return +} + +func (t *DefaultRSSTranslator) translateFeedUpdatedParsed(rss *rss.Feed) (updated *time.Time) { + if rss.LastBuildDateParsed != nil { + updated = rss.LastBuildDateParsed + } else if rss.DublinCoreExt != nil && rss.DublinCoreExt.Date != nil { + dateText := t.firstEntry(rss.DublinCoreExt.Date) + date, err := shared.ParseDate(dateText) + if err == nil { + updated = 
&date + } + } + return +} + +func (t *DefaultRSSTranslator) translateFeedPublished(rss *rss.Feed) (published string) { + return rss.PubDate +} + +func (t *DefaultRSSTranslator) translateFeedPublishedParsed(rss *rss.Feed) (published *time.Time) { + return rss.PubDateParsed +} + +func (t *DefaultRSSTranslator) translateFeedAuthor(rss *rss.Feed) (author *Person) { + if rss.ManagingEditor != "" { + name, address := shared.ParseNameAddress(rss.ManagingEditor) + author = &Person{} + author.Name = name + author.Email = address + } else if rss.WebMaster != "" { + name, address := shared.ParseNameAddress(rss.WebMaster) + author = &Person{} + author.Name = name + author.Email = address + } else if rss.DublinCoreExt != nil && rss.DublinCoreExt.Author != nil { + dcAuthor := t.firstEntry(rss.DublinCoreExt.Author) + name, address := shared.ParseNameAddress(dcAuthor) + author = &Person{} + author.Name = name + author.Email = address + } else if rss.DublinCoreExt != nil && rss.DublinCoreExt.Creator != nil { + dcCreator := t.firstEntry(rss.DublinCoreExt.Creator) + name, address := shared.ParseNameAddress(dcCreator) + author = &Person{} + author.Name = name + author.Email = address + } else if rss.ITunesExt != nil && rss.ITunesExt.Author != "" { + name, address := shared.ParseNameAddress(rss.ITunesExt.Author) + author = &Person{} + author.Name = name + author.Email = address + } + return +} + +func (t *DefaultRSSTranslator) translateFeedLanguage(rss *rss.Feed) (language string) { + if rss.Language != "" { + language = rss.Language + } else if rss.DublinCoreExt != nil && rss.DublinCoreExt.Language != nil { + language = t.firstEntry(rss.DublinCoreExt.Language) + } + return +} + +func (t *DefaultRSSTranslator) translateFeedImage(rss *rss.Feed) (image *Image) { + if rss.Image != nil { + image = &Image{} + image.Title = rss.Image.Title + image.URL = rss.Image.URL + } else if rss.ITunesExt != nil && rss.ITunesExt.Image != "" { + image = &Image{} + image.URL = rss.ITunesExt.Image + } + 
return +} + +func (t *DefaultRSSTranslator) translateFeedCopyright(rss *rss.Feed) (rights string) { + if rss.Copyright != "" { + rights = rss.Copyright + } else if rss.DublinCoreExt != nil && rss.DublinCoreExt.Rights != nil { + rights = t.firstEntry(rss.DublinCoreExt.Rights) + } + return +} + +func (t *DefaultRSSTranslator) translateFeedGenerator(rss *rss.Feed) (generator string) { + return rss.Generator +} + +func (t *DefaultRSSTranslator) translateFeedCategories(rss *rss.Feed) (categories []string) { + cats := []string{} + if rss.Categories != nil { + for _, c := range rss.Categories { + cats = append(cats, c.Value) + } + } + + if rss.ITunesExt != nil && rss.ITunesExt.Keywords != "" { + keywords := strings.Split(rss.ITunesExt.Keywords, ",") + for _, k := range keywords { + cats = append(cats, k) + } + } + + if rss.ITunesExt != nil && rss.ITunesExt.Categories != nil { + for _, c := range rss.ITunesExt.Categories { + cats = append(cats, c.Text) + if c.Subcategory != nil { + cats = append(cats, c.Subcategory.Text) + } + } + } + + if rss.DublinCoreExt != nil && rss.DublinCoreExt.Subject != nil { + for _, c := range rss.DublinCoreExt.Subject { + cats = append(cats, c) + } + } + + if len(cats) > 0 { + categories = cats + } + + return +} + +func (t *DefaultRSSTranslator) translateFeedItems(rss *rss.Feed) (items []*Item) { + items = []*Item{} + for _, i := range rss.Items { + items = append(items, t.translateFeedItem(i)) + } + return +} + +func (t *DefaultRSSTranslator) translateItemTitle(rssItem *rss.Item) (title string) { + if rssItem.Title != "" { + title = rssItem.Title + } else if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Title != nil { + title = t.firstEntry(rssItem.DublinCoreExt.Title) + } + return +} + +func (t *DefaultRSSTranslator) translateItemDescription(rssItem *rss.Item) (desc string) { + if rssItem.Description != "" { + desc = rssItem.Description + } else if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Description != nil { + desc = 
t.firstEntry(rssItem.DublinCoreExt.Description) + } + return +} + +func (t *DefaultRSSTranslator) translateItemContent(rssItem *rss.Item) (content string) { + return rssItem.Content +} + +func (t *DefaultRSSTranslator) translateItemLink(rssItem *rss.Item) (link string) { + return rssItem.Link +} + +func (t *DefaultRSSTranslator) translateItemUpdated(rssItem *rss.Item) (updated string) { + if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Date != nil { + updated = t.firstEntry(rssItem.DublinCoreExt.Date) + } + return updated +} + +func (t *DefaultRSSTranslator) translateItemUpdatedParsed(rssItem *rss.Item) (updated *time.Time) { + if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Date != nil { + updatedText := t.firstEntry(rssItem.DublinCoreExt.Date) + updatedDate, err := shared.ParseDate(updatedText) + if err == nil { + updated = &updatedDate + } + } + return +} + +func (t *DefaultRSSTranslator) translateItemPublished(rssItem *rss.Item) (pubDate string) { + if rssItem.PubDate != "" { + return rssItem.PubDate + } else if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Date != nil { + return t.firstEntry(rssItem.DublinCoreExt.Date) + } + return +} + +func (t *DefaultRSSTranslator) translateItemPublishedParsed(rssItem *rss.Item) (pubDate *time.Time) { + if rssItem.PubDateParsed != nil { + return rssItem.PubDateParsed + } else if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Date != nil { + pubDateText := t.firstEntry(rssItem.DublinCoreExt.Date) + pubDateParsed, err := shared.ParseDate(pubDateText) + if err == nil { + pubDate = &pubDateParsed + } + } + return +} + +func (t *DefaultRSSTranslator) translateItemAuthor(rssItem *rss.Item) (author *Person) { + if rssItem.Author != "" { + name, address := shared.ParseNameAddress(rssItem.Author) + author = &Person{} + author.Name = name + author.Email = address + } else if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Author != nil { + dcAuthor := t.firstEntry(rssItem.DublinCoreExt.Author) + 
name, address := shared.ParseNameAddress(dcAuthor) + author = &Person{} + author.Name = name + author.Email = address + } else if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Creator != nil { + dcCreator := t.firstEntry(rssItem.DublinCoreExt.Creator) + name, address := shared.ParseNameAddress(dcCreator) + author = &Person{} + author.Name = name + author.Email = address + } else if rssItem.ITunesExt != nil && rssItem.ITunesExt.Author != "" { + name, address := shared.ParseNameAddress(rssItem.ITunesExt.Author) + author = &Person{} + author.Name = name + author.Email = address + } + return +} + +func (t *DefaultRSSTranslator) translateItemGUID(rssItem *rss.Item) (guid string) { + if rssItem.GUID != nil { + guid = rssItem.GUID.Value + } + return +} + +func (t *DefaultRSSTranslator) translateItemImage(rssItem *rss.Item) (image *Image) { + if rssItem.ITunesExt != nil && rssItem.ITunesExt.Image != "" { + image = &Image{} + image.URL = rssItem.ITunesExt.Image + } + return +} + +func (t *DefaultRSSTranslator) translateItemCategories(rssItem *rss.Item) (categories []string) { + cats := []string{} + if rssItem.Categories != nil { + for _, c := range rssItem.Categories { + cats = append(cats, c.Value) + } + } + + if rssItem.ITunesExt != nil && rssItem.ITunesExt.Keywords != "" { + keywords := strings.Split(rssItem.ITunesExt.Keywords, ",") + for _, k := range keywords { + cats = append(cats, k) + } + } + + if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Subject != nil { + for _, c := range rssItem.DublinCoreExt.Subject { + cats = append(cats, c) + } + } + + if len(cats) > 0 { + categories = cats + } + + return +} + +func (t *DefaultRSSTranslator) translateItemEnclosures(rssItem *rss.Item) (enclosures []*Enclosure) { + if rssItem.Enclosure != nil { + e := &Enclosure{} + e.URL = rssItem.Enclosure.URL + e.Type = rssItem.Enclosure.Type + e.Length = rssItem.Enclosure.Length + enclosures = []*Enclosure{e} + } + return +} + +func (t *DefaultRSSTranslator) 
extensionsForKeys(keys []string, extensions ext.Extensions) (matches []map[string][]ext.Extension) { + matches = []map[string][]ext.Extension{} + + if extensions == nil { + return + } + + for _, key := range keys { + if match, ok := extensions[key]; ok { + matches = append(matches, match) + } + } + return +} + +func (t *DefaultRSSTranslator) firstEntry(entries []string) (value string) { + if entries == nil { + return + } + + if len(entries) == 0 { + return + } + + return entries[0] +} + +// DefaultAtomTranslator converts an atom.Feed struct +// into the generic Feed struct. +// +// This default implementation defines a set of +// mapping rules between atom.Feed -> Feed +// for each of the fields in Feed. +type DefaultAtomTranslator struct{} + +// Translate converts an Atom feed into the universal +// feed type. +func (t *DefaultAtomTranslator) Translate(feed interface{}) (*Feed, error) { + atom, found := feed.(*atom.Feed) + if !found { + return nil, fmt.Errorf("Feed did not match expected type of *atom.Feed") + } + + result := &Feed{} + result.Title = t.translateFeedTitle(atom) + result.Description = t.translateFeedDescription(atom) + result.Link = t.translateFeedLink(atom) + result.FeedLink = t.translateFeedFeedLink(atom) + result.Updated = t.translateFeedUpdated(atom) + result.UpdatedParsed = t.translateFeedUpdatedParsed(atom) + result.Author = t.translateFeedAuthor(atom) + result.Language = t.translateFeedLanguage(atom) + result.Image = t.translateFeedImage(atom) + result.Copyright = t.translateFeedCopyright(atom) + result.Categories = t.translateFeedCategories(atom) + result.Generator = t.translateFeedGenerator(atom) + result.Items = t.translateFeedItems(atom) + result.Extensions = atom.Extensions + result.FeedVersion = atom.Version + result.FeedType = "atom" + return result, nil +} + +func (t *DefaultAtomTranslator) translateFeedItem(entry *atom.Entry) (item *Item) { + item = &Item{} + item.Title = t.translateItemTitle(entry) + item.Description = 
t.translateItemDescription(entry) + item.Content = t.translateItemContent(entry) + item.Link = t.translateItemLink(entry) + item.Updated = t.translateItemUpdated(entry) + item.UpdatedParsed = t.translateItemUpdatedParsed(entry) + item.Published = t.translateItemPublished(entry) + item.PublishedParsed = t.translateItemPublishedParsed(entry) + item.Author = t.translateItemAuthor(entry) + item.GUID = t.translateItemGUID(entry) + item.Image = t.translateItemImage(entry) + item.Categories = t.translateItemCategories(entry) + item.Enclosures = t.translateItemEnclosures(entry) + item.Extensions = entry.Extensions + return +} + +func (t *DefaultAtomTranslator) translateFeedTitle(atom *atom.Feed) (title string) { + return atom.Title +} + +func (t *DefaultAtomTranslator) translateFeedDescription(atom *atom.Feed) (desc string) { + return atom.Subtitle +} + +func (t *DefaultAtomTranslator) translateFeedLink(atom *atom.Feed) (link string) { + l := t.firstLinkWithType("alternate", atom.Links) + if l != nil { + link = l.Href + } + return +} + +func (t *DefaultAtomTranslator) translateFeedFeedLink(atom *atom.Feed) (link string) { + feedLink := t.firstLinkWithType("self", atom.Links) + if feedLink != nil { + link = feedLink.Href + } + return +} + +func (t *DefaultAtomTranslator) translateFeedUpdated(atom *atom.Feed) (updated string) { + return atom.Updated +} + +func (t *DefaultAtomTranslator) translateFeedUpdatedParsed(atom *atom.Feed) (updated *time.Time) { + return atom.UpdatedParsed +} + +func (t *DefaultAtomTranslator) translateFeedAuthor(atom *atom.Feed) (author *Person) { + a := t.firstPerson(atom.Authors) + if a != nil { + feedAuthor := Person{} + feedAuthor.Name = a.Name + feedAuthor.Email = a.Email + author = &feedAuthor + } + return +} + +func (t *DefaultAtomTranslator) translateFeedLanguage(atom *atom.Feed) (language string) { + return atom.Language +} + +func (t *DefaultAtomTranslator) translateFeedImage(atom *atom.Feed) (image *Image) { + if atom.Logo != "" { + 
feedImage := Image{} + feedImage.URL = atom.Logo + image = &feedImage + } + return +} + +func (t *DefaultAtomTranslator) translateFeedCopyright(atom *atom.Feed) (rights string) { + return atom.Rights +} + +func (t *DefaultAtomTranslator) translateFeedGenerator(atom *atom.Feed) (generator string) { + if atom.Generator != nil { + if atom.Generator.Value != "" { + generator += atom.Generator.Value + } + if atom.Generator.Version != "" { + generator += " v" + atom.Generator.Version + } + if atom.Generator.URI != "" { + generator += " " + atom.Generator.URI + } + generator = strings.TrimSpace(generator) + } + return +} + +func (t *DefaultAtomTranslator) translateFeedCategories(atom *atom.Feed) (categories []string) { + if atom.Categories != nil { + categories = []string{} + for _, c := range atom.Categories { + categories = append(categories, c.Term) + } + } + return +} + +func (t *DefaultAtomTranslator) translateFeedItems(atom *atom.Feed) (items []*Item) { + items = []*Item{} + for _, entry := range atom.Entries { + items = append(items, t.translateFeedItem(entry)) + } + return +} + +func (t *DefaultAtomTranslator) translateItemTitle(entry *atom.Entry) (title string) { + return entry.Title +} + +func (t *DefaultAtomTranslator) translateItemDescription(entry *atom.Entry) (desc string) { + return entry.Summary +} + +func (t *DefaultAtomTranslator) translateItemContent(entry *atom.Entry) (content string) { + if entry.Content != nil { + content = entry.Content.Value + } + return +} + +func (t *DefaultAtomTranslator) translateItemLink(entry *atom.Entry) (link string) { + l := t.firstLinkWithType("alternate", entry.Links) + if l != nil { + link = l.Href + } + return +} + +func (t *DefaultAtomTranslator) translateItemUpdated(entry *atom.Entry) (updated string) { + return entry.Updated +} + +func (t *DefaultAtomTranslator) translateItemUpdatedParsed(entry *atom.Entry) (updated *time.Time) { + return entry.UpdatedParsed +} + +func (t *DefaultAtomTranslator) 
translateItemPublished(entry *atom.Entry) (updated string) { + return entry.Published +} + +func (t *DefaultAtomTranslator) translateItemPublishedParsed(entry *atom.Entry) (updated *time.Time) { + return entry.PublishedParsed +} + +func (t *DefaultAtomTranslator) translateItemAuthor(entry *atom.Entry) (author *Person) { + a := t.firstPerson(entry.Authors) + if a != nil { + author = &Person{} + author.Name = a.Name + author.Email = a.Email + } + return +} + +func (t *DefaultAtomTranslator) translateItemGUID(entry *atom.Entry) (guid string) { + return entry.ID +} + +func (t *DefaultAtomTranslator) translateItemImage(entry *atom.Entry) (image *Image) { + return nil +} + +func (t *DefaultAtomTranslator) translateItemCategories(entry *atom.Entry) (categories []string) { + if entry.Categories != nil { + categories = []string{} + for _, c := range entry.Categories { + categories = append(categories, c.Term) + } + } + return +} + +func (t *DefaultAtomTranslator) translateItemEnclosures(entry *atom.Entry) (enclosures []*Enclosure) { + if entry.Links != nil { + enclosures = []*Enclosure{} + for _, e := range entry.Links { + if e.Rel == "enclosure" { + enclosure := &Enclosure{} + enclosure.URL = e.Href + enclosure.Length = e.Length + enclosure.Type = e.Type + enclosures = append(enclosures, enclosure) + } + } + + if len(enclosures) == 0 { + enclosures = nil + } + } + return +} + +func (t *DefaultAtomTranslator) firstLinkWithType(linkType string, links []*atom.Link) *atom.Link { + if links == nil { + return nil + } + + for _, link := range links { + if link.Rel == linkType { + return link + } + } + return nil +} + +func (t *DefaultAtomTranslator) firstPerson(persons []*atom.Person) (person *atom.Person) { + if persons == nil || len(persons) == 0 { + return + } + + person = persons[0] + return +} diff --git a/vendor/github.com/olekukonko/tablewriter/README.md b/vendor/github.com/olekukonko/tablewriter/README.md new file mode 100644 index 0000000..92d71ed --- /dev/null +++ 
b/vendor/github.com/olekukonko/tablewriter/README.md @@ -0,0 +1,305 @@ +ASCII Table Writer +========= + +[![Build Status](https://travis-ci.org/olekukonko/tablewriter.png?branch=master)](https://travis-ci.org/olekukonko/tablewriter) +[![Total views](https://img.shields.io/sourcegraph/rrc/github.com/olekukonko/tablewriter.svg)](https://sourcegraph.com/github.com/olekukonko/tablewriter) +[![Godoc](https://godoc.org/github.com/olekukonko/tablewriter?status.svg)](https://godoc.org/github.com/olekukonko/tablewriter) + +Generate ASCII table on the fly ... Installation is simple as + + go get github.com/olekukonko/tablewriter + + +#### Features +- Automatic Padding +- Support Multiple Lines +- Supports Alignment +- Support Custom Separators +- Automatic Alignment of numbers & percentage +- Write directly to http , file etc via `io.Writer` +- Read directly from CSV file +- Optional row line via `SetRowLine` +- Normalise table header +- Make CSV Headers optional +- Enable or disable table border +- Set custom footer support +- Optional identical cells merging +- Set custom caption +- Optional reflowing of paragraphs in multi-line cells. 
+ +#### Example 1 - Basic +```go +data := [][]string{ + []string{"A", "The Good", "500"}, + []string{"B", "The Very very Bad Man", "288"}, + []string{"C", "The Ugly", "120"}, + []string{"D", "The Gopher", "800"}, +} + +table := tablewriter.NewWriter(os.Stdout) +table.SetHeader([]string{"Name", "Sign", "Rating"}) + +for _, v := range data { + table.Append(v) +} +table.Render() // Send output +``` + +##### Output 1 +``` ++------+-----------------------+--------+ +| NAME | SIGN | RATING | ++------+-----------------------+--------+ +| A | The Good | 500 | +| B | The Very very Bad Man | 288 | +| C | The Ugly | 120 | +| D | The Gopher | 800 | ++------+-----------------------+--------+ +``` + +#### Example 2 - Without Border / Footer / Bulk Append +```go +data := [][]string{ + []string{"1/1/2014", "Domain name", "2233", "$10.98"}, + []string{"1/1/2014", "January Hosting", "2233", "$54.95"}, + []string{"1/4/2014", "February Hosting", "2233", "$51.00"}, + []string{"1/4/2014", "February Extra Bandwidth", "2233", "$30.00"}, +} + +table := tablewriter.NewWriter(os.Stdout) +table.SetHeader([]string{"Date", "Description", "CV2", "Amount"}) +table.SetFooter([]string{"", "", "Total", "$146.93"}) // Add Footer +table.SetBorder(false) // Set Border to false +table.AppendBulk(data) // Add Bulk Data +table.Render() +``` + +##### Output 2 +``` + + DATE | DESCRIPTION | CV2 | AMOUNT +-----------+--------------------------+-------+---------- + 1/1/2014 | Domain name | 2233 | $10.98 + 1/1/2014 | January Hosting | 2233 | $54.95 + 1/4/2014 | February Hosting | 2233 | $51.00 + 1/4/2014 | February Extra Bandwidth | 2233 | $30.00 +-----------+--------------------------+-------+---------- + TOTAL | $146 93 + --------+---------- + +``` + + +#### Example 3 - CSV +```go +table, _ := tablewriter.NewCSV(os.Stdout, "testdata/test_info.csv", true) +table.SetAlignment(tablewriter.ALIGN_LEFT) // Set Alignment +table.Render() +``` + +##### Output 3 +``` 
++----------+--------------+------+-----+---------+----------------+ +| FIELD | TYPE | NULL | KEY | DEFAULT | EXTRA | ++----------+--------------+------+-----+---------+----------------+ +| user_id | smallint(5) | NO | PRI | NULL | auto_increment | +| username | varchar(10) | NO | | NULL | | +| password | varchar(100) | NO | | NULL | | ++----------+--------------+------+-----+---------+----------------+ +``` + +#### Example 4 - Custom Separator +```go +table, _ := tablewriter.NewCSV(os.Stdout, "testdata/test.csv", true) +table.SetRowLine(true) // Enable row line + +// Change table lines +table.SetCenterSeparator("*") +table.SetColumnSeparator("╪") +table.SetRowSeparator("-") + +table.SetAlignment(tablewriter.ALIGN_LEFT) +table.Render() +``` + +##### Output 4 +``` +*------------*-----------*---------* +╪ FIRST NAME ╪ LAST NAME ╪ SSN ╪ +*------------*-----------*---------* +╪ John ╪ Barry ╪ 123456 ╪ +*------------*-----------*---------* +╪ Kathy ╪ Smith ╪ 687987 ╪ +*------------*-----------*---------* +╪ Bob ╪ McCornick ╪ 3979870 ╪ +*------------*-----------*---------* +``` + +#### Example 5 - Markdown Format +```go +data := [][]string{ + []string{"1/1/2014", "Domain name", "2233", "$10.98"}, + []string{"1/1/2014", "January Hosting", "2233", "$54.95"}, + []string{"1/4/2014", "February Hosting", "2233", "$51.00"}, + []string{"1/4/2014", "February Extra Bandwidth", "2233", "$30.00"}, +} + +table := tablewriter.NewWriter(os.Stdout) +table.SetHeader([]string{"Date", "Description", "CV2", "Amount"}) +table.SetBorders(tablewriter.Border{Left: true, Top: false, Right: true, Bottom: false}) +table.SetCenterSeparator("|") +table.AppendBulk(data) // Add Bulk Data +table.Render() +``` + +##### Output 5 +``` +| DATE | DESCRIPTION | CV2 | AMOUNT | +|----------|--------------------------|------|--------| +| 1/1/2014 | Domain name | 2233 | $10.98 | +| 1/1/2014 | January Hosting | 2233 | $54.95 | +| 1/4/2014 | February Hosting | 2233 | $51.00 | +| 1/4/2014 | February Extra Bandwidth 
| 2233 | $30.00 | +``` + +#### Example 6 - Identical cells merging +```go +data := [][]string{ + []string{"1/1/2014", "Domain name", "1234", "$10.98"}, + []string{"1/1/2014", "January Hosting", "2345", "$54.95"}, + []string{"1/4/2014", "February Hosting", "3456", "$51.00"}, + []string{"1/4/2014", "February Extra Bandwidth", "4567", "$30.00"}, +} + +table := tablewriter.NewWriter(os.Stdout) +table.SetHeader([]string{"Date", "Description", "CV2", "Amount"}) +table.SetFooter([]string{"", "", "Total", "$146.93"}) +table.SetAutoMergeCells(true) +table.SetRowLine(true) +table.AppendBulk(data) +table.Render() +``` + +##### Output 6 +``` ++----------+--------------------------+-------+---------+ +| DATE | DESCRIPTION | CV2 | AMOUNT | ++----------+--------------------------+-------+---------+ +| 1/1/2014 | Domain name | 1234 | $10.98 | ++ +--------------------------+-------+---------+ +| | January Hosting | 2345 | $54.95 | ++----------+--------------------------+-------+---------+ +| 1/4/2014 | February Hosting | 3456 | $51.00 | ++ +--------------------------+-------+---------+ +| | February Extra Bandwidth | 4567 | $30.00 | ++----------+--------------------------+-------+---------+ +| TOTAL | $146 93 | ++----------+--------------------------+-------+---------+ +``` + + +#### Table with color +```go +data := [][]string{ + []string{"1/1/2014", "Domain name", "2233", "$10.98"}, + []string{"1/1/2014", "January Hosting", "2233", "$54.95"}, + []string{"1/4/2014", "February Hosting", "2233", "$51.00"}, + []string{"1/4/2014", "February Extra Bandwidth", "2233", "$30.00"}, +} + +table := tablewriter.NewWriter(os.Stdout) +table.SetHeader([]string{"Date", "Description", "CV2", "Amount"}) +table.SetFooter([]string{"", "", "Total", "$146.93"}) // Add Footer +table.SetBorder(false) // Set Border to false + +table.SetHeaderColor(tablewriter.Colors{tablewriter.Bold, tablewriter.BgGreenColor}, + tablewriter.Colors{tablewriter.FgHiRedColor, tablewriter.Bold, tablewriter.BgBlackColor}, + 
tablewriter.Colors{tablewriter.BgRedColor, tablewriter.FgWhiteColor}, + tablewriter.Colors{tablewriter.BgCyanColor, tablewriter.FgWhiteColor}) + +table.SetColumnColor(tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiBlackColor}, + tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiRedColor}, + tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiBlackColor}, + tablewriter.Colors{tablewriter.Bold, tablewriter.FgBlackColor}) + +table.SetFooterColor(tablewriter.Colors{}, tablewriter.Colors{}, + tablewriter.Colors{tablewriter.Bold}, + tablewriter.Colors{tablewriter.FgHiRedColor}) + +table.AppendBulk(data) +table.Render() +``` + +#### Table with color Output +![Table with Color](https://cloud.githubusercontent.com/assets/6460392/21101956/bbc7b356-c0a1-11e6-9f36-dba694746efc.png) + +#### Example 7 - Set table caption +```go +data := [][]string{ + []string{"A", "The Good", "500"}, + []string{"B", "The Very very Bad Man", "288"}, + []string{"C", "The Ugly", "120"}, + []string{"D", "The Gopher", "800"}, +} + +table := tablewriter.NewWriter(os.Stdout) +table.SetHeader([]string{"Name", "Sign", "Rating"}) +table.SetCaption(true, "Movie ratings.") + +for _, v := range data { + table.Append(v) +} +table.Render() // Send output +``` + +Note: Caption text will wrap with total width of rendered table. + +##### Output 7 +``` ++------+-----------------------+--------+ +| NAME | SIGN | RATING | ++------+-----------------------+--------+ +| A | The Good | 500 | +| B | The Very very Bad Man | 288 | +| C | The Ugly | 120 | +| D | The Gopher | 800 | ++------+-----------------------+--------+ +Movie ratings. +``` + +#### Render table into a string + +Instead of rendering the table to `os.Stdout` you can also render it into a string. Go 1.10 introduced the `strings.Builder` type which implements the `io.Writer` interface and can therefore be used for this task. 
Example: + +```go +package main + +import ( + "strings" + "fmt" + + "github.com/olekukonko/tablewriter" +) + +func main() { + tableString := &strings.Builder{} + table := tablewriter.NewWriter(tableString) + + /* + * Code to fill the table + */ + + table.Render() + + fmt.Println(tableString.String()) +} +``` + +#### TODO +- ~~Import Directly from CSV~~ - `done` +- ~~Support for `SetFooter`~~ - `done` +- ~~Support for `SetBorder`~~ - `done` +- ~~Support table with uneven rows~~ - `done` +- ~~Support custom alignment~~ +- General Improvement & Optimisation +- `NewHTML` Parse table from HTML diff --git a/vendor/github.com/olekukonko/tablewriter/table.go b/vendor/github.com/olekukonko/tablewriter/table.go new file mode 100644 index 0000000..3cf0996 --- /dev/null +++ b/vendor/github.com/olekukonko/tablewriter/table.go @@ -0,0 +1,876 @@ +// Copyright 2014 Oleku Konko All rights reserved. +// Use of this source code is governed by a MIT +// license that can be found in the LICENSE file. + +// This module is a Table Writer API for the Go Programming Language. 
var (
	// decimal matches an optionally negative number written either with
	// comma-separated groups of three digits ("1,234,567") or as a plain run
	// of digits, optionally followed by a fractional part (".5").
	decimal = regexp.MustCompile(`^-?(?:\d{1,3}(?:,\d{3})*|\d+)(?:\.\d+)?$`)

	// percent matches an optionally negative number with an optional
	// fractional part, immediately followed by a percent sign ("12%",
	// "-3.5%"). The end anchor must come after the '%': with an anchor in
	// front of it the pattern could never match any input.
	percent = regexp.MustCompile(`^-?\d+\.?\d*%$`)
)
Render table output +func (t *Table) Render() { + if t.borders.Top { + t.printLine(true) + } + t.printHeading() + if t.autoMergeCells { + t.printRowsMergeCells() + } else { + t.printRows() + } + if !t.rowLine && t.borders.Bottom { + t.printLine(true) + } + t.printFooter() + + if t.caption { + t.printCaption() + } +} + +const ( + headerRowIdx = -1 + footerRowIdx = -2 +) + +// Set table header +func (t *Table) SetHeader(keys []string) { + t.colSize = len(keys) + for i, v := range keys { + lines := t.parseDimension(v, i, headerRowIdx) + t.headers = append(t.headers, lines) + } +} + +// Set table Footer +func (t *Table) SetFooter(keys []string) { + //t.colSize = len(keys) + for i, v := range keys { + lines := t.parseDimension(v, i, footerRowIdx) + t.footers = append(t.footers, lines) + } +} + +// Set table Caption +func (t *Table) SetCaption(caption bool, captionText ...string) { + t.caption = caption + if len(captionText) == 1 { + t.captionText = captionText[0] + } +} + +// Turn header autoformatting on/off. Default is on (true). +func (t *Table) SetAutoFormatHeaders(auto bool) { + t.autoFmt = auto +} + +// Turn automatic multiline text adjustment on/off. Default is on (true). +func (t *Table) SetAutoWrapText(auto bool) { + t.autoWrap = auto +} + +// Turn automatic reflowing of multiline text when rewrapping. Default is on (true). 
+func (t *Table) SetReflowDuringAutoWrap(auto bool) { + t.reflowText = auto +} + +// Set the Default column width +func (t *Table) SetColWidth(width int) { + t.mW = width +} + +// Set the minimal width for a column +func (t *Table) SetColMinWidth(column int, width int) { + t.cs[column] = width +} + +// Set the Column Separator +func (t *Table) SetColumnSeparator(sep string) { + t.pColumn = sep +} + +// Set the Row Separator +func (t *Table) SetRowSeparator(sep string) { + t.pRow = sep +} + +// Set the center Separator +func (t *Table) SetCenterSeparator(sep string) { + t.pCenter = sep +} + +// Set Header Alignment +func (t *Table) SetHeaderAlignment(hAlign int) { + t.hAlign = hAlign +} + +// Set Footer Alignment +func (t *Table) SetFooterAlignment(fAlign int) { + t.fAlign = fAlign +} + +// Set Table Alignment +func (t *Table) SetAlignment(align int) { + t.align = align +} + +func (t *Table) SetColumnAlignment(keys []int) { + for _, v := range keys { + switch v { + case ALIGN_CENTER: + break + case ALIGN_LEFT: + break + case ALIGN_RIGHT: + break + default: + v = ALIGN_DEFAULT + } + t.columnsAlign = append(t.columnsAlign, v) + } +} + +// Set New Line +func (t *Table) SetNewLine(nl string) { + t.newLine = nl +} + +// Set Header Line +// This would enable / disable a line after the header +func (t *Table) SetHeaderLine(line bool) { + t.hdrLine = line +} + +// Set Row Line +// This would enable / disable a line on each row of the table +func (t *Table) SetRowLine(line bool) { + t.rowLine = line +} + +// Set Auto Merge Cells +// This would enable / disable the merge of cells with identical values +func (t *Table) SetAutoMergeCells(auto bool) { + t.autoMergeCells = auto +} + +// Set Table Border +// This would enable / disable line around the table +func (t *Table) SetBorder(border bool) { + t.SetBorders(Border{border, border, border, border}) +} + +func (t *Table) SetBorders(border Border) { + t.borders = border +} + +// Append row to table +func (t *Table) Append(row 
[]string) { + rowSize := len(t.headers) + if rowSize > t.colSize { + t.colSize = rowSize + } + + n := len(t.lines) + line := [][]string{} + for i, v := range row { + + // Detect string width + // Detect String height + // Break strings into words + out := t.parseDimension(v, i, n) + + // Append broken words + line = append(line, out) + } + t.lines = append(t.lines, line) +} + +// Allow Support for Bulk Append +// Eliminates repeated for loops +func (t *Table) AppendBulk(rows [][]string) { + for _, row := range rows { + t.Append(row) + } +} + +// NumLines to get the number of lines +func (t *Table) NumLines() int { + return len(t.lines) +} + +// Clear rows +func (t *Table) ClearRows() { + t.lines = [][][]string{} +} + +// Clear footer +func (t *Table) ClearFooter() { + t.footers = [][]string{} +} + +// Center based on position and border. +func (t *Table) center(i int) string { + if i == -1 && !t.borders.Left { + return t.pRow + } + + if i == len(t.cs)-1 && !t.borders.Right { + return t.pRow + } + + return t.pCenter +} + +// Print line based on row width +func (t *Table) printLine(nl bool) { + fmt.Fprint(t.out, t.center(-1)) + for i := 0; i < len(t.cs); i++ { + v := t.cs[i] + fmt.Fprintf(t.out, "%s%s%s%s", + t.pRow, + strings.Repeat(string(t.pRow), v), + t.pRow, + t.center(i)) + } + if nl { + fmt.Fprint(t.out, t.newLine) + } +} + +// Print line based on row width with our without cell separator +func (t *Table) printLineOptionalCellSeparators(nl bool, displayCellSeparator []bool) { + fmt.Fprint(t.out, t.pCenter) + for i := 0; i < len(t.cs); i++ { + v := t.cs[i] + if i > len(displayCellSeparator) || displayCellSeparator[i] { + // Display the cell separator + fmt.Fprintf(t.out, "%s%s%s%s", + t.pRow, + strings.Repeat(string(t.pRow), v), + t.pRow, + t.pCenter) + } else { + // Don't display the cell separator for this cell + fmt.Fprintf(t.out, "%s%s", + strings.Repeat(" ", v+2), + t.pCenter) + } + } + if nl { + fmt.Fprint(t.out, t.newLine) + } +} + +// Return the 
PadRight function if align is left, PadLeft if align is right, +// and Pad by default +func pad(align int) func(string, string, int) string { + padFunc := Pad + switch align { + case ALIGN_LEFT: + padFunc = PadRight + case ALIGN_RIGHT: + padFunc = PadLeft + } + return padFunc +} + +// Print heading information +func (t *Table) printHeading() { + // Check if headers is available + if len(t.headers) < 1 { + return + } + + // Identify last column + end := len(t.cs) - 1 + + // Get pad function + padFunc := pad(t.hAlign) + + // Checking for ANSI escape sequences for header + is_esc_seq := false + if len(t.headerParams) > 0 { + is_esc_seq = true + } + + // Maximum height. + max := t.rs[headerRowIdx] + + // Print Heading + for x := 0; x < max; x++ { + // Check if border is set + // Replace with space if not set + fmt.Fprint(t.out, ConditionString(t.borders.Left, t.pColumn, SPACE)) + + for y := 0; y <= end; y++ { + v := t.cs[y] + h := "" + if y < len(t.headers) && x < len(t.headers[y]) { + h = t.headers[y][x] + } + if t.autoFmt { + h = Title(h) + } + pad := ConditionString((y == end && !t.borders.Left), SPACE, t.pColumn) + + if is_esc_seq { + fmt.Fprintf(t.out, " %s %s", + format(padFunc(h, SPACE, v), + t.headerParams[y]), pad) + } else { + fmt.Fprintf(t.out, " %s %s", + padFunc(h, SPACE, v), + pad) + } + } + // Next line + fmt.Fprint(t.out, t.newLine) + } + if t.hdrLine { + t.printLine(true) + } +} + +// Print heading information +func (t *Table) printFooter() { + // Check if headers is available + if len(t.footers) < 1 { + return + } + + // Only print line if border is not set + if !t.borders.Bottom { + t.printLine(true) + } + + // Identify last column + end := len(t.cs) - 1 + + // Get pad function + padFunc := pad(t.fAlign) + + // Checking for ANSI escape sequences for header + is_esc_seq := false + if len(t.footerParams) > 0 { + is_esc_seq = true + } + + // Maximum height. 
+ max := t.rs[footerRowIdx] + + // Print Footer + erasePad := make([]bool, len(t.footers)) + for x := 0; x < max; x++ { + // Check if border is set + // Replace with space if not set + fmt.Fprint(t.out, ConditionString(t.borders.Bottom, t.pColumn, SPACE)) + + for y := 0; y <= end; y++ { + v := t.cs[y] + f := "" + if y < len(t.footers) && x < len(t.footers[y]) { + f = t.footers[y][x] + } + if t.autoFmt { + f = Title(f) + } + pad := ConditionString((y == end && !t.borders.Top), SPACE, t.pColumn) + + if erasePad[y] || (x == 0 && len(f) == 0) { + pad = SPACE + erasePad[y] = true + } + + if is_esc_seq { + fmt.Fprintf(t.out, " %s %s", + format(padFunc(f, SPACE, v), + t.footerParams[y]), pad) + } else { + fmt.Fprintf(t.out, " %s %s", + padFunc(f, SPACE, v), + pad) + } + + //fmt.Fprintf(t.out, " %s %s", + // padFunc(f, SPACE, v), + // pad) + } + // Next line + fmt.Fprint(t.out, t.newLine) + //t.printLine(true) + } + + hasPrinted := false + + for i := 0; i <= end; i++ { + v := t.cs[i] + pad := t.pRow + center := t.pCenter + length := len(t.footers[i][0]) + + if length > 0 { + hasPrinted = true + } + + // Set center to be space if length is 0 + if length == 0 && !t.borders.Right { + center = SPACE + } + + // Print first junction + if i == 0 { + if length > 0 && !t.borders.Left { + center = t.pRow + } + fmt.Fprint(t.out, center) + } + + // Pad With space of length is 0 + if length == 0 { + pad = SPACE + } + // Ignore left space as it has printed before + if hasPrinted || t.borders.Left { + pad = t.pRow + center = t.pCenter + } + + // Change Center end position + if center != SPACE { + if i == end && !t.borders.Right { + center = t.pRow + } + } + + // Change Center start position + if center == SPACE { + if i < end && len(t.footers[i+1][0]) != 0 { + if !t.borders.Left { + center = t.pRow + } else { + center = t.pCenter + } + } + } + + // Print the footer + fmt.Fprintf(t.out, "%s%s%s%s", + pad, + strings.Repeat(string(pad), v), + pad, + center) + + } + + fmt.Fprint(t.out, 
t.newLine) +} + +// Print caption text +func (t Table) printCaption() { + width := t.getTableWidth() + paragraph, _ := WrapString(t.captionText, width) + for linecount := 0; linecount < len(paragraph); linecount++ { + fmt.Fprintln(t.out, paragraph[linecount]) + } +} + +// Calculate the total number of characters in a row +func (t Table) getTableWidth() int { + var chars int + for _, v := range t.cs { + chars += v + } + + // Add chars, spaces, seperators to calculate the total width of the table. + // ncols := t.colSize + // spaces := ncols * 2 + // seps := ncols + 1 + + return (chars + (3 * t.colSize) + 2) +} + +func (t Table) printRows() { + for i, lines := range t.lines { + t.printRow(lines, i) + } +} + +func (t *Table) fillAlignment(num int) { + if len(t.columnsAlign) < num { + t.columnsAlign = make([]int, num) + for i := range t.columnsAlign { + t.columnsAlign[i] = t.align + } + } +} + +// Print Row Information +// Adjust column alignment based on type + +func (t *Table) printRow(columns [][]string, rowIdx int) { + // Get Maximum Height + max := t.rs[rowIdx] + total := len(columns) + + // TODO Fix uneven col size + // if total < t.colSize { + // for n := t.colSize - total; n < t.colSize ; n++ { + // columns = append(columns, []string{SPACE}) + // t.cs[n] = t.mW + // } + //} + + // Pad Each Height + pads := []int{} + + // Checking for ANSI escape sequences for columns + is_esc_seq := false + if len(t.columnsParams) > 0 { + is_esc_seq = true + } + t.fillAlignment(total) + + for i, line := range columns { + length := len(line) + pad := max - length + pads = append(pads, pad) + for n := 0; n < pad; n++ { + columns[i] = append(columns[i], " ") + } + } + //fmt.Println(max, "\n") + for x := 0; x < max; x++ { + for y := 0; y < total; y++ { + + // Check if border is set + fmt.Fprint(t.out, ConditionString((!t.borders.Left && y == 0), SPACE, t.pColumn)) + + fmt.Fprintf(t.out, SPACE) + str := columns[y][x] + + // Embedding escape sequence with column value + if is_esc_seq 
{ + str = format(str, t.columnsParams[y]) + } + + // This would print alignment + // Default alignment would use multiple configuration + switch t.columnsAlign[y] { + case ALIGN_CENTER: // + fmt.Fprintf(t.out, "%s", Pad(str, SPACE, t.cs[y])) + case ALIGN_RIGHT: + fmt.Fprintf(t.out, "%s", PadLeft(str, SPACE, t.cs[y])) + case ALIGN_LEFT: + fmt.Fprintf(t.out, "%s", PadRight(str, SPACE, t.cs[y])) + default: + if decimal.MatchString(strings.TrimSpace(str)) || percent.MatchString(strings.TrimSpace(str)) { + fmt.Fprintf(t.out, "%s", PadLeft(str, SPACE, t.cs[y])) + } else { + fmt.Fprintf(t.out, "%s", PadRight(str, SPACE, t.cs[y])) + + // TODO Custom alignment per column + //if max == 1 || pads[y] > 0 { + // fmt.Fprintf(t.out, "%s", Pad(str, SPACE, t.cs[y])) + //} else { + // fmt.Fprintf(t.out, "%s", PadRight(str, SPACE, t.cs[y])) + //} + + } + } + fmt.Fprintf(t.out, SPACE) + } + // Check if border is set + // Replace with space if not set + fmt.Fprint(t.out, ConditionString(t.borders.Left, t.pColumn, SPACE)) + fmt.Fprint(t.out, t.newLine) + } + + if t.rowLine { + t.printLine(true) + } +} + +// Print the rows of the table and merge the cells that are identical +func (t *Table) printRowsMergeCells() { + var previousLine []string + var displayCellBorder []bool + var tmpWriter bytes.Buffer + for i, lines := range t.lines { + // We store the display of the current line in a tmp writer, as we need to know which border needs to be print above + previousLine, displayCellBorder = t.printRowMergeCells(&tmpWriter, lines, i, previousLine) + if i > 0 { //We don't need to print borders above first line + if t.rowLine { + t.printLineOptionalCellSeparators(true, displayCellBorder) + } + } + tmpWriter.WriteTo(t.out) + } + //Print the end of the table + if t.rowLine { + t.printLine(true) + } +} + +// Print Row Information to a writer and merge identical cells. 
+// Adjust column alignment based on type + +func (t *Table) printRowMergeCells(writer io.Writer, columns [][]string, rowIdx int, previousLine []string) ([]string, []bool) { + // Get Maximum Height + max := t.rs[rowIdx] + total := len(columns) + + // Pad Each Height + pads := []int{} + + // Checking for ANSI escape sequences for columns + is_esc_seq := false + if len(t.columnsParams) > 0 { + is_esc_seq = true + } + for i, line := range columns { + length := len(line) + pad := max - length + pads = append(pads, pad) + for n := 0; n < pad; n++ { + columns[i] = append(columns[i], " ") + } + } + + var displayCellBorder []bool + t.fillAlignment(total) + for x := 0; x < max; x++ { + for y := 0; y < total; y++ { + + // Check if border is set + fmt.Fprint(writer, ConditionString((!t.borders.Left && y == 0), SPACE, t.pColumn)) + + fmt.Fprintf(writer, SPACE) + + str := columns[y][x] + + // Embedding escape sequence with column value + if is_esc_seq { + str = format(str, t.columnsParams[y]) + } + + if t.autoMergeCells { + //Store the full line to merge mutli-lines cells + fullLine := strings.Join(columns[y], " ") + if len(previousLine) > y && fullLine == previousLine[y] && fullLine != "" { + // If this cell is identical to the one above but not empty, we don't display the border and keep the cell empty. 
// parseDimension splits the cell text str into display lines and records the
// cell's size for later printing.
//
// The text is split with getLines and measured with DisplayWidth. When
// autoWrap is on, the target width is capped at t.mW and each paragraph is
// wrapped with WrapString; when reflowText is also on, all lines are first
// joined with spaces into a single paragraph. A single-space line is
// inserted between consecutive wrapped paragraphs.
//
// The widest resulting line is stored in t.cs[colKey] and the number of
// lines in t.rs[rowKey], in each case only when the stored value is missing,
// zero or smaller. The resulting lines are returned.
func (t *Table) parseDimension(str string, colKey, rowKey int) []string {
	var (
		raw      []string
		maxWidth int
	)

	raw = getLines(str)
	maxWidth = 0
	for _, line := range raw {
		if w := DisplayWidth(line); w > maxWidth {
			maxWidth = w
		}
	}

	// If wrapping, ensure that all paragraphs in the cell fit in the
	// specified width.
	if t.autoWrap {
		// If there's a maximum allowed width for wrapping, use that.
		if maxWidth > t.mW {
			maxWidth = t.mW
		}

		// In the process of doing so, we need to recompute maxWidth. This
		// is because perhaps a word in the cell is longer than the
		// allowed maximum width in t.mW.
		newMaxWidth := maxWidth
		newRaw := make([]string, 0, len(raw))

		if t.reflowText {
			// Make a single paragraph of everything.
			raw = []string{strings.Join(raw, " ")}
		}
		for i, para := range raw {
			// The second return value of WrapString is ignored here.
			paraLines, _ := WrapString(para, maxWidth)
			for _, line := range paraLines {
				if w := DisplayWidth(line); w > newMaxWidth {
					newMaxWidth = w
				}
			}
			// Separate consecutive paragraphs with a single-space line.
			if i > 0 {
				newRaw = append(newRaw, " ")
			}
			newRaw = append(newRaw, paraLines...)
		}
		raw = newRaw
		maxWidth = newMaxWidth
	}

	// Store the new known maximum width.
	v, ok := t.cs[colKey]
	if !ok || v < maxWidth || v == 0 {
		t.cs[colKey] = maxWidth
	}

	// Remember the number of lines for the row printer.
	h := len(raw)
	v, ok = t.rs[rowKey]

	if !ok || v < h || v == 0 {
		t.rs[rowKey] = h
	}
	//fmt.Printf("Raw %+v %d\n", raw, len(raw))
	return raw
}
// ConditionString picks one of two strings: it returns valid when cond is
// true and inValid otherwise.
func ConditionString(cond bool, valid, inValid string) string {
	if !cond {
		return inValid
	}
	return valid
}
+ name = " " + } + return strings.ToUpper(name) +} + +// Pad String +// Attempts to place string in the center +func Pad(s, pad string, width int) string { + gap := width - DisplayWidth(s) + if gap > 0 { + gapLeft := int(math.Ceil(float64(gap / 2))) + gapRight := gap - gapLeft + return strings.Repeat(string(pad), gapLeft) + s + strings.Repeat(string(pad), gapRight) + } + return s +} + +// Pad String Right position +// This would place string at the left side of the screen +func PadRight(s, pad string, width int) string { + gap := width - DisplayWidth(s) + if gap > 0 { + return s + strings.Repeat(string(pad), gap) + } + return s +} + +// Pad String Left position +// This would place string at the right side of the screen +func PadLeft(s, pad string, width int) string { + gap := width - DisplayWidth(s) + if gap > 0 { + return strings.Repeat(string(pad), gap) + s + } + return s +} diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go new file mode 100644 index 0000000..992cff2 --- /dev/null +++ b/vendor/golang.org/x/net/html/parse.go @@ -0,0 +1,2417 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "errors" + "fmt" + "io" + "strings" + + a "golang.org/x/net/html/atom" +) + +// A parser implements the HTML5 parsing algorithm: +// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction +type parser struct { + // tokenizer provides the tokens for the parser. + tokenizer *Tokenizer + // tok is the most recently read token. + tok Token + // Self-closing tags like
are treated as start tags, except that + // hasSelfClosingToken is set while they are being processed. + hasSelfClosingToken bool + // doc is the document root element. + doc *Node + // The stack of open elements (section 12.2.4.2) and active formatting + // elements (section 12.2.4.3). + oe, afe nodeStack + // Element pointers (section 12.2.4.4). + head, form *Node + // Other parsing state flags (section 12.2.4.5). + scripting, framesetOK bool + // The stack of template insertion modes + templateStack insertionModeStack + // im is the current insertion mode. + im insertionMode + // originalIM is the insertion mode to go back to after completing a text + // or inTableText insertion mode. + originalIM insertionMode + // fosterParenting is whether new elements should be inserted according to + // the foster parenting rules (section 12.2.6.1). + fosterParenting bool + // quirks is whether the parser is operating in "quirks mode." + quirks bool + // fragment is whether the parser is parsing an HTML fragment. + fragment bool + // context is the context element when parsing an HTML fragment + // (section 12.4). + context *Node +} + +func (p *parser) top() *Node { + if n := p.oe.top(); n != nil { + return n + } + return p.doc +} + +// Stop tags for use in popUntil. These come from section 12.2.4.2. +var ( + defaultScopeStopTags = map[string][]a.Atom{ + "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template}, + "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext}, + "svg": {a.Desc, a.ForeignObject, a.Title}, + } +) + +type scope int + +const ( + defaultScope scope = iota + listItemScope + buttonScope + tableScope + tableRowScope + tableBodyScope + selectScope +) + +// popUntil pops the stack of open elements at the highest element whose tag +// is in matchTags, provided there is no higher element in the scope's stop +// tags (as defined in section 12.2.4.2). It returns whether or not there was +// such an element. 
If there was not, popUntil leaves the stack unchanged. +// +// For example, the set of stop tags for table scope is: "html", "table". If +// the stack was: +// ["html", "body", "font", "table", "b", "i", "u"] +// then popUntil(tableScope, "font") would return false, but +// popUntil(tableScope, "i") would return true and the stack would become: +// ["html", "body", "font", "table", "b"] +// +// If an element's tag is in both the stop tags and matchTags, then the stack +// will be popped and the function returns true (provided, of course, there was +// no higher element in the stack that was also in the stop tags). For example, +// popUntil(tableScope, "table") returns true and leaves: +// ["html", "body", "font"] +func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool { + if i := p.indexOfElementInScope(s, matchTags...); i != -1 { + p.oe = p.oe[:i] + return true + } + return false +} + +// indexOfElementInScope returns the index in p.oe of the highest element whose +// tag is in matchTags that is in scope. If no matching element is in scope, it +// returns -1. +func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int { + for i := len(p.oe) - 1; i >= 0; i-- { + tagAtom := p.oe[i].DataAtom + if p.oe[i].Namespace == "" { + for _, t := range matchTags { + if t == tagAtom { + return i + } + } + switch s { + case defaultScope: + // No-op. 
+ case listItemScope: + if tagAtom == a.Ol || tagAtom == a.Ul { + return -1 + } + case buttonScope: + if tagAtom == a.Button { + return -1 + } + case tableScope: + if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template { + return -1 + } + case selectScope: + if tagAtom != a.Optgroup && tagAtom != a.Option { + return -1 + } + default: + panic("unreachable") + } + } + switch s { + case defaultScope, listItemScope, buttonScope: + for _, t := range defaultScopeStopTags[p.oe[i].Namespace] { + if t == tagAtom { + return -1 + } + } + } + } + return -1 +} + +// elementInScope is like popUntil, except that it doesn't modify the stack of +// open elements. +func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool { + return p.indexOfElementInScope(s, matchTags...) != -1 +} + +// clearStackToContext pops elements off the stack of open elements until a +// scope-defined element is found. +func (p *parser) clearStackToContext(s scope) { + for i := len(p.oe) - 1; i >= 0; i-- { + tagAtom := p.oe[i].DataAtom + switch s { + case tableScope: + if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template { + p.oe = p.oe[:i+1] + return + } + case tableRowScope: + if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template { + p.oe = p.oe[:i+1] + return + } + case tableBodyScope: + if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template { + p.oe = p.oe[:i+1] + return + } + default: + panic("unreachable") + } + } +} + +// generateImpliedEndTags pops nodes off the stack of open elements as long as +// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. +// If exceptions are specified, nodes with that name will not be popped off. 
+func (p *parser) generateImpliedEndTags(exceptions ...string) { + var i int +loop: + for i = len(p.oe) - 1; i >= 0; i-- { + n := p.oe[i] + if n.Type == ElementNode { + switch n.DataAtom { + case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: + for _, except := range exceptions { + if n.Data == except { + break loop + } + } + continue + } + } + break + } + + p.oe = p.oe[:i+1] +} + +// addChild adds a child node n to the top element, and pushes n onto the stack +// of open elements if it is an element node. +func (p *parser) addChild(n *Node) { + if p.shouldFosterParent() { + p.fosterParent(n) + } else { + p.top().AppendChild(n) + } + + if n.Type == ElementNode { + p.oe = append(p.oe, n) + } +} + +// shouldFosterParent returns whether the next node to be added should be +// foster parented. +func (p *parser) shouldFosterParent() bool { + if p.fosterParenting { + switch p.top().DataAtom { + case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: + return true + } + } + return false +} + +// fosterParent adds a child node according to the foster parenting rules. +// Section 12.2.6.1, "foster parenting". +func (p *parser) fosterParent(n *Node) { + var table, parent, prev, template *Node + var i int + for i = len(p.oe) - 1; i >= 0; i-- { + if p.oe[i].DataAtom == a.Table { + table = p.oe[i] + break + } + } + + var j int + for j = len(p.oe) - 1; j >= 0; j-- { + if p.oe[j].DataAtom == a.Template { + template = p.oe[j] + break + } + } + + if template != nil && (table == nil || j > i) { + template.AppendChild(n) + return + } + + if table == nil { + // The foster parent is the html element. 
+ parent = p.oe[0] + } else { + parent = table.Parent + } + if parent == nil { + parent = p.oe[i-1] + } + + if table != nil { + prev = table.PrevSibling + } else { + prev = parent.LastChild + } + if prev != nil && prev.Type == TextNode && n.Type == TextNode { + prev.Data += n.Data + return + } + + parent.InsertBefore(n, table) +} + +// addText adds text to the preceding node if it is a text node, or else it +// calls addChild with a new text node. +func (p *parser) addText(text string) { + if text == "" { + return + } + + if p.shouldFosterParent() { + p.fosterParent(&Node{ + Type: TextNode, + Data: text, + }) + return + } + + t := p.top() + if n := t.LastChild; n != nil && n.Type == TextNode { + n.Data += text + return + } + p.addChild(&Node{ + Type: TextNode, + Data: text, + }) +} + +// addElement adds a child element based on the current token. +func (p *parser) addElement() { + p.addChild(&Node{ + Type: ElementNode, + DataAtom: p.tok.DataAtom, + Data: p.tok.Data, + Attr: p.tok.Attr, + }) +} + +// Section 12.2.4.3. +func (p *parser) addFormattingElement() { + tagAtom, attr := p.tok.DataAtom, p.tok.Attr + p.addElement() + + // Implement the Noah's Ark clause, but with three per family instead of two. + identicalElements := 0 +findIdenticalElements: + for i := len(p.afe) - 1; i >= 0; i-- { + n := p.afe[i] + if n.Type == scopeMarkerNode { + break + } + if n.Type != ElementNode { + continue + } + if n.Namespace != "" { + continue + } + if n.DataAtom != tagAtom { + continue + } + if len(n.Attr) != len(attr) { + continue + } + compareAttributes: + for _, t0 := range n.Attr { + for _, t1 := range attr { + if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val { + // Found a match for this attribute, continue with the next attribute. + continue compareAttributes + } + } + // If we get here, there is no attribute that matches a. + // Therefore the element is not identical to the new one. 
+ continue findIdenticalElements + } + + identicalElements++ + if identicalElements >= 3 { + p.afe.remove(n) + } + } + + p.afe = append(p.afe, p.top()) +} + +// Section 12.2.4.3. +func (p *parser) clearActiveFormattingElements() { + for { + n := p.afe.pop() + if len(p.afe) == 0 || n.Type == scopeMarkerNode { + return + } + } +} + +// Section 12.2.4.3. +func (p *parser) reconstructActiveFormattingElements() { + n := p.afe.top() + if n == nil { + return + } + if n.Type == scopeMarkerNode || p.oe.index(n) != -1 { + return + } + i := len(p.afe) - 1 + for n.Type != scopeMarkerNode && p.oe.index(n) == -1 { + if i == 0 { + i = -1 + break + } + i-- + n = p.afe[i] + } + for { + i++ + clone := p.afe[i].clone() + p.addChild(clone) + p.afe[i] = clone + if i == len(p.afe)-1 { + break + } + } +} + +// Section 12.2.5. +func (p *parser) acknowledgeSelfClosingTag() { + p.hasSelfClosingToken = false +} + +// An insertion mode (section 12.2.4.1) is the state transition function from +// a particular state in the HTML5 parser's state machine. It updates the +// parser's fields depending on parser.tok (where ErrorToken means EOF). +// It returns whether the token was consumed. +type insertionMode func(*parser) bool + +// setOriginalIM sets the insertion mode to return to after completing a text or +// inTableText insertion mode. +// Section 12.2.4.1, "using the rules for". +func (p *parser) setOriginalIM() { + if p.originalIM != nil { + panic("html: bad parser state: originalIM was set twice") + } + p.originalIM = p.im +} + +// Section 12.2.4.1, "reset the insertion mode". 
// resetInsertionMode sets p.im by walking the stack of open elements from
// the top (most recently opened) down to the bottom and choosing the
// insertion mode for the first element whose tag is recognised below.
// Unrecognised elements are skipped. When the walk reaches the bottom entry
// of the stack, the fragment context node is examined in its place if one is
// set, and an unrecognised tag there falls back to inBodyIM.
// Section 12.2.4.1, "reset the insertion mode".
func (p *parser) resetInsertionMode() {
	for i := len(p.oe) - 1; i >= 0; i-- {
		n := p.oe[i]
		// last is true for the bottom-most entry of the stack.
		last := i == 0
		if last && p.context != nil {
			n = p.context
		}

		switch n.DataAtom {
		case a.Select:
			if !last {
				// Walk the elements below the select on the stack: a template
				// keeps inSelectIM, a table switches to inSelectInTableIM.
				for ancestor, first := n, p.oe[0]; ancestor != first; {
					ancestor = p.oe[p.oe.index(ancestor)-1]
					switch ancestor.DataAtom {
					case a.Template:
						p.im = inSelectIM
						return
					case a.Table:
						p.im = inSelectInTableIM
						return
					}
				}
			}
			p.im = inSelectIM
		case a.Td, a.Th:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inCellIM
		case a.Tr:
			p.im = inRowIM
		case a.Tbody, a.Thead, a.Tfoot:
			p.im = inTableBodyIM
		case a.Caption:
			p.im = inCaptionIM
		case a.Colgroup:
			p.im = inColumnGroupIM
		case a.Table:
			p.im = inTableIM
		case a.Template:
			// TODO: remove this divergence from the HTML5 spec.
			if n.Namespace != "" {
				// Namespaced template elements are skipped; keep walking down.
				continue
			}
			p.im = p.templateStack.top()
		case a.Head:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inHeadIM
		case a.Body:
			p.im = inBodyIM
		case a.Frameset:
			p.im = inFramesetIM
		case a.Html:
			if p.head == nil {
				p.im = beforeHeadIM
			} else {
				p.im = afterHeadIM
			}
		default:
			if last {
				p.im = inBodyIM
				return
			}
			// Unrecognised tag: examine the next element down the stack.
			continue
		}
		// A recognised tag set p.im above; stop walking.
		return
	}
}
+ return true + } + case CommentToken: + p.doc.AppendChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + n, quirks := parseDoctype(p.tok.Data) + p.doc.AppendChild(n) + p.quirks = quirks + p.im = beforeHTMLIM + return true + } + p.quirks = true + p.im = beforeHTMLIM + return false +} + +// Section 12.2.6.4.2. +func beforeHTMLIM(p *parser) bool { + switch p.tok.Type { + case DoctypeToken: + // Ignore the token. + return true + case TextToken: + p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) + if len(p.tok.Data) == 0 { + // It was all whitespace, so ignore it. + return true + } + case StartTagToken: + if p.tok.DataAtom == a.Html { + p.addElement() + p.im = beforeHeadIM + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Head, a.Body, a.Html, a.Br: + p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) + return false + default: + // Ignore the token. + return true + } + case CommentToken: + p.doc.AppendChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + } + p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) + return false +} + +// Section 12.2.6.4.3. +func beforeHeadIM(p *parser) bool { + switch p.tok.Type { + case TextToken: + p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) + if len(p.tok.Data) == 0 { + // It was all whitespace, so ignore it. + return true + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Head: + p.addElement() + p.head = p.top() + p.im = inHeadIM + return true + case a.Html: + return inBodyIM(p) + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Head, a.Body, a.Html, a.Br: + p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) + return false + default: + // Ignore the token. + return true + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + // Ignore the token. 
+ return true + } + + p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) + return false +} + +// Section 12.2.6.4.4. +func inHeadIM(p *parser) bool { + switch p.tok.Type { + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) < len(p.tok.Data) { + // Add the initial whitespace to the current node. + p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) + if s == "" { + return true + } + p.tok.Data = s + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta: + p.addElement() + p.oe.pop() + p.acknowledgeSelfClosingTag() + return true + case a.Noscript: + p.addElement() + if p.scripting { + p.setOriginalIM() + p.im = textIM + } else { + p.im = inHeadNoscriptIM + } + return true + case a.Script, a.Title, a.Noframes, a.Style: + p.addElement() + p.setOriginalIM() + p.im = textIM + return true + case a.Head: + // Ignore the token. + return true + case a.Template: + p.addElement() + p.afe = append(p.afe, &scopeMarker) + p.framesetOK = false + p.im = inTemplateIM + p.templateStack = append(p.templateStack, inTemplateIM) + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Head: + p.oe.pop() + p.im = afterHeadIM + return true + case a.Body, a.Html, a.Br: + p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) + return false + case a.Template: + if !p.oe.contains(a.Template) { + return true + } + // TODO: remove this divergence from the HTML5 spec. + // + // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 + p.generateImpliedEndTags() + for i := len(p.oe) - 1; i >= 0; i-- { + if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template { + p.oe = p.oe[:i] + break + } + } + p.clearActiveFormattingElements() + p.templateStack.pop() + p.resetInsertionMode() + return true + default: + // Ignore the token. 
+ return true + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + // Ignore the token. + return true + } + + p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) + return false +} + +// 12.2.6.4.5. +func inHeadNoscriptIM(p *parser) bool { + switch p.tok.Type { + case DoctypeToken: + // Ignore the token. + return true + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style: + return inHeadIM(p) + case a.Head, a.Noscript: + // Ignore the token. + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Noscript, a.Br: + default: + // Ignore the token. + return true + } + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) == 0 { + // It was all whitespace. + return inHeadIM(p) + } + case CommentToken: + return inHeadIM(p) + } + p.oe.pop() + if p.top().DataAtom != a.Head { + panic("html: the new current node will be a head element.") + } + p.im = inHeadIM + if p.tok.DataAtom == a.Noscript { + return true + } + return false +} + +// Section 12.2.6.4.6. +func afterHeadIM(p *parser) bool { + switch p.tok.Type { + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) < len(p.tok.Data) { + // Add the initial whitespace to the current node. + p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) + if s == "" { + return true + } + p.tok.Data = s + } + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Body: + p.addElement() + p.framesetOK = false + p.im = inBodyIM + return true + case a.Frameset: + p.addElement() + p.im = inFramesetIM + return true + case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: + p.oe = append(p.oe, p.head) + defer p.oe.remove(p.head) + return inHeadIM(p) + case a.Head: + // Ignore the token. 
+ return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Body, a.Html, a.Br: + // Drop down to creating an implied tag. + case a.Template: + return inHeadIM(p) + default: + // Ignore the token. + return true + } + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + // Ignore the token. + return true + } + + p.parseImpliedToken(StartTagToken, a.Body, a.Body.String()) + p.framesetOK = true + return false +} + +// copyAttributes copies attributes of src not found on dst to dst. +func copyAttributes(dst *Node, src Token) { + if len(src.Attr) == 0 { + return + } + attr := map[string]string{} + for _, t := range dst.Attr { + attr[t.Key] = t.Val + } + for _, t := range src.Attr { + if _, ok := attr[t.Key]; !ok { + dst.Attr = append(dst.Attr, t) + attr[t.Key] = t.Val + } + } +} + +// Section 12.2.6.4.7. +func inBodyIM(p *parser) bool { + switch p.tok.Type { + case TextToken: + d := p.tok.Data + switch n := p.oe.top(); n.DataAtom { + case a.Pre, a.Listing: + if n.FirstChild == nil { + // Ignore a newline at the start of a
 block.
+				if d != "" && d[0] == '\r' {
+					d = d[1:]
+				}
+				if d != "" && d[0] == '\n' {
+					d = d[1:]
+				}
+			}
+		}
+		d = strings.Replace(d, "\x00", "", -1)
+		if d == "" {
+			return true
+		}
+		p.reconstructActiveFormattingElements()
+		p.addText(d)
+		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
+			// There were non-whitespace characters inserted.
+			p.framesetOK = false
+		}
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			if p.oe.contains(a.Template) {
+				return true
+			}
+			copyAttributes(p.oe[0], p.tok)
+		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
+			return inHeadIM(p)
+		case a.Body:
+			if p.oe.contains(a.Template) {
+				return true
+			}
+			if len(p.oe) >= 2 {
+				body := p.oe[1]
+				if body.Type == ElementNode && body.DataAtom == a.Body {
+					p.framesetOK = false
+					copyAttributes(body, p.tok)
+				}
+			}
+		case a.Frameset:
+			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
+				// Ignore the token.
+				return true
+			}
+			body := p.oe[1]
+			if body.Parent != nil {
+				body.Parent.RemoveChild(body)
+			}
+			p.oe = p.oe[:1]
+			p.addElement()
+			p.im = inFramesetIM
+			return true
+		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+			p.popUntil(buttonScope, a.P)
+			switch n := p.top(); n.DataAtom {
+			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+				p.oe.pop()
+			}
+			p.addElement()
+		case a.Pre, a.Listing:
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+			// The newline, if any, will be dealt with by the TextToken case.
+			p.framesetOK = false
+		case a.Form:
+			if p.form != nil && !p.oe.contains(a.Template) {
+				// Ignore the token
+				return true
+			}
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+			if !p.oe.contains(a.Template) {
+				p.form = p.top()
+			}
+		case a.Li:
+			p.framesetOK = false
+			for i := len(p.oe) - 1; i >= 0; i-- {
+				node := p.oe[i]
+				switch node.DataAtom {
+				case a.Li:
+					p.oe = p.oe[:i]
+				case a.Address, a.Div, a.P:
+					continue
+				default:
+					if !isSpecialElement(node) {
+						continue
+					}
+				}
+				break
+			}
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+		case a.Dd, a.Dt:
+			p.framesetOK = false
+			for i := len(p.oe) - 1; i >= 0; i-- {
+				node := p.oe[i]
+				switch node.DataAtom {
+				case a.Dd, a.Dt:
+					p.oe = p.oe[:i]
+				case a.Address, a.Div, a.P:
+					continue
+				default:
+					if !isSpecialElement(node) {
+						continue
+					}
+				}
+				break
+			}
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+		case a.Plaintext:
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+		case a.Button:
+			p.popUntil(defaultScope, a.Button)
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+			p.framesetOK = false
+		case a.A:
+			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
+				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
+					p.inBodyEndTagFormatting(a.A, "a")
+					p.oe.remove(n)
+					p.afe.remove(n)
+					break
+				}
+			}
+			p.reconstructActiveFormattingElements()
+			p.addFormattingElement()
+		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
+			p.reconstructActiveFormattingElements()
+			p.addFormattingElement()
+		case a.Nobr:
+			p.reconstructActiveFormattingElements()
+			if p.elementInScope(defaultScope, a.Nobr) {
+				p.inBodyEndTagFormatting(a.Nobr, "nobr")
+				p.reconstructActiveFormattingElements()
+			}
+			p.addFormattingElement()
+		case a.Applet, a.Marquee, a.Object:
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+			p.afe = append(p.afe, &scopeMarker)
+			p.framesetOK = false
+		case a.Table:
+			if !p.quirks {
+				p.popUntil(buttonScope, a.P)
+			}
+			p.addElement()
+			p.framesetOK = false
+			p.im = inTableIM
+			return true
+		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+			p.oe.pop()
+			p.acknowledgeSelfClosingTag()
+			if p.tok.DataAtom == a.Input {
+				for _, t := range p.tok.Attr {
+					if t.Key == "type" {
+						if strings.ToLower(t.Val) == "hidden" {
+							// Skip setting framesetOK = false
+							return true
+						}
+					}
+				}
+			}
+			p.framesetOK = false
+		case a.Param, a.Source, a.Track:
+			p.addElement()
+			p.oe.pop()
+			p.acknowledgeSelfClosingTag()
+		case a.Hr:
+			p.popUntil(buttonScope, a.P)
+			p.addElement()
+			p.oe.pop()
+			p.acknowledgeSelfClosingTag()
+			p.framesetOK = false
+		case a.Image:
+			p.tok.DataAtom = a.Img
+			p.tok.Data = a.Img.String()
+			return false
+		case a.Isindex:
+			if p.form != nil {
+				// Ignore the token.
+				return true
+			}
+			action := ""
+			prompt := "This is a searchable index. Enter search keywords: "
+			attr := []Attribute{{Key: "name", Val: "isindex"}}
+			for _, t := range p.tok.Attr {
+				switch t.Key {
+				case "action":
+					action = t.Val
+				case "name":
+					// Ignore the attribute.
+				case "prompt":
+					prompt = t.Val
+				default:
+					attr = append(attr, t)
+				}
+			}
+			p.acknowledgeSelfClosingTag()
+			p.popUntil(buttonScope, a.P)
+			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
+			if p.form == nil {
+				// NOTE: The 'isindex' element has been removed,
+				// and the 'template' element has not been designed to be
+				// collaborative with the index element.
+				//
+				// Ignore the token.
+				return true
+			}
+			if action != "" {
+				p.form.Attr = []Attribute{{Key: "action", Val: action}}
+			}
+			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
+			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
+			p.addText(prompt)
+			p.addChild(&Node{
+				Type:     ElementNode,
+				DataAtom: a.Input,
+				Data:     a.Input.String(),
+				Attr:     attr,
+			})
+			p.oe.pop()
+			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
+			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
+			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
+		case a.Textarea:
+			p.addElement()
+			p.setOriginalIM()
+			p.framesetOK = false
+			p.im = textIM
+		case a.Xmp:
+			p.popUntil(buttonScope, a.P)
+			p.reconstructActiveFormattingElements()
+			p.framesetOK = false
+			p.addElement()
+			p.setOriginalIM()
+			p.im = textIM
+		case a.Iframe:
+			p.framesetOK = false
+			p.addElement()
+			p.setOriginalIM()
+			p.im = textIM
+		case a.Noembed, a.Noscript:
+			p.addElement()
+			p.setOriginalIM()
+			p.im = textIM
+		case a.Select:
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+			p.framesetOK = false
+			p.im = inSelectIM
+			return true
+		case a.Optgroup, a.Option:
+			if p.top().DataAtom == a.Option {
+				p.oe.pop()
+			}
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+		case a.Rb, a.Rtc:
+			if p.elementInScope(defaultScope, a.Ruby) {
+				p.generateImpliedEndTags()
+			}
+			p.addElement()
+		case a.Rp, a.Rt:
+			if p.elementInScope(defaultScope, a.Ruby) {
+				p.generateImpliedEndTags("rtc")
+			}
+			p.addElement()
+		case a.Math, a.Svg:
+			p.reconstructActiveFormattingElements()
+			if p.tok.DataAtom == a.Math {
+				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
+			} else {
+				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
+			}
+			adjustForeignAttributes(p.tok.Attr)
+			p.addElement()
+			p.top().Namespace = p.tok.Data
+			if p.hasSelfClosingToken {
+				p.oe.pop()
+				p.acknowledgeSelfClosingTag()
+			}
+			return true
+		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+			// Ignore the token.
+		default:
+			p.reconstructActiveFormattingElements()
+			p.addElement()
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Body:
+			if p.elementInScope(defaultScope, a.Body) {
+				p.im = afterBodyIM
+			}
+		case a.Html:
+			if p.elementInScope(defaultScope, a.Body) {
+				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
+				return false
+			}
+			return true
+		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
+			p.popUntil(defaultScope, p.tok.DataAtom)
+		case a.Form:
+			if p.oe.contains(a.Template) {
+				i := p.indexOfElementInScope(defaultScope, a.Form)
+				if i == -1 {
+					// Ignore the token.
+					return true
+				}
+				p.generateImpliedEndTags()
+				if p.oe[i].DataAtom != a.Form {
+					// Ignore the token.
+					return true
+				}
+				p.popUntil(defaultScope, a.Form)
+			} else {
+				node := p.form
+				p.form = nil
+				i := p.indexOfElementInScope(defaultScope, a.Form)
+				if node == nil || i == -1 || p.oe[i] != node {
+					// Ignore the token.
+					return true
+				}
+				p.generateImpliedEndTags()
+				p.oe.remove(node)
+			}
+		case a.P:
+			if !p.elementInScope(buttonScope, a.P) {
+				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
+			}
+			p.popUntil(buttonScope, a.P)
+		case a.Li:
+			p.popUntil(listItemScope, a.Li)
+		case a.Dd, a.Dt:
+			p.popUntil(defaultScope, p.tok.DataAtom)
+		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
+		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
+			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
+		case a.Applet, a.Marquee, a.Object:
+			if p.popUntil(defaultScope, p.tok.DataAtom) {
+				p.clearActiveFormattingElements()
+			}
+		case a.Br:
+			p.tok.Type = StartTagToken
+			return false
+		case a.Template:
+			return inHeadIM(p)
+		default:
+			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
+		}
+	case CommentToken:
+		p.addChild(&Node{
+			Type: CommentNode,
+			Data: p.tok.Data,
+		})
+	case ErrorToken:
+		// TODO: remove this divergence from the HTML5 spec.
+		if len(p.templateStack) > 0 {
+			p.im = inTemplateIM
+			return false
+		} else {
+			for _, e := range p.oe {
+				switch e.DataAtom {
+				case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
+					a.Thead, a.Tr, a.Body, a.Html:
+				default:
+					return true
+				}
+			}
+		}
+	}
+
+	return true
+}
+
+// inBodyEndTagFormatting handles an end tag for a formatting element named
+// tagName (atom tagAtom). It looks the element up in the list of active
+// formatting elements (p.afe) and rearranges the stack of open elements
+// (p.oe), p.afe and the node tree so that misnested formatting markup is
+// repaired. When no matching entry exists after the last scope marker, the
+// end tag is handed to inBodyEndTagOther instead. The outer loop runs at
+// most 8 times.
+func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
+	// This is the "adoption agency" algorithm, described at
+	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
+
+	// TODO: this is a fairly literal line-by-line translation of that algorithm.
+	// Once the code successfully parses the comprehensive test suite, we should
+	// refactor this code to be more idiomatic.
+
+	// Steps 1-4. The outer loop.
+	for i := 0; i < 8; i++ {
+		// Step 5. Find the formatting element.
+		// Only entries after the last scope marker in p.afe are considered.
+		var formattingElement *Node
+		for j := len(p.afe) - 1; j >= 0; j-- {
+			if p.afe[j].Type == scopeMarkerNode {
+				break
+			}
+			if p.afe[j].DataAtom == tagAtom {
+				formattingElement = p.afe[j]
+				break
+			}
+		}
+		if formattingElement == nil {
+			p.inBodyEndTagOther(tagAtom, tagName)
+			return
+		}
+		feIndex := p.oe.index(formattingElement)
+		if feIndex == -1 {
+			// Listed as active but no longer open: just drop the stale entry.
+			p.afe.remove(formattingElement)
+			return
+		}
+		if !p.elementInScope(defaultScope, tagAtom) {
+			// Ignore the tag.
+			return
+		}
+
+		// Steps 9-10. Find the furthest block.
+		// This is the first special element at or above the formatting
+		// element on the stack.
+		var furthestBlock *Node
+		for _, e := range p.oe[feIndex:] {
+			if isSpecialElement(e) {
+				furthestBlock = e
+				break
+			}
+		}
+		if furthestBlock == nil {
+			// Simple case: pop up to and including the formatting element
+			// and remove it from the active list.
+			e := p.oe.pop()
+			for e != formattingElement {
+				e = p.oe.pop()
+			}
+			p.afe.remove(e)
+			return
+		}
+
+		// Steps 11-12. Find the common ancestor and bookmark node.
+		// NOTE(review): indexing with feIndex-1 assumes the formatting
+		// element is never the bottom-most open element — confirm.
+		commonAncestor := p.oe[feIndex-1]
+		bookmark := p.afe.index(formattingElement)
+
+		// Step 13. The inner loop. Find the lastNode to reparent.
+		lastNode := furthestBlock
+		node := furthestBlock
+		x := p.oe.index(node)
+		// Steps 13.1-13.2
+		for j := 0; j < 3; j++ {
+			// Step 13.3.
+			x--
+			node = p.oe[x]
+			// Step 13.4 - 13.5.
+			// Open elements that are not active formatting elements are
+			// simply taken off the stack.
+			if p.afe.index(node) == -1 {
+				p.oe.remove(node)
+				continue
+			}
+			// Step 13.6.
+			if node == formattingElement {
+				break
+			}
+			// Step 13.7.
+			// Replace node by a clone in both lists.
+			clone := node.clone()
+			p.afe[p.afe.index(node)] = clone
+			p.oe[p.oe.index(node)] = clone
+			node = clone
+			// Step 13.8.
+			if lastNode == furthestBlock {
+				bookmark = p.afe.index(node) + 1
+			}
+			// Step 13.9.
+			if lastNode.Parent != nil {
+				lastNode.Parent.RemoveChild(lastNode)
+			}
+			node.AppendChild(lastNode)
+			// Step 13.10.
+			lastNode = node
+		}
+
+		// Step 14. Reparent lastNode to the common ancestor,
+		// or for misnested table nodes, to the foster parent.
+		if lastNode.Parent != nil {
+			lastNode.Parent.RemoveChild(lastNode)
+		}
+		switch commonAncestor.DataAtom {
+		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+			p.fosterParent(lastNode)
+		default:
+			commonAncestor.AppendChild(lastNode)
+		}
+
+		// Steps 15-17. Reparent nodes from the furthest block's children
+		// to a clone of the formatting element.
+		clone := formattingElement.clone()
+		reparentChildren(clone, furthestBlock)
+		furthestBlock.AppendChild(clone)
+
+		// Step 18. Fix up the list of active formatting elements.
+		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
+			// Move the bookmark with the rest of the list.
+			bookmark--
+		}
+		p.afe.remove(formattingElement)
+		p.afe.insert(bookmark, clone)
+
+		// Step 19. Fix up the stack of open elements.
+		// The clone goes directly above the furthest block.
+		p.oe.remove(formattingElement)
+		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
+	}
+}
+
+// inBodyEndTagOther runs the "any other end tag" steps on behalf of inBodyIM.
+// The wording follows the "any other end tag" entry of section 12.2.6.5, the
+// rules for parsing tokens in foreign content:
+// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
+//
+// The stack of open elements is scanned from the top. At the first element
+// carrying the same tag, that element and everything above it are popped.
+// If a special element is met first, the stack is left as it is.
+func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
+	for depth := len(p.oe) - 1; depth >= 0; depth-- {
+		open := p.oe[depth]
+
+		// Two elements share a tag exactly when their Data strings match.
+		// Common HTML tags additionally carry a unique non-zero DataAtom, so
+		// for those the cheaper integer comparison is sufficient. Uncommon
+		// (custom) tags all have a zero DataAtom and need the string
+		// comparison as a tie-breaker.
+		sameTag := open.DataAtom == tagAtom
+		if sameTag && tagAtom == 0 {
+			sameTag = open.Data == tagName
+		}
+
+		switch {
+		case sameTag:
+			p.oe = p.oe[:depth]
+			return
+		case isSpecialElement(open):
+			return
+		}
+	}
+}
+
+// Section 12.2.6.4.8.
+func textIM(p *parser) bool {
+	switch p.tok.Type {
+	case ErrorToken:
+		p.oe.pop()
+	case TextToken:
+		d := p.tok.Data
+		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
+			// Ignore a newline at the start of a