Update vendored dependencies.

This commit is contained in:
Martin Dosch 2020-05-14 16:07:09 +02:00
parent 48690e232f
commit d6f44d987e
29 changed files with 1240 additions and 488 deletions

9
go.mod
View file

@ -4,14 +4,15 @@ go 1.14
require ( require (
github.com/PuerkitoBio/goquery v1.5.1 // indirect github.com/PuerkitoBio/goquery v1.5.1 // indirect
github.com/andybalholm/cascadia v1.2.0 // indirect
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9
github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195 github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7
github.com/mattn/go-runewidth v0.0.9 // indirect
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2 github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2
github.com/mmcdole/gofeed v1.0.0-beta2 github.com/mmcdole/gofeed v1.0.0
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf // indirect
github.com/olekukonko/tablewriter v0.0.4 // indirect github.com/olekukonko/tablewriter v0.0.4 // indirect
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/stretchr/testify v1.5.1 // indirect github.com/stretchr/testify v1.5.1 // indirect
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e // indirect golang.org/x/net v0.0.0-20200513185701-a91f0712d120 // indirect
golang.org/x/text v0.3.2 // indirect golang.org/x/text v0.3.2 // indirect
) )

24
go.sum
View file

@ -1,19 +1,28 @@
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 h1:z0uK8UQqjMVYzvk4tiiu3obv2B44+XBsvgEJREQfnO8= github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 h1:z0uK8UQqjMVYzvk4tiiu3obv2B44+XBsvgEJREQfnO8=
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9/go.mod h1:Jl2neWsQaDanWORdqZ4emBl50J4/aRBBS4FyyG9/PFo= github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9/go.mod h1:Jl2neWsQaDanWORdqZ4emBl50J4/aRBBS4FyyG9/PFo=
github.com/codegangsta/cli v1.20.0/go.mod h1:/qJNoX69yVSKu5o4jLyXAENLRyk1uhi7zkbQ3slBdOA=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195 h1:j0UEFmS7wSjAwKEIkgKBn8PRDfjcuggzr93R9wk53nQ= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 h1:g0fAGBisHaEQ0TRq1iBvemFRf+8AEWEmBESSiWB3Vsc=
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54= github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54=
github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2 h1:F544zRtDc/pMpFNHN46oeXV2jIAG4DoMH+6zlVSn0Q8= github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2 h1:F544zRtDc/pMpFNHN46oeXV2jIAG4DoMH+6zlVSn0Q8=
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2/go.mod h1:Cs5mF0OsrRRmhkyOod//ldNPOwJsrBvJ+1WRspv0xoc= github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2/go.mod h1:Cs5mF0OsrRRmhkyOod//ldNPOwJsrBvJ+1WRspv0xoc=
github.com/mmcdole/gofeed v1.0.0-beta2 h1:CjQ0ADhAwNSb08zknAkGOEYqr8zfZKfrzgk9BxpWP2E= github.com/mmcdole/gofeed v1.0.0 h1:PHqwr8fsEm8xarj9s53XeEAFYhRM3E9Ib7Ie766/LTE=
github.com/mmcdole/gofeed v1.0.0-beta2/go.mod h1:/BF9JneEL2/flujm8XHoxUcghdTV6vvb3xx/vKyChFU= github.com/mmcdole/gofeed v1.0.0/go.mod h1:tkVcyzS3qVMlQrQxJoEH1hkTiuo9a8emDzkMi7TZBu0=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI= github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8= github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8= github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8=
@ -23,13 +32,16 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo= github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM= github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k= golang.org/x/net v0.0.0-20200513185701-a91f0712d120 h1:EZ3cVSzKOlJxAd8e8YAJ7no8nNypTxexh/YE/xW3ZEY=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View file

@ -1,3 +1,5 @@
module "github.com/andybalholm/cascadia" module github.com/andybalholm/cascadia
require "golang.org/x/net" v0.0.0-20180218175443-cbe0f9307d01 require golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01
go 1.13

View file

@ -13,6 +13,10 @@ import (
type parser struct { type parser struct {
s string // the source text s string // the source text
i int // the current position i int // the current position
// if `false`, parsing a pseudo-element
// returns an error.
acceptPseudoElements bool
} }
// parseEscape parses a backslash escape. // parseEscape parses a backslash escape.
@ -29,7 +33,7 @@ func (p *parser) parseEscape() (result string, err error) {
case hexDigit(c): case hexDigit(c):
// unicode escape (hex) // unicode escape (hex)
var i int var i int
for i = start; i < p.i+6 && i < len(p.s) && hexDigit(p.s[i]); i++ { for i = start; i < start+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
// empty // empty
} }
v, _ := strconv.ParseUint(p.s[start:i], 16, 21) v, _ := strconv.ParseUint(p.s[start:i], 16, 21)
@ -422,17 +426,25 @@ var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it") var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
var errUnmatchedParenthesis = errors.New("unmatched '('") var errUnmatchedParenthesis = errors.New("unmatched '('")
// parsePseudoclassSelector parses a pseudoclass selector like :not(p) // parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element
func (p *parser) parsePseudoclassSelector() (out Sel, err error) { // For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
// https://drafts.csswg.org/selectors-3/#pseudo-elements
// Returning a nil `Sel` (and a nil `error`) means we found a pseudo-element.
func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) {
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
} }
if p.s[p.i] != ':' { if p.s[p.i] != ':' {
return nil, fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i]) return nil, "", fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i])
} }
p.i++ p.i++
var mustBePseudoElement bool
if p.i >= len(p.s) {
return nil, "", fmt.Errorf("got empty pseudoclass (or pseudoelement)")
}
if p.s[p.i] == ':' { // we found a pseudo-element if p.s[p.i] == ':' { // we found a pseudo-element
mustBePseudoElement = true
p.i++ p.i++
} }
@ -441,27 +453,33 @@ func (p *parser) parsePseudoclassSelector() (out Sel, err error) {
return return
} }
name = toLowerASCII(name) name = toLowerASCII(name)
if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" &&
name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" &&
name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") {
return out, "", fmt.Errorf("unknown pseudoelement :%s", name)
}
switch name { switch name {
case "not", "has", "haschild": case "not", "has", "haschild":
if !p.consumeParenthesis() { if !p.consumeParenthesis() {
return out, errExpectedParenthesis return out, "", errExpectedParenthesis
} }
sel, parseErr := p.parseSelectorGroup() sel, parseErr := p.parseSelectorGroup()
if parseErr != nil { if parseErr != nil {
return out, parseErr return out, "", parseErr
} }
if !p.consumeClosingParenthesis() { if !p.consumeClosingParenthesis() {
return out, errExpectedClosingParenthesis return out, "", errExpectedClosingParenthesis
} }
out = relativePseudoClassSelector{name: name, match: sel} out = relativePseudoClassSelector{name: name, match: sel}
case "contains", "containsown": case "contains", "containsown":
if !p.consumeParenthesis() { if !p.consumeParenthesis() {
return out, errExpectedParenthesis return out, "", errExpectedParenthesis
} }
if p.i == len(p.s) { if p.i == len(p.s) {
return out, errUnmatchedParenthesis return out, "", errUnmatchedParenthesis
} }
var val string var val string
switch p.s[p.i] { switch p.s[p.i] {
@ -471,46 +489,46 @@ func (p *parser) parsePseudoclassSelector() (out Sel, err error) {
val, err = p.parseIdentifier() val, err = p.parseIdentifier()
} }
if err != nil { if err != nil {
return out, err return out, "", err
} }
val = strings.ToLower(val) val = strings.ToLower(val)
p.skipWhitespace() p.skipWhitespace()
if p.i >= len(p.s) { if p.i >= len(p.s) {
return out, errors.New("unexpected EOF in pseudo selector") return out, "", errors.New("unexpected EOF in pseudo selector")
} }
if !p.consumeClosingParenthesis() { if !p.consumeClosingParenthesis() {
return out, errExpectedClosingParenthesis return out, "", errExpectedClosingParenthesis
} }
out = containsPseudoClassSelector{own: name == "containsown", value: val} out = containsPseudoClassSelector{own: name == "containsown", value: val}
case "matches", "matchesown": case "matches", "matchesown":
if !p.consumeParenthesis() { if !p.consumeParenthesis() {
return out, errExpectedParenthesis return out, "", errExpectedParenthesis
} }
rx, err := p.parseRegex() rx, err := p.parseRegex()
if err != nil { if err != nil {
return out, err return out, "", err
} }
if p.i >= len(p.s) { if p.i >= len(p.s) {
return out, errors.New("unexpected EOF in pseudo selector") return out, "", errors.New("unexpected EOF in pseudo selector")
} }
if !p.consumeClosingParenthesis() { if !p.consumeClosingParenthesis() {
return out, errExpectedClosingParenthesis return out, "", errExpectedClosingParenthesis
} }
out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx} out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx}
case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type": case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type":
if !p.consumeParenthesis() { if !p.consumeParenthesis() {
return out, errExpectedParenthesis return out, "", errExpectedParenthesis
} }
a, b, err := p.parseNth() a, b, err := p.parseNth()
if err != nil { if err != nil {
return out, err return out, "", err
} }
if !p.consumeClosingParenthesis() { if !p.consumeClosingParenthesis() {
return out, errExpectedClosingParenthesis return out, "", errExpectedClosingParenthesis
} }
last := name == "nth-last-child" || name == "nth-last-of-type" last := name == "nth-last-child" || name == "nth-last-of-type"
ofType := name == "nth-of-type" || name == "nth-last-of-type" ofType := name == "nth-of-type" || name == "nth-last-of-type"
@ -535,9 +553,9 @@ func (p *parser) parsePseudoclassSelector() (out Sel, err error) {
case "root": case "root":
out = rootPseudoClassSelector{} out = rootPseudoClassSelector{}
case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error": case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error":
return out, errors.New("pseudo-elements are not yet supported") return nil, name, nil
default: default:
return out, fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name) return out, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name)
} }
return return
} }
@ -706,10 +724,12 @@ func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
selectors = append(selectors, r) selectors = append(selectors, r)
} }
var pseudoElement string
loop: loop:
for p.i < len(p.s) { for p.i < len(p.s) {
var ( var (
ns Sel ns Sel
newPseudoElement string
err error err error
) )
switch p.s[p.i] { switch p.s[p.i] {
@ -720,20 +740,37 @@ loop:
case '[': case '[':
ns, err = p.parseAttributeSelector() ns, err = p.parseAttributeSelector()
case ':': case ':':
ns, err = p.parsePseudoclassSelector() ns, newPseudoElement, err = p.parsePseudoclassSelector()
default: default:
break loop break loop
} }
if err != nil { if err != nil {
return nil, err return nil, err
} }
// From https://drafts.csswg.org/selectors-3/#pseudo-elements :
// "Only one pseudo-element may appear per selector, and if present
// it must appear after the sequence of simple selectors that
// represents the subjects of the selector.""
if ns == nil { // we found a pseudo-element
if pseudoElement != "" {
return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement)
}
if !p.acceptPseudoElements {
return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement)
}
pseudoElement = newPseudoElement
} else {
if pseudoElement != "" {
return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement)
}
selectors = append(selectors, ns) selectors = append(selectors, ns)
} }
if len(selectors) == 1 { // no need wrap the selectors in compoundSelector
}
if len(selectors) == 1 && pseudoElement == "" { // no need wrap the selectors in compoundSelector
return selectors[0], nil return selectors[0], nil
} }
return compoundSelector{selectors: selectors}, nil return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil
} }
// parseSelector parses a selector that may include combinators. // parseSelector parses a selector that may include combinators.

View file

@ -16,14 +16,19 @@ type Matcher interface {
} }
// Sel is the interface for all the functionality provided by selectors. // Sel is the interface for all the functionality provided by selectors.
// It is currently the same as Matcher, but other methods may be added in the
// future.
type Sel interface { type Sel interface {
Matcher Matcher
Specificity() Specificity Specificity() Specificity
// Returns a CSS input compiling to this selector.
String() string
// Returns a pseudo-element, or an empty string.
PseudoElement() string
} }
// Parse parses a selector. // Parse parses a selector. Use `ParseWithPseudoElement`
// if you need support for pseudo-elements.
func Parse(sel string) (Sel, error) { func Parse(sel string) (Sel, error) {
p := &parser{s: sel} p := &parser{s: sel}
compiled, err := p.parseSelector() compiled, err := p.parseSelector()
@ -38,7 +43,25 @@ func Parse(sel string) (Sel, error) {
return compiled, nil return compiled, nil
} }
// ParseWithPseudoElement parses a single selector,
// with support for pseudo-element.
func ParseWithPseudoElement(sel string) (Sel, error) {
p := &parser{s: sel, acceptPseudoElements: true}
compiled, err := p.parseSelector()
if err != nil {
return nil, err
}
if p.i < len(sel) {
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
}
return compiled, nil
}
// ParseGroup parses a selector, or a group of selectors separated by commas. // ParseGroup parses a selector, or a group of selectors separated by commas.
// Use `ParseGroupWithPseudoElements`
// if you need support for pseudo-elements.
func ParseGroup(sel string) (SelectorGroup, error) { func ParseGroup(sel string) (SelectorGroup, error) {
p := &parser{s: sel} p := &parser{s: sel}
compiled, err := p.parseSelectorGroup() compiled, err := p.parseSelectorGroup()
@ -53,6 +76,22 @@ func ParseGroup(sel string) (SelectorGroup, error) {
return compiled, nil return compiled, nil
} }
// ParseGroupWithPseudoElements parses a selector, or a group of selectors separated by commas.
// It supports pseudo-elements.
func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) {
p := &parser{s: sel, acceptPseudoElements: true}
compiled, err := p.parseSelectorGroup()
if err != nil {
return nil, err
}
if p.i < len(sel) {
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
}
return compiled, nil
}
// A Selector is a function which tells whether a node matches or not. // A Selector is a function which tells whether a node matches or not.
// //
// This type is maintained for compatibility; I recommend using the newer and // This type is maintained for compatibility; I recommend using the newer and
@ -182,6 +221,10 @@ func (c tagSelector) Specificity() Specificity {
return Specificity{0, 0, 1} return Specificity{0, 0, 1}
} }
func (c tagSelector) PseudoElement() string {
return ""
}
type classSelector struct { type classSelector struct {
class string class string
} }
@ -197,6 +240,10 @@ func (c classSelector) Specificity() Specificity {
return Specificity{0, 1, 0} return Specificity{0, 1, 0}
} }
func (c classSelector) PseudoElement() string {
return ""
}
type idSelector struct { type idSelector struct {
id string id string
} }
@ -212,6 +259,10 @@ func (c idSelector) Specificity() Specificity {
return Specificity{1, 0, 0} return Specificity{1, 0, 0}
} }
func (c idSelector) PseudoElement() string {
return ""
}
type attrSelector struct { type attrSelector struct {
key, val, operation string key, val, operation string
regexp *regexp.Regexp regexp *regexp.Regexp
@ -352,6 +403,10 @@ func (c attrSelector) Specificity() Specificity {
return Specificity{0, 1, 0} return Specificity{0, 1, 0}
} }
func (c attrSelector) PseudoElement() string {
return ""
}
// ---------------- Pseudo class selectors ---------------- // ---------------- Pseudo class selectors ----------------
// we use severals concrete types of pseudo-class selectors // we use severals concrete types of pseudo-class selectors
@ -415,6 +470,10 @@ func (s relativePseudoClassSelector) Specificity() Specificity {
return max return max
} }
func (c relativePseudoClassSelector) PseudoElement() string {
return ""
}
type containsPseudoClassSelector struct { type containsPseudoClassSelector struct {
own bool own bool
value string value string
@ -436,6 +495,10 @@ func (s containsPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0} return Specificity{0, 1, 0}
} }
func (c containsPseudoClassSelector) PseudoElement() string {
return ""
}
type regexpPseudoClassSelector struct { type regexpPseudoClassSelector struct {
own bool own bool
regexp *regexp.Regexp regexp *regexp.Regexp
@ -488,6 +551,10 @@ func (s regexpPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0} return Specificity{0, 1, 0}
} }
func (c regexpPseudoClassSelector) PseudoElement() string {
return ""
}
type nthPseudoClassSelector struct { type nthPseudoClassSelector struct {
a, b int a, b int
last, ofType bool last, ofType bool
@ -623,6 +690,10 @@ func (s nthPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0} return Specificity{0, 1, 0}
} }
func (c nthPseudoClassSelector) PseudoElement() string {
return ""
}
type onlyChildPseudoClassSelector struct { type onlyChildPseudoClassSelector struct {
ofType bool ofType bool
} }
@ -661,6 +732,10 @@ func (s onlyChildPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0} return Specificity{0, 1, 0}
} }
func (c onlyChildPseudoClassSelector) PseudoElement() string {
return ""
}
type inputPseudoClassSelector struct{} type inputPseudoClassSelector struct{}
// Matches input, select, textarea and button elements. // Matches input, select, textarea and button elements.
@ -672,6 +747,10 @@ func (s inputPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0} return Specificity{0, 1, 0}
} }
func (c inputPseudoClassSelector) PseudoElement() string {
return ""
}
type emptyElementPseudoClassSelector struct{} type emptyElementPseudoClassSelector struct{}
// Matches empty elements. // Matches empty elements.
@ -694,6 +773,10 @@ func (s emptyElementPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0} return Specificity{0, 1, 0}
} }
func (c emptyElementPseudoClassSelector) PseudoElement() string {
return ""
}
type rootPseudoClassSelector struct{} type rootPseudoClassSelector struct{}
// Match implements :root // Match implements :root
@ -711,8 +794,13 @@ func (s rootPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0} return Specificity{0, 1, 0}
} }
func (c rootPseudoClassSelector) PseudoElement() string {
return ""
}
type compoundSelector struct { type compoundSelector struct {
selectors []Sel selectors []Sel
pseudoElement string
} }
// Matches elements if each sub-selectors matches. // Matches elements if each sub-selectors matches.
@ -734,9 +822,17 @@ func (s compoundSelector) Specificity() Specificity {
for _, sel := range s.selectors { for _, sel := range s.selectors {
out = out.Add(sel.Specificity()) out = out.Add(sel.Specificity())
} }
if s.pseudoElement != "" {
// https://drafts.csswg.org/selectors-3/#specificity
out = out.Add(Specificity{0, 0, 1})
}
return out return out
} }
func (c compoundSelector) PseudoElement() string {
return c.pseudoElement
}
type combinedSelector struct { type combinedSelector struct {
first Sel first Sel
combinator byte combinator byte
@ -818,6 +914,15 @@ func (s combinedSelector) Specificity() Specificity {
return spec return spec
} }
// on combinedSelector, a pseudo-element only makes sens on the last
// selector, although others increase specificity.
func (c combinedSelector) PseudoElement() string {
if c.second == nil {
return ""
}
return c.second.PseudoElement()
}
// A SelectorGroup is a list of selectors, which matches if any of the // A SelectorGroup is a list of selectors, which matches if any of the
// individual selectors matches. // individual selectors matches.
type SelectorGroup []Sel type SelectorGroup []Sel

120
vendor/github.com/andybalholm/cascadia/serialize.go generated vendored Normal file
View file

@ -0,0 +1,120 @@
package cascadia
import (
"fmt"
"strings"
)
// implements the reverse operation Sel -> string
func (c tagSelector) String() string {
return c.tag
}
func (c idSelector) String() string {
return "#" + c.id
}
func (c classSelector) String() string {
return "." + c.class
}
func (c attrSelector) String() string {
val := c.val
if c.operation == "#=" {
val = c.regexp.String()
} else if c.operation != "" {
val = fmt.Sprintf(`"%s"`, val)
}
return fmt.Sprintf(`[%s%s%s]`, c.key, c.operation, val)
}
func (c relativePseudoClassSelector) String() string {
return fmt.Sprintf(":%s(%s)", c.name, c.match.String())
}
func (c containsPseudoClassSelector) String() string {
s := "contains"
if c.own {
s += "Own"
}
return fmt.Sprintf(`:%s("%s")`, s, c.value)
}
func (c regexpPseudoClassSelector) String() string {
s := "matches"
if c.own {
s += "Own"
}
return fmt.Sprintf(":%s(%s)", s, c.regexp.String())
}
func (c nthPseudoClassSelector) String() string {
if c.a == 0 && c.b == 1 { // special cases
s := ":first-"
if c.last {
s = ":last-"
}
if c.ofType {
s += "of-type"
} else {
s += "child"
}
return s
}
var name string
switch [2]bool{c.last, c.ofType} {
case [2]bool{true, true}:
name = "nth-last-of-type"
case [2]bool{true, false}:
name = "nth-last-child"
case [2]bool{false, true}:
name = "nth-of-type"
case [2]bool{false, false}:
name = "nth-child"
}
return fmt.Sprintf(":%s(%dn+%d)", name, c.a, c.b)
}
func (c onlyChildPseudoClassSelector) String() string {
if c.ofType {
return ":only-of-type"
}
return ":only-child"
}
func (c inputPseudoClassSelector) String() string {
return ":input"
}
func (c emptyElementPseudoClassSelector) String() string {
return ":empty"
}
func (c rootPseudoClassSelector) String() string {
return ":root"
}
func (c compoundSelector) String() string {
if len(c.selectors) == 0 && c.pseudoElement == "" {
return "*"
}
chunks := make([]string, len(c.selectors))
for i, sel := range c.selectors {
chunks[i] = sel.String()
}
s := strings.Join(chunks, "")
if c.pseudoElement != "" {
s += "::" + c.pseudoElement
}
return s
}
func (c combinedSelector) String() string {
start := c.first.String()
if c.second != nil {
start += fmt.Sprintf(" %s %s", string(c.combinator), c.second.String())
}
return start
}
func (c SelectorGroup) String() string {
ck := make([]string, len(c))
for i, s := range c {
ck[i] = s.String()
}
return strings.Join(ck, ", ")
}

View file

@ -135,3 +135,6 @@ Email: jay at (my github username).com
Twitter: [@jtaylor](https://twitter.com/jtaylor) Twitter: [@jtaylor](https://twitter.com/jtaylor)
# Alternatives
https://github.com/k3a/html2text - Lightweight

View file

@ -1,8 +1,16 @@
language: go language: go
sudo: false
go: go:
- 1.13.x
- tip - tip
before_install: before_install:
- go get github.com/mattn/goveralls - go get -t -v ./...
- go get golang.org/x/tools/cmd/cover
script: script:
- $HOME/gopath/bin/goveralls -repotoken lAKAWPzcGsD3A8yBX3BGGtRUdJ6CaGERL - go generate
- git diff --cached --exit-code
- ./go.test.sh
after_success:
- bash <(curl -s https://codecov.io/bash)

View file

@ -2,7 +2,7 @@ go-runewidth
============ ============
[![Build Status](https://travis-ci.org/mattn/go-runewidth.png?branch=master)](https://travis-ci.org/mattn/go-runewidth) [![Build Status](https://travis-ci.org/mattn/go-runewidth.png?branch=master)](https://travis-ci.org/mattn/go-runewidth)
[![Coverage Status](https://coveralls.io/repos/mattn/go-runewidth/badge.png?branch=HEAD)](https://coveralls.io/r/mattn/go-runewidth?branch=HEAD) [![Codecov](https://codecov.io/gh/mattn/go-runewidth/branch/master/graph/badge.svg)](https://codecov.io/gh/mattn/go-runewidth)
[![GoDoc](https://godoc.org/github.com/mattn/go-runewidth?status.svg)](http://godoc.org/github.com/mattn/go-runewidth) [![GoDoc](https://godoc.org/github.com/mattn/go-runewidth?status.svg)](http://godoc.org/github.com/mattn/go-runewidth)
[![Go Report Card](https://goreportcard.com/badge/github.com/mattn/go-runewidth)](https://goreportcard.com/report/github.com/mattn/go-runewidth) [![Go Report Card](https://goreportcard.com/badge/github.com/mattn/go-runewidth)](https://goreportcard.com/report/github.com/mattn/go-runewidth)

12
vendor/github.com/mattn/go-runewidth/go.test.sh generated vendored Normal file
View file

@ -0,0 +1,12 @@
#!/usr/bin/env bash
set -e
echo "" > coverage.txt
for d in $(go list ./... | grep -v vendor); do
go test -race -coverprofile=profile.out -covermode=atomic "$d"
if [ -f profile.out ]; then
cat profile.out >> coverage.txt
rm profile.out
fi
done

View file

@ -50,7 +50,6 @@ func inTables(r rune, ts ...table) bool {
} }
func inTable(r rune, t table) bool { func inTable(r rune, t table) bool {
// func (t table) IncludesRune(r rune) bool {
if r < t[0].first { if r < t[0].first {
return false return false
} }

View file

@ -62,7 +62,10 @@ func isEastAsian(locale string) bool {
// IsEastAsian return true if the current locale is CJK // IsEastAsian return true if the current locale is CJK
func IsEastAsian() bool { func IsEastAsian() bool {
locale := os.Getenv("LC_CTYPE") locale := os.Getenv("LC_ALL")
if locale == "" {
locale = os.Getenv("LC_CTYPE")
}
if locale == "" { if locale == "" {
locale = os.Getenv("LANG") locale = os.Getenv("LANG")
} }

View file

@ -1,20 +1,23 @@
// Code generated by script/generate.go. DO NOT EDIT.
package runewidth package runewidth
var combining = table{ var combining = table{
{0x0300, 0x036F}, {0x0483, 0x0489}, {0x07EB, 0x07F3}, {0x0300, 0x036F}, {0x0483, 0x0489}, {0x07EB, 0x07F3},
{0x0C00, 0x0C00}, {0x0C04, 0x0C04}, {0x0D00, 0x0D01}, {0x0C00, 0x0C00}, {0x0C04, 0x0C04}, {0x0D00, 0x0D01},
{0x135D, 0x135F}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1ABE}, {0x135D, 0x135F}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1AC0},
{0x1B6B, 0x1B73}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF}, {0x1B6B, 0x1B73}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF},
{0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2DE0, 0x2DFF}, {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2DE0, 0x2DFF},
{0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D},
{0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA8E0, 0xA8F1}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA8E0, 0xA8F1},
{0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x10376, 0x1037A}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x10376, 0x1037A},
{0x10F46, 0x10F50}, {0x11300, 0x11301}, {0x1133B, 0x1133C}, {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x11300, 0x11301},
{0x11366, 0x1136C}, {0x11370, 0x11374}, {0x16AF0, 0x16AF4}, {0x1133B, 0x1133C}, {0x11366, 0x1136C}, {0x11370, 0x11374},
{0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x16AF0, 0x16AF4}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172},
{0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD},
{0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1D242, 0x1D244}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018},
{0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E8D0, 0x1E8D6}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A},
{0x1E8D0, 0x1E8D6},
} }
var doublewidth = table{ var doublewidth = table{
@ -32,29 +35,30 @@ var doublewidth = table{
{0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99}, {0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99},
{0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB},
{0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF}, {0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF},
{0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31BA}, {0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31E3},
{0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x4DBF},
{0x3250, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
{0xA960, 0xA97C}, {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19},
{0xFE10, 0xFE19}, {0xFE30, 0xFE52}, {0xFE54, 0xFE66}, {0xFE30, 0xFE52}, {0xFE54, 0xFE66}, {0xFE68, 0xFE6B},
{0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4},
{0x16FE0, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18AF2}, {0x16FF0, 0x16FF1}, {0x17000, 0x187F7}, {0x18800, 0x18CD5},
{0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, {0x18D00, 0x18D08}, {0x1B000, 0x1B11E}, {0x1B150, 0x1B152},
{0x1B170, 0x1B2FB}, {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1F004, 0x1F004},
{0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A}, {0x1F200, 0x1F202}, {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A},
{0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248},
{0x1F260, 0x1F265}, {0x1F300, 0x1F320}, {0x1F32D, 0x1F335}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F320},
{0x1F337, 0x1F37C}, {0x1F37E, 0x1F393}, {0x1F3A0, 0x1F3CA}, {0x1F32D, 0x1F335}, {0x1F337, 0x1F37C}, {0x1F37E, 0x1F393},
{0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0}, {0x1F3F4, 0x1F3F4}, {0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0},
{0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440}, {0x1F442, 0x1F4FC}, {0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440},
{0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E}, {0x1F550, 0x1F567}, {0x1F442, 0x1F4FC}, {0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E},
{0x1F57A, 0x1F57A}, {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A4}, {0x1F550, 0x1F567}, {0x1F57A, 0x1F57A}, {0x1F595, 0x1F596},
{0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC}, {0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5},
{0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D5}, {0x1F6EB, 0x1F6EC}, {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D7},
{0x1F6F4, 0x1F6FA}, {0x1F7E0, 0x1F7EB}, {0x1F90D, 0x1F971}, {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB},
{0x1F973, 0x1F976}, {0x1F97A, 0x1F9A2}, {0x1F9A5, 0x1F9AA}, {0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1F978},
{0x1F9AE, 0x1F9CA}, {0x1F9CD, 0x1F9FF}, {0x1FA70, 0x1FA73}, {0x1F97A, 0x1F9CB}, {0x1F9CD, 0x1F9FF}, {0x1FA70, 0x1FA74},
{0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA82}, {0x1FA90, 0x1FA95}, {0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAA8},
{0x1FAB0, 0x1FAB6}, {0x1FAC0, 0x1FAC2}, {0x1FAD0, 0x1FAD6},
{0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
} }
@ -151,7 +155,7 @@ var neutral = table{
{0x0600, 0x061C}, {0x061E, 0x070D}, {0x070F, 0x074A}, {0x0600, 0x061C}, {0x061E, 0x070D}, {0x070F, 0x074A},
{0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D}, {0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D},
{0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E}, {0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E},
{0x0860, 0x086A}, {0x08A0, 0x08B4}, {0x08B6, 0x08BD}, {0x0860, 0x086A}, {0x08A0, 0x08B4}, {0x08B6, 0x08C7},
{0x08D3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990}, {0x08D3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
{0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
{0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8}, {0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8},
@ -170,7 +174,7 @@ var neutral = table{
{0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28},
{0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39},
{0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
{0x0B56, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63}, {0x0B55, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63},
{0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A}, {0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
{0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
{0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4},
@ -184,166 +188,169 @@ var neutral = table{
{0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9},
{0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
{0x0CD5, 0x0CD6}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE3}, {0x0CD5, 0x0CD6}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE3},
{0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2}, {0x0D00, 0x0D03}, {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2}, {0x0D00, 0x0D0C},
{0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44}, {0x0D46, 0x0D48},
{0x0D46, 0x0D48}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63}, {0x0D66, 0x0D7F},
{0x0D66, 0x0D7F}, {0x0D82, 0x0D83}, {0x0D85, 0x0D96}, {0x0D81, 0x0D83}, {0x0D85, 0x0D96}, {0x0D9A, 0x0DB1},
{0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD}, {0x0DC0, 0x0DC6},
{0x0DC0, 0x0DC6}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4}, {0x0DD6, 0x0DD6},
{0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF}, {0x0DF2, 0x0DF4},
{0x0DF2, 0x0DF4}, {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B}, {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B}, {0x0E81, 0x0E82},
{0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A}, {0x0E8C, 0x0EA3},
{0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD}, {0x0EC0, 0x0EC4},
{0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD}, {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9},
{0x0ED0, 0x0ED9}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47}, {0x0F49, 0x0F6C},
{0x0F49, 0x0F6C}, {0x0F71, 0x0F97}, {0x0F99, 0x0FBC}, {0x0F71, 0x0F97}, {0x0F99, 0x0FBC}, {0x0FBE, 0x0FCC},
{0x0FBE, 0x0FCC}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5}, {0x10C7, 0x10C7},
{0x10C7, 0x10C7}, {0x10CD, 0x10CD}, {0x10D0, 0x10FF}, {0x10CD, 0x10CD}, {0x10D0, 0x10FF}, {0x1160, 0x1248},
{0x1160, 0x1248}, {0x124A, 0x124D}, {0x1250, 0x1256}, {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258},
{0x1258, 0x1258}, {0x125A, 0x125D}, {0x1260, 0x1288}, {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D},
{0x128A, 0x128D}, {0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE},
{0x12B8, 0x12BE}, {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6},
{0x12C8, 0x12D6}, {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A},
{0x1318, 0x135A}, {0x135D, 0x137C}, {0x1380, 0x1399}, {0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5},
{0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8},
{0x16A0, 0x16F8}, {0x1700, 0x170C}, {0x170E, 0x1714}, {0x1700, 0x170C}, {0x170E, 0x1714}, {0x1720, 0x1736},
{0x1720, 0x1736}, {0x1740, 0x1753}, {0x1760, 0x176C}, {0x1740, 0x1753}, {0x1760, 0x176C}, {0x176E, 0x1770},
{0x176E, 0x1770}, {0x1772, 0x1773}, {0x1780, 0x17DD}, {0x1772, 0x1773}, {0x1780, 0x17DD}, {0x17E0, 0x17E9},
{0x17E0, 0x17E9}, {0x17F0, 0x17F9}, {0x1800, 0x180E}, {0x17F0, 0x17F9}, {0x1800, 0x180E}, {0x1810, 0x1819},
{0x1810, 0x1819}, {0x1820, 0x1878}, {0x1880, 0x18AA}, {0x1820, 0x1878}, {0x1880, 0x18AA}, {0x18B0, 0x18F5},
{0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1920, 0x192B}, {0x1900, 0x191E}, {0x1920, 0x192B}, {0x1930, 0x193B},
{0x1930, 0x193B}, {0x1940, 0x1940}, {0x1944, 0x196D}, {0x1940, 0x1940}, {0x1944, 0x196D}, {0x1970, 0x1974},
{0x1970, 0x1974}, {0x1980, 0x19AB}, {0x19B0, 0x19C9}, {0x1980, 0x19AB}, {0x19B0, 0x19C9}, {0x19D0, 0x19DA},
{0x19D0, 0x19DA}, {0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E}, {0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C},
{0x1A60, 0x1A7C}, {0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, {0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD},
{0x1AA0, 0x1AAD}, {0x1AB0, 0x1ABE}, {0x1B00, 0x1B4B}, {0x1AB0, 0x1AC0}, {0x1B00, 0x1B4B}, {0x1B50, 0x1B7C},
{0x1B50, 0x1B7C}, {0x1B80, 0x1BF3}, {0x1BFC, 0x1C37}, {0x1B80, 0x1BF3}, {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49},
{0x1C3B, 0x1C49}, {0x1C4D, 0x1C88}, {0x1C90, 0x1CBA}, {0x1C4D, 0x1C88}, {0x1C90, 0x1CBA}, {0x1CBD, 0x1CC7},
{0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA}, {0x1D00, 0x1DF9}, {0x1CD0, 0x1CFA}, {0x1D00, 0x1DF9}, {0x1DFB, 0x1F15},
{0x1DFB, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
{0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B},
{0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
{0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3}, {0x1FD6, 0x1FDB},
{0x1FD6, 0x1FDB}, {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4}, {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFE},
{0x1FF6, 0x1FFE}, {0x2000, 0x200F}, {0x2011, 0x2012}, {0x2000, 0x200F}, {0x2011, 0x2012}, {0x2017, 0x2017},
{0x2017, 0x2017}, {0x201A, 0x201B}, {0x201E, 0x201F}, {0x201A, 0x201B}, {0x201E, 0x201F}, {0x2023, 0x2023},
{0x2023, 0x2023}, {0x2028, 0x202F}, {0x2031, 0x2031}, {0x2028, 0x202F}, {0x2031, 0x2031}, {0x2034, 0x2034},
{0x2034, 0x2034}, {0x2036, 0x203A}, {0x203C, 0x203D}, {0x2036, 0x203A}, {0x203C, 0x203D}, {0x203F, 0x2064},
{0x203F, 0x2064}, {0x2066, 0x2071}, {0x2075, 0x207E}, {0x2066, 0x2071}, {0x2075, 0x207E}, {0x2080, 0x2080},
{0x2080, 0x2080}, {0x2085, 0x208E}, {0x2090, 0x209C}, {0x2085, 0x208E}, {0x2090, 0x209C}, {0x20A0, 0x20A8},
{0x20A0, 0x20A8}, {0x20AA, 0x20AB}, {0x20AD, 0x20BF}, {0x20AA, 0x20AB}, {0x20AD, 0x20BF}, {0x20D0, 0x20F0},
{0x20D0, 0x20F0}, {0x2100, 0x2102}, {0x2104, 0x2104}, {0x2100, 0x2102}, {0x2104, 0x2104}, {0x2106, 0x2108},
{0x2106, 0x2108}, {0x210A, 0x2112}, {0x2114, 0x2115}, {0x210A, 0x2112}, {0x2114, 0x2115}, {0x2117, 0x2120},
{0x2117, 0x2120}, {0x2123, 0x2125}, {0x2127, 0x212A}, {0x2123, 0x2125}, {0x2127, 0x212A}, {0x212C, 0x2152},
{0x212C, 0x2152}, {0x2155, 0x215A}, {0x215F, 0x215F}, {0x2155, 0x215A}, {0x215F, 0x215F}, {0x216C, 0x216F},
{0x216C, 0x216F}, {0x217A, 0x2188}, {0x218A, 0x218B}, {0x217A, 0x2188}, {0x218A, 0x218B}, {0x219A, 0x21B7},
{0x219A, 0x21B7}, {0x21BA, 0x21D1}, {0x21D3, 0x21D3}, {0x21BA, 0x21D1}, {0x21D3, 0x21D3}, {0x21D5, 0x21E6},
{0x21D5, 0x21E6}, {0x21E8, 0x21FF}, {0x2201, 0x2201}, {0x21E8, 0x21FF}, {0x2201, 0x2201}, {0x2204, 0x2206},
{0x2204, 0x2206}, {0x2209, 0x220A}, {0x220C, 0x220E}, {0x2209, 0x220A}, {0x220C, 0x220E}, {0x2210, 0x2210},
{0x2210, 0x2210}, {0x2212, 0x2214}, {0x2216, 0x2219}, {0x2212, 0x2214}, {0x2216, 0x2219}, {0x221B, 0x221C},
{0x221B, 0x221C}, {0x2221, 0x2222}, {0x2224, 0x2224}, {0x2221, 0x2222}, {0x2224, 0x2224}, {0x2226, 0x2226},
{0x2226, 0x2226}, {0x222D, 0x222D}, {0x222F, 0x2233}, {0x222D, 0x222D}, {0x222F, 0x2233}, {0x2238, 0x223B},
{0x2238, 0x223B}, {0x223E, 0x2247}, {0x2249, 0x224B}, {0x223E, 0x2247}, {0x2249, 0x224B}, {0x224D, 0x2251},
{0x224D, 0x2251}, {0x2253, 0x225F}, {0x2262, 0x2263}, {0x2253, 0x225F}, {0x2262, 0x2263}, {0x2268, 0x2269},
{0x2268, 0x2269}, {0x226C, 0x226D}, {0x2270, 0x2281}, {0x226C, 0x226D}, {0x2270, 0x2281}, {0x2284, 0x2285},
{0x2284, 0x2285}, {0x2288, 0x2294}, {0x2296, 0x2298}, {0x2288, 0x2294}, {0x2296, 0x2298}, {0x229A, 0x22A4},
{0x229A, 0x22A4}, {0x22A6, 0x22BE}, {0x22C0, 0x2311}, {0x22A6, 0x22BE}, {0x22C0, 0x2311}, {0x2313, 0x2319},
{0x2313, 0x2319}, {0x231C, 0x2328}, {0x232B, 0x23E8}, {0x231C, 0x2328}, {0x232B, 0x23E8}, {0x23ED, 0x23EF},
{0x23ED, 0x23EF}, {0x23F1, 0x23F2}, {0x23F4, 0x2426}, {0x23F1, 0x23F2}, {0x23F4, 0x2426}, {0x2440, 0x244A},
{0x2440, 0x244A}, {0x24EA, 0x24EA}, {0x254C, 0x254F}, {0x24EA, 0x24EA}, {0x254C, 0x254F}, {0x2574, 0x257F},
{0x2574, 0x257F}, {0x2590, 0x2591}, {0x2596, 0x259F}, {0x2590, 0x2591}, {0x2596, 0x259F}, {0x25A2, 0x25A2},
{0x25A2, 0x25A2}, {0x25AA, 0x25B1}, {0x25B4, 0x25B5}, {0x25AA, 0x25B1}, {0x25B4, 0x25B5}, {0x25B8, 0x25BB},
{0x25B8, 0x25BB}, {0x25BE, 0x25BF}, {0x25C2, 0x25C5}, {0x25BE, 0x25BF}, {0x25C2, 0x25C5}, {0x25C9, 0x25CA},
{0x25C9, 0x25CA}, {0x25CC, 0x25CD}, {0x25D2, 0x25E1}, {0x25CC, 0x25CD}, {0x25D2, 0x25E1}, {0x25E6, 0x25EE},
{0x25E6, 0x25EE}, {0x25F0, 0x25FC}, {0x25FF, 0x2604}, {0x25F0, 0x25FC}, {0x25FF, 0x2604}, {0x2607, 0x2608},
{0x2607, 0x2608}, {0x260A, 0x260D}, {0x2610, 0x2613}, {0x260A, 0x260D}, {0x2610, 0x2613}, {0x2616, 0x261B},
{0x2616, 0x261B}, {0x261D, 0x261D}, {0x261F, 0x263F}, {0x261D, 0x261D}, {0x261F, 0x263F}, {0x2641, 0x2641},
{0x2641, 0x2641}, {0x2643, 0x2647}, {0x2654, 0x265F}, {0x2643, 0x2647}, {0x2654, 0x265F}, {0x2662, 0x2662},
{0x2662, 0x2662}, {0x2666, 0x2666}, {0x266B, 0x266B}, {0x2666, 0x2666}, {0x266B, 0x266B}, {0x266E, 0x266E},
{0x266E, 0x266E}, {0x2670, 0x267E}, {0x2680, 0x2692}, {0x2670, 0x267E}, {0x2680, 0x2692}, {0x2694, 0x269D},
{0x2694, 0x269D}, {0x26A0, 0x26A0}, {0x26A2, 0x26A9}, {0x26A0, 0x26A0}, {0x26A2, 0x26A9}, {0x26AC, 0x26BC},
{0x26AC, 0x26BC}, {0x26C0, 0x26C3}, {0x26E2, 0x26E2}, {0x26C0, 0x26C3}, {0x26E2, 0x26E2}, {0x26E4, 0x26E7},
{0x26E4, 0x26E7}, {0x2700, 0x2704}, {0x2706, 0x2709}, {0x2700, 0x2704}, {0x2706, 0x2709}, {0x270C, 0x2727},
{0x270C, 0x2727}, {0x2729, 0x273C}, {0x273E, 0x274B}, {0x2729, 0x273C}, {0x273E, 0x274B}, {0x274D, 0x274D},
{0x274D, 0x274D}, {0x274F, 0x2752}, {0x2756, 0x2756}, {0x274F, 0x2752}, {0x2756, 0x2756}, {0x2758, 0x2775},
{0x2758, 0x2775}, {0x2780, 0x2794}, {0x2798, 0x27AF}, {0x2780, 0x2794}, {0x2798, 0x27AF}, {0x27B1, 0x27BE},
{0x27B1, 0x27BE}, {0x27C0, 0x27E5}, {0x27EE, 0x2984}, {0x27C0, 0x27E5}, {0x27EE, 0x2984}, {0x2987, 0x2B1A},
{0x2987, 0x2B1A}, {0x2B1D, 0x2B4F}, {0x2B51, 0x2B54}, {0x2B1D, 0x2B4F}, {0x2B51, 0x2B54}, {0x2B5A, 0x2B73},
{0x2B5A, 0x2B73}, {0x2B76, 0x2B95}, {0x2B98, 0x2C2E}, {0x2B76, 0x2B95}, {0x2B97, 0x2C2E}, {0x2C30, 0x2C5E},
{0x2C30, 0x2C5E}, {0x2C60, 0x2CF3}, {0x2CF9, 0x2D25}, {0x2C60, 0x2CF3}, {0x2CF9, 0x2D25}, {0x2D27, 0x2D27},
{0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D70},
{0x2D6F, 0x2D70}, {0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE},
{0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6},
{0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE},
{0x2DD8, 0x2DDE}, {0x2DE0, 0x2E4F}, {0x303F, 0x303F}, {0x2DE0, 0x2E52}, {0x303F, 0x303F}, {0x4DC0, 0x4DFF},
{0x4DC0, 0x4DFF}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7}, {0xA700, 0xA7BF},
{0xA700, 0xA7BF}, {0xA7C2, 0xA7C6}, {0xA7F7, 0xA82B}, {0xA7C2, 0xA7CA}, {0xA7F5, 0xA82C}, {0xA830, 0xA839},
{0xA830, 0xA839}, {0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA8CE, 0xA8D9},
{0xA8CE, 0xA8D9}, {0xA8E0, 0xA953}, {0xA95F, 0xA95F}, {0xA8E0, 0xA953}, {0xA95F, 0xA95F}, {0xA980, 0xA9CD},
{0xA980, 0xA9CD}, {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36},
{0xAA00, 0xAA36}, {0xAA40, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA40, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA5C, 0xAAC2},
{0xAA5C, 0xAAC2}, {0xAADB, 0xAAF6}, {0xAB01, 0xAB06}, {0xAADB, 0xAAF6}, {0xAB01, 0xAB06}, {0xAB09, 0xAB0E},
{0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E},
{0xAB28, 0xAB2E}, {0xAB30, 0xAB67}, {0xAB70, 0xABED}, {0xAB30, 0xAB6B}, {0xAB70, 0xABED}, {0xABF0, 0xABF9},
{0xABF0, 0xABF9}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xD800, 0xDFFF},
{0xD800, 0xDFFF}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB36},
{0xFB1D, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41},
{0xFB40, 0xFB41}, {0xFB43, 0xFB44}, {0xFB46, 0xFBC1}, {0xFB43, 0xFB44}, {0xFB46, 0xFBC1}, {0xFBD3, 0xFD3F},
{0xFBD3, 0xFD3F}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFD},
{0xFDF0, 0xFDFD}, {0xFE20, 0xFE2F}, {0xFE70, 0xFE74}, {0xFE20, 0xFE2F}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC},
{0xFE76, 0xFEFC}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFC}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFC}, {0x10000, 0x1000B},
{0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D},
{0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA},
{0x10080, 0x100FA}, {0x10100, 0x10102}, {0x10107, 0x10133}, {0x10100, 0x10102}, {0x10107, 0x10133}, {0x10137, 0x1018E},
{0x10137, 0x1018E}, {0x10190, 0x1019B}, {0x101A0, 0x101A0}, {0x10190, 0x1019C}, {0x101A0, 0x101A0}, {0x101D0, 0x101FD},
{0x101D0, 0x101FD}, {0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x102E0, 0x102FB},
{0x102E0, 0x102FB}, {0x10300, 0x10323}, {0x1032D, 0x1034A}, {0x10300, 0x10323}, {0x1032D, 0x1034A}, {0x10350, 0x1037A},
{0x10350, 0x1037A}, {0x10380, 0x1039D}, {0x1039F, 0x103C3}, {0x10380, 0x1039D}, {0x1039F, 0x103C3}, {0x103C8, 0x103D5},
{0x103C8, 0x103D5}, {0x10400, 0x1049D}, {0x104A0, 0x104A9}, {0x10400, 0x1049D}, {0x104A0, 0x104A9}, {0x104B0, 0x104D3},
{0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563},
{0x10530, 0x10563}, {0x1056F, 0x1056F}, {0x10600, 0x10736}, {0x1056F, 0x1056F}, {0x10600, 0x10736}, {0x10740, 0x10755},
{0x10740, 0x10755}, {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808},
{0x10808, 0x10808}, {0x1080A, 0x10835}, {0x10837, 0x10838}, {0x1080A, 0x10835}, {0x10837, 0x10838}, {0x1083C, 0x1083C},
{0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10857, 0x1089E}, {0x1083F, 0x10855}, {0x10857, 0x1089E}, {0x108A7, 0x108AF},
{0x108A7, 0x108AF}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x108FB, 0x1091B},
{0x108FB, 0x1091B}, {0x1091F, 0x10939}, {0x1093F, 0x1093F}, {0x1091F, 0x10939}, {0x1093F, 0x1093F}, {0x10980, 0x109B7},
{0x10980, 0x109B7}, {0x109BC, 0x109CF}, {0x109D2, 0x10A03}, {0x109BC, 0x109CF}, {0x109D2, 0x10A03}, {0x10A05, 0x10A06},
{0x10A05, 0x10A06}, {0x10A0C, 0x10A13}, {0x10A15, 0x10A17}, {0x10A0C, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35},
{0x10A19, 0x10A35}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48}, {0x10A50, 0x10A58},
{0x10A50, 0x10A58}, {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6}, {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6},
{0x10AEB, 0x10AF6}, {0x10B00, 0x10B35}, {0x10B39, 0x10B55}, {0x10B00, 0x10B35}, {0x10B39, 0x10B55}, {0x10B58, 0x10B72},
{0x10B58, 0x10B72}, {0x10B78, 0x10B91}, {0x10B99, 0x10B9C}, {0x10B78, 0x10B91}, {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF},
{0x10BA9, 0x10BAF}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2},
{0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27}, {0x10D30, 0x10D39}, {0x10CFA, 0x10D27}, {0x10D30, 0x10D39}, {0x10E60, 0x10E7E},
{0x10E60, 0x10E7E}, {0x10F00, 0x10F27}, {0x10F30, 0x10F59}, {0x10E80, 0x10EA9}, {0x10EAB, 0x10EAD}, {0x10EB0, 0x10EB1},
{0x10F00, 0x10F27}, {0x10F30, 0x10F59}, {0x10FB0, 0x10FCB},
{0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x1106F}, {0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x1106F},
{0x1107F, 0x110C1}, {0x110CD, 0x110CD}, {0x110D0, 0x110E8}, {0x1107F, 0x110C1}, {0x110CD, 0x110CD}, {0x110D0, 0x110E8},
{0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11146}, {0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11147},
{0x11150, 0x11176}, {0x11180, 0x111CD}, {0x111D0, 0x111DF}, {0x11150, 0x11176}, {0x11180, 0x111DF}, {0x111E1, 0x111F4},
{0x111E1, 0x111F4}, {0x11200, 0x11211}, {0x11213, 0x1123E}, {0x11200, 0x11211}, {0x11213, 0x1123E}, {0x11280, 0x11286},
{0x11280, 0x11286}, {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D},
{0x1128F, 0x1129D}, {0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x112F0, 0x112F9},
{0x112F0, 0x112F9}, {0x11300, 0x11303}, {0x11305, 0x1130C}, {0x11300, 0x11303}, {0x11305, 0x1130C}, {0x1130F, 0x11310},
{0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333},
{0x11332, 0x11333}, {0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11347, 0x11348},
{0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x11357, 0x11357},
{0x11357, 0x11357}, {0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374},
{0x11370, 0x11374}, {0x11400, 0x11459}, {0x1145B, 0x1145B}, {0x11400, 0x1145B}, {0x1145D, 0x11461}, {0x11480, 0x114C7},
{0x1145D, 0x1145F}, {0x11480, 0x114C7}, {0x114D0, 0x114D9}, {0x114D0, 0x114D9}, {0x11580, 0x115B5}, {0x115B8, 0x115DD},
{0x11580, 0x115B5}, {0x115B8, 0x115DD}, {0x11600, 0x11644}, {0x11600, 0x11644}, {0x11650, 0x11659}, {0x11660, 0x1166C},
{0x11650, 0x11659}, {0x11660, 0x1166C}, {0x11680, 0x116B8}, {0x11680, 0x116B8}, {0x116C0, 0x116C9}, {0x11700, 0x1171A},
{0x116C0, 0x116C9}, {0x11700, 0x1171A}, {0x1171D, 0x1172B}, {0x1171D, 0x1172B}, {0x11730, 0x1173F}, {0x11800, 0x1183B},
{0x11730, 0x1173F}, {0x11800, 0x1183B}, {0x118A0, 0x118F2}, {0x118A0, 0x118F2}, {0x118FF, 0x11906}, {0x11909, 0x11909},
{0x118FF, 0x118FF}, {0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x11935},
{0x119DA, 0x119E4}, {0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11937, 0x11938}, {0x1193B, 0x11946}, {0x11950, 0x11959},
{0x11AC0, 0x11AF8}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C36}, {0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x119DA, 0x119E4},
{0x11C38, 0x11C45}, {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F}, {0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11AC0, 0x11AF8},
{0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C36}, {0x11C38, 0x11C45},
{0x11D08, 0x11D09}, {0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F}, {0x11C92, 0x11CA7},
{0x11D3C, 0x11D3D}, {0x11D3F, 0x11D47}, {0x11D50, 0x11D59}, {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09},
{0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E}, {0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D},
{0x11D90, 0x11D91}, {0x11D93, 0x11D98}, {0x11DA0, 0x11DA9}, {0x11D3F, 0x11D47}, {0x11D50, 0x11D59}, {0x11D60, 0x11D65},
{0x11EE0, 0x11EF8}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E}, {0x11D90, 0x11D91},
{0x11D93, 0x11D98}, {0x11DA0, 0x11DA9}, {0x11EE0, 0x11EF8},
{0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399},
{0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543}, {0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543},
{0x13000, 0x1342E}, {0x13430, 0x13438}, {0x14400, 0x14646}, {0x13000, 0x1342E}, {0x13430, 0x13438}, {0x14400, 0x14646},
{0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A60, 0x16A69}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A60, 0x16A69},
@ -382,20 +389,22 @@ var neutral = table{
{0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x1EEF0, 0x1EEF1}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x1EEF0, 0x1EEF1},
{0x1F000, 0x1F003}, {0x1F005, 0x1F02B}, {0x1F030, 0x1F093}, {0x1F000, 0x1F003}, {0x1F005, 0x1F02B}, {0x1F030, 0x1F093},
{0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF}, {0x1F0C1, 0x1F0CE}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF}, {0x1F0C1, 0x1F0CE},
{0x1F0D1, 0x1F0F5}, {0x1F10B, 0x1F10C}, {0x1F12E, 0x1F12F}, {0x1F0D1, 0x1F0F5}, {0x1F10B, 0x1F10F}, {0x1F12E, 0x1F12F},
{0x1F16A, 0x1F16C}, {0x1F1E6, 0x1F1FF}, {0x1F321, 0x1F32C}, {0x1F16A, 0x1F16F}, {0x1F1AD, 0x1F1AD}, {0x1F1E6, 0x1F1FF},
{0x1F336, 0x1F336}, {0x1F37D, 0x1F37D}, {0x1F394, 0x1F39F}, {0x1F321, 0x1F32C}, {0x1F336, 0x1F336}, {0x1F37D, 0x1F37D},
{0x1F3CB, 0x1F3CE}, {0x1F3D4, 0x1F3DF}, {0x1F3F1, 0x1F3F3}, {0x1F394, 0x1F39F}, {0x1F3CB, 0x1F3CE}, {0x1F3D4, 0x1F3DF},
{0x1F3F5, 0x1F3F7}, {0x1F43F, 0x1F43F}, {0x1F441, 0x1F441}, {0x1F3F1, 0x1F3F3}, {0x1F3F5, 0x1F3F7}, {0x1F43F, 0x1F43F},
{0x1F4FD, 0x1F4FE}, {0x1F53E, 0x1F54A}, {0x1F54F, 0x1F54F}, {0x1F441, 0x1F441}, {0x1F4FD, 0x1F4FE}, {0x1F53E, 0x1F54A},
{0x1F568, 0x1F579}, {0x1F57B, 0x1F594}, {0x1F597, 0x1F5A3}, {0x1F54F, 0x1F54F}, {0x1F568, 0x1F579}, {0x1F57B, 0x1F594},
{0x1F5A5, 0x1F5FA}, {0x1F650, 0x1F67F}, {0x1F6C6, 0x1F6CB}, {0x1F597, 0x1F5A3}, {0x1F5A5, 0x1F5FA}, {0x1F650, 0x1F67F},
{0x1F6CD, 0x1F6CF}, {0x1F6D3, 0x1F6D4}, {0x1F6E0, 0x1F6EA}, {0x1F6C6, 0x1F6CB}, {0x1F6CD, 0x1F6CF}, {0x1F6D3, 0x1F6D4},
{0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F773}, {0x1F780, 0x1F7D8}, {0x1F6E0, 0x1F6EA}, {0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F773},
{0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859}, {0x1F780, 0x1F7D8}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847},
{0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F900, 0x1F90B}, {0x1F850, 0x1F859}, {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD},
{0x1FA00, 0x1FA53}, {0x1FA60, 0x1FA6D}, {0xE0001, 0xE0001}, {0x1F8B0, 0x1F8B1}, {0x1F900, 0x1F90B}, {0x1F93B, 0x1F93B},
{0xE0020, 0xE007F}, {0x1F946, 0x1F946}, {0x1FA00, 0x1FA53}, {0x1FA60, 0x1FA6D},
{0x1FB00, 0x1FB92}, {0x1FB94, 0x1FBCA}, {0x1FBF0, 0x1FBF9},
{0xE0001, 0xE0001}, {0xE0020, 0xE007F},
} }
var emoji = table{ var emoji = table{
@ -423,5 +432,6 @@ var emoji = table{
{0x1F546, 0x1F64F}, {0x1F680, 0x1F6FF}, {0x1F774, 0x1F77F}, {0x1F546, 0x1F64F}, {0x1F680, 0x1F6FF}, {0x1F774, 0x1F77F},
{0x1F7D5, 0x1F7FF}, {0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F}, {0x1F7D5, 0x1F7FF}, {0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F},
{0x1F85A, 0x1F85F}, {0x1F888, 0x1F88F}, {0x1F8AE, 0x1F8FF}, {0x1F85A, 0x1F85F}, {0x1F888, 0x1F88F}, {0x1F8AE, 0x1F8FF},
{0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1FFFD}, {0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1FAFF},
{0x1FC00, 0x1FFFD},
} }

View file

@ -24,3 +24,8 @@ _testmain.go
*.prof *.prof
.DS_STORE .DS_STORE
cmd/ftest/ftest
# Goland specific files
.idea

View file

@ -1,16 +1,24 @@
language: go language: go
env:
global:
- GO111MODULE="on"
go: go:
- 1.11.x
- 1.12.x
- 1.13.x
- tip - tip
- 1.6
- 1.5 matrix:
- 1.4 allow_failures:
install: - go: tip
- go get -t -v ./... fast_finish: true
- go get github.com/go-playground/overalls
before_install:
- go get github.com/mattn/goveralls - go get github.com/mattn/goveralls
- go get golang.org/x/tools/cmd/cover
script: script:
- go install ./...
- go test -v ./... - go test -v ./...
- $GOPATH/bin/overalls -project=github.com/mmcdole/gofeed -covermode=count -ignore=.git,vendor -debug - $GOPATH/bin/goveralls -service=travis-ci
after_success:
- $GOPATH/bin/goveralls -coverprofile=overalls.coverprofile -service=travis-ci

View file

@ -2,9 +2,23 @@
[![Build Status](https://travis-ci.org/mmcdole/gofeed.svg?branch=master)](https://travis-ci.org/mmcdole/gofeed) [![Coverage Status](https://coveralls.io/repos/github/mmcdole/gofeed/badge.svg?branch=master)](https://coveralls.io/github/mmcdole/gofeed?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/mmcdole/gofeed)](https://goreportcard.com/report/github.com/mmcdole/gofeed) [![](https://godoc.org/github.com/mmcdole/gofeed?status.svg)](http://godoc.org/github.com/mmcdole/gofeed) [![License](http://img.shields.io/:license-mit-blue.svg)](http://doge.mit-license.org) [![Build Status](https://travis-ci.org/mmcdole/gofeed.svg?branch=master)](https://travis-ci.org/mmcdole/gofeed) [![Coverage Status](https://coveralls.io/repos/github/mmcdole/gofeed/badge.svg?branch=master)](https://coveralls.io/github/mmcdole/gofeed?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/mmcdole/gofeed)](https://goreportcard.com/report/github.com/mmcdole/gofeed) [![](https://godoc.org/github.com/mmcdole/gofeed?status.svg)](http://godoc.org/github.com/mmcdole/gofeed) [![License](http://img.shields.io/:license-mit-blue.svg)](http://doge.mit-license.org)
The `gofeed` library is a robust feed parser that supports parsing both [RSS](https://en.wikipedia.org/wiki/RSS) and [Atom](https://en.wikipedia.org/wiki/Atom_(standard)) feeds. The universal `gofeed.Parser` will parse and convert all feed types into a hybrid `gofeed.Feed` model. You also have the option of parsing them into their respective `atom.Feed` and `rss.Feed` models using the feed specific `atom.Parser` or `rss.Parser`. The `gofeed` library is a robust feed parser that supports parsing both [RSS](https://en.wikipedia.org/wiki/RSS) and [Atom](https://en.wikipedia.org/wiki/Atom_(standard)) feeds. The library provides a universal `gofeed.Parser` that will parse and convert all feed types into a hybrid `gofeed.Feed` model. You also have the option of utilizing the feed specific `atom.Parser` or `rss.Parser` parsers which generate `atom.Feed` and `rss.Feed` respectively.
##### Supported feed types: ## Table of Contents
- [Features](#features)
- [Overview](#overview)
- [Basic Usage](#basic-usage)
- [Advanced Usage](#advanced-usage)
- [Extensions](#extensions)
- [Invalid Feeds](#invalid-feeds)
- [Default Mappings](#default-mappings)
- [Dependencies](#dependencies)
- [License](#license)
- [Credits](#credits)
## Features
#### Supported feed types:
* RSS 0.90 * RSS 0.90
* Netscape RSS 0.91 * Netscape RSS 0.91
* Userland RSS 0.91 * Userland RSS 0.91
@ -16,22 +30,26 @@ The `gofeed` library is a robust feed parser that supports parsing both [RSS](ht
* Atom 0.3 * Atom 0.3
* Atom 1.0 * Atom 1.0
It also provides support for parsing several popular predefined extension modules, including [Dublin Core](http://dublincore.org/documents/dces/) and [Apples iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390), as well as arbitrary extensions. See the [Extensions](#extensions) section for more details. #### Extension Support
## Table of Contents The `gofeed` library provides support for parsing several popular predefined extensions into ready-made structs, including [Dublin Core](http://dublincore.org/documents/dces/) and [Apples iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390).
- [Overview](#overview)
- [Basic Usage](#basic-usage) It parses all other feed extensions in a generic way (see the [Extensions](#extensions) section for more details).
- [Advanced Usage](#advanced-usage)
- [Extensions](#extensions) #### Invalid Feeds
- [Invalid Feeds](#invalid-feeds)
- [Default Mappings](#default-mappings) A best-effort attempt is made at parsing broken and invalid XML feeds. Currently, `gofeed` can succesfully parse feeds with the following issues:
- [Dependencies](#dependencies) - Unescaped/Naked Markup in feed elements
- [License](#license) - Undeclared namespace prefixes
- [Donate](#donate) - Missing closing tags on certain elements
- [Credits](#credits) - Illegal tags within feed elements without namespace prefixes
- Missing "required" elements as specified by the respective feed specs.
- Incorrect date formats
## Overview ## Overview
The `gofeed` library is comprised of a universal feed parser and several feed specific parsers. Which one you choose depends entirely on your usecase. If you will be handling both rss and atom feeds then it makes sense to use the `gofeed.Parser`. If you know ahead of time that you will only be parsing one feed type then it would make sense to use `rss.Parser` or `atom.Parser`.
#### Universal Feed Parser #### Universal Feed Parser
The universal `gofeed.Parser` works in 3 stages: detection, parsing and translation. It first detects the feed type that it is currently parsing. Then it uses a feed specific parser to parse the feed into its true representation which will be either a `rss.Feed` or `atom.Feed`. These models cover every field possible for their respective feed types. Finally, they are *translated* into a `gofeed.Feed` model that is a hybrid of both feed types. Performing the universal feed parsing in these 3 stages allows for more flexibility and keeps the code base more maintainable by separating RSS and Atom parsing into seperate packages. The universal `gofeed.Parser` works in 3 stages: detection, parsing and translation. It first detects the feed type that it is currently parsing. Then it uses a feed specific parser to parse the feed into its true representation which will be either a `rss.Feed` or `atom.Feed`. These models cover every field possible for their respective feed types. Finally, they are *translated* into a `gofeed.Feed` model that is a hybrid of both feed types. Performing the universal feed parsing in these 3 stages allows for more flexibility and keeps the code base more maintainable by separating RSS and Atom parsing into seperate packages.
@ -44,8 +62,6 @@ The translation step is done by anything which adheres to the `gofeed.Translator
The `gofeed` library provides two feed specific parsers: `atom.Parser` and `rss.Parser`. If the hybrid `gofeed.Feed` model that the universal `gofeed.Parser` produces does not contain a field from the `atom.Feed` or `rss.Feed` model that you require, it might be beneficial to use the feed specific parsers. When using the `atom.Parser` or `rss.Parser` directly, you can access all of fields found in the `atom.Feed` and `rss.Feed` models. It is also marginally faster because you are able to skip the translation step. The `gofeed` library provides two feed specific parsers: `atom.Parser` and `rss.Parser`. If the hybrid `gofeed.Feed` model that the universal `gofeed.Parser` produces does not contain a field from the `atom.Feed` or `rss.Feed` model that you require, it might be beneficial to use the feed specific parsers. When using the `atom.Parser` or `rss.Parser` directly, you can access all of fields found in the `atom.Feed` and `rss.Feed` models. It is also marginally faster because you are able to skip the translation step.
However, for the *vast* majority of users, the universal `gofeed.Parser` is the best way to parse feeds. This allows the user of `gofeed` library to not care about the differences between RSS or Atom feeds.
## Basic Usage ## Basic Usage
#### Universal Feed Parser #### Universal Feed Parser
@ -83,6 +99,16 @@ feed, _ := fp.Parse(file)
fmt.Println(feed.Title) fmt.Println(feed.Title)
``` ```
##### Parse a feed from an URL with a 60s timeout:
```go
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
fp := gofeed.NewParser()
feed, _ := fp.ParseURLWithContext("http://feeds.twit.tv/twit.xml", ctx)
fmt.Println(feed.Title)
```
#### Feed Specific Parsers #### Feed Specific Parsers
You can easily use the `rss.Parser` and `atom.Parser` directly if you have a usage scenario that requires it: You can easily use the `rss.Parser` and `atom.Parser` directly if you have a usage scenario that requires it:
@ -185,16 +211,6 @@ Every element which does not belong to the feed's default namespace is considere
In addition to the generic handling of extensions, `gofeed` also has built in support for parsing certain popular extensions into their own structs for convenience. It currently supports the [Dublin Core](http://dublincore.org/documents/dces/) and [Apple iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390) extensions which you can access at `Feed.ItunesExt`, `feed.DublinCoreExt` and `Item.ITunesExt` and `Item.DublinCoreExt` In addition to the generic handling of extensions, `gofeed` also has built in support for parsing certain popular extensions into their own structs for convenience. It currently supports the [Dublin Core](http://dublincore.org/documents/dces/) and [Apple iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390) extensions which you can access at `Feed.ItunesExt`, `feed.DublinCoreExt` and `Item.ITunesExt` and `Item.DublinCoreExt`
## Invalid Feeds
A best-effort attempt is made at parsing broken and invalid XML feeds. Currently, `gofeed` can succesfully parse feeds with the following issues:
- Unescaped/Naked Markup in feed elements
- Undeclared namespace prefixes
- Missing closing tags on certain elements
- Illegal tags within feed elements without namespace prefixes
- Missing "required" elements as specified by the respective feed specs.
- Incorrect date formats
## Default Mappings ## Default Mappings
The ```DefaultRSSTranslator``` and the ```DefaultAtomTranslator``` map the following ```rss.Feed``` and ```atom.Feed``` fields to their respective ```gofeed.Feed``` fields. They are listed in order of precedence (highest to lowest): The ```DefaultRSSTranslator``` and the ```DefaultAtomTranslator``` map the following ```rss.Feed``` and ```atom.Feed``` fields to their respective ```gofeed.Feed``` fields. They are listed in order of precedence (highest to lowest):
@ -220,12 +236,12 @@ Categories | /rss/channel/category<br>/rss/channel/itunes:category<br>/rss/chann
--- | --- | --- --- | --- | ---
Title | /rss/channel/item/title<br>/rdf:RDF/item/title<br>/rdf:RDF/item/dc:title<br>/rss/channel/item/dc:title | /feed/entry/title Title | /rss/channel/item/title<br>/rdf:RDF/item/title<br>/rdf:RDF/item/dc:title<br>/rss/channel/item/dc:title | /feed/entry/title
Description | /rss/channel/item/description<br>/rdf:RDF/item/description<br>/rss/channel/item/dc:description<br>/rdf:RDF/item/dc:description | /feed/entry/summary Description | /rss/channel/item/description<br>/rdf:RDF/item/description<br>/rss/channel/item/dc:description<br>/rdf:RDF/item/dc:description | /feed/entry/summary
Content | | /feed/entry/content Content | /rss/channel/item/content:encoded | /feed/entry/content
Link | /rss/channel/item/link<br>/rdf:RDF/item/link | /feed/entry/link[@rel=”alternate”]/@href<br>/feed/entry/link[not(@rel)]/@href Link | /rss/channel/item/link<br>/rdf:RDF/item/link | /feed/entry/link[@rel=”alternate”]/@href<br>/feed/entry/link[not(@rel)]/@href
Updated | /rss/channel/item/dc:date<br>/rdf:RDF/rdf:item/dc:date | /feed/entry/modified<br>/feed/entry/updated Updated | /rss/channel/item/dc:date<br>/rdf:RDF/rdf:item/dc:date | /feed/entry/modified<br>/feed/entry/updated
Published | /rss/channel/item/pubDate<br>/rss/channel/item/dc:date | /feed/entry/published<br>/feed/entry/issued Published | /rss/channel/item/pubDate<br>/rss/channel/item/dc:date | /feed/entry/published<br>/feed/entry/issued
Author | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/author Author | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/author
Guid | /rss/channel/item/guid | /feed/entry/id GUID | /rss/channel/item/guid | /feed/entry/id
Image | /rss/channel/item/itunes:image<br>/rss/channel/item/media:image | Image | /rss/channel/item/itunes:image<br>/rss/channel/item/media:image |
Categories | /rss/channel/item/category<br>/rss/channel/item/dc:subject<br>/rss/channel/item/itunes:keywords<br>/rdf:RDF/channel/item/dc:subject | /feed/entry/category Categories | /rss/channel/item/category<br>/rss/channel/item/dc:subject<br>/rss/channel/item/itunes:keywords<br>/rdf:RDF/channel/item/dc:subject | /feed/entry/category
Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”] Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”]
@ -240,14 +256,9 @@ Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”
This project is licensed under the [MIT License](https://raw.githubusercontent.com/mmcdole/gofeed/master/LICENSE) This project is licensed under the [MIT License](https://raw.githubusercontent.com/mmcdole/gofeed/master/LICENSE)
## Donate
I write open source software for fun. However, if you want to buy me a beer because you found something I wrote useful, feel free!
Bitcoin: 1CXrjBBkxgVNgKXRAq5MnsR7zzZbHvUHkJ
## Credits ## Credits
* [cristoper](https://github.com/cristoper) for his work on implementing xml:base relative URI handling.
* [Mark Pilgrim](https://en.wikipedia.org/wiki/Mark_Pilgrim) and [Kurt McKee](http://kurtmckee.org) for their work on the excellent [Universal Feed Parser](https://github.com/kurtmckee/feedparser) Python library. This library was the inspiration for the `gofeed` library. * [Mark Pilgrim](https://en.wikipedia.org/wiki/Mark_Pilgrim) and [Kurt McKee](http://kurtmckee.org) for their work on the excellent [Universal Feed Parser](https://github.com/kurtmckee/feedparser) Python library. This library was the inspiration for the `gofeed` library.
* [Dan MacTough](http://blog.mact.me) for his work on [node-feedparser](https://github.com/danmactough/node-feedparser). It provided inspiration for the set of fields that should be covered in the hybrid `gofeed.Feed` model. * [Dan MacTough](http://blog.mact.me) for his work on [node-feedparser](https://github.com/danmactough/node-feedparser). It provided inspiration for the set of fields that should be covered in the hybrid `gofeed.Feed` model.
* [Matt Jibson](https://mattjibson.com/) for his date parsing function in the [goread](https://github.com/mjibson/goread) project. * [Matt Jibson](https://mattjibson.com/) for his date parsing function in the [goread](https://github.com/mjibson/goread) project.

View file

@ -6,19 +6,43 @@ import (
"strings" "strings"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/mmcdole/gofeed/extensions" ext "github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared" "github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/goxpp" xpp "github.com/mmcdole/goxpp"
)
var (
// Atom elements which contain URIs
// https://tools.ietf.org/html/rfc4287
uriElements = map[string]bool{
"icon": true,
"id": true,
"logo": true,
"uri": true,
"url": true, // atom 0.3
}
// Atom attributes which contain URIs
// https://tools.ietf.org/html/rfc4287
atomURIAttrs = map[string]bool{
"href": true,
"scheme": true,
"src": true,
"uri": true,
}
) )
// Parser is an Atom Parser // Parser is an Atom Parser
type Parser struct{} type Parser struct {
base *shared.XMLBase
}
// Parse parses an xml feed into an atom.Feed // Parse parses an xml feed into an atom.Feed
func (ap *Parser) Parse(feed io.Reader) (*Feed, error) { func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
ap.base = &shared.XMLBase{URIAttrs: atomURIAttrs}
_, err := shared.FindRoot(p) _, err := ap.base.FindRoot(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -43,7 +67,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
extensions := ext.Extensions{} extensions := ext.Extensions{}
for { for {
tok, err := shared.NextTag(p) tok, err := ap.base.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -197,7 +221,7 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
extensions := ext.Extensions{} extensions := ext.Extensions{}
for { for {
tok, err := shared.NextTag(p) tok, err := ap.base.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -352,7 +376,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
extensions := ext.Extensions{} extensions := ext.Extensions{}
for { for {
tok, err := shared.NextTag(p) tok, err := ap.base.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -510,7 +534,7 @@ func (ap *Parser) parsePerson(name string, p *xpp.XMLPullParser) (*Person, error
person := &Person{} person := &Person{}
for { for {
tok, err := shared.NextTag(p) tok, err := ap.base.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -654,31 +678,46 @@ func (ap *Parser) parseAtomText(p *xpp.XMLPullParser) (string, error) {
result := text.InnerXML result := text.InnerXML
result = strings.TrimSpace(result) result = strings.TrimSpace(result)
if strings.HasPrefix(result, "<![CDATA[") &&
strings.HasSuffix(result, "]]>") {
result = strings.TrimPrefix(result, "<![CDATA[")
result = strings.TrimSuffix(result, "]]>")
return result, nil
}
lowerType := strings.ToLower(text.Type) lowerType := strings.ToLower(text.Type)
lowerMode := strings.ToLower(text.Mode) lowerMode := strings.ToLower(text.Mode)
if strings.Contains(result, "<![CDATA[") {
result = shared.StripCDATA(result)
if lowerType == "html" || strings.Contains(lowerType, "xhtml") {
result, _ = ap.base.ResolveHTML(result)
}
} else {
// decode non-CDATA contents depending on type
if lowerType == "text" || if lowerType == "text" ||
strings.HasPrefix(lowerType, "text/") || strings.HasPrefix(lowerType, "text/") ||
(lowerType == "" && lowerMode == "") { (lowerType == "" && lowerMode == "") {
result, err = shared.DecodeEntities(result) result, err = shared.DecodeEntities(result)
} else if strings.Contains(lowerType, "xhtml") { } else if strings.Contains(lowerType, "xhtml") {
result = ap.stripWrappingDiv(result) result = ap.stripWrappingDiv(result)
result, _ = ap.base.ResolveHTML(result)
} else if lowerType == "html" { } else if lowerType == "html" {
result = ap.stripWrappingDiv(result) result = ap.stripWrappingDiv(result)
result, err = shared.DecodeEntities(result) result, err = shared.DecodeEntities(result)
if err == nil {
result, _ = ap.base.ResolveHTML(result)
}
} else { } else {
decodedStr, err := base64.StdEncoding.DecodeString(result) decodedStr, err := base64.StdEncoding.DecodeString(result)
if err == nil { if err == nil {
result = string(decodedStr) result = string(decodedStr)
} }
} }
}
// resolve relative URIs in URI-containing elements according to xml:base
name := strings.ToLower(p.Name)
if uriElements[name] {
resolved, err := ap.base.ResolveURL(result)
if err == nil {
result = resolved
}
}
return result, err return result, err
} }

View file

@ -5,7 +5,7 @@ import (
"strings" "strings"
"github.com/mmcdole/gofeed/internal/shared" "github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/goxpp" xpp "github.com/mmcdole/goxpp"
) )
// FeedType represents one of the possible feed // FeedType represents one of the possible feed
@ -28,7 +28,8 @@ const (
func DetectFeedType(feed io.Reader) FeedType { func DetectFeedType(feed io.Reader) FeedType {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
_, err := shared.FindRoot(p) xmlBase := shared.XMLBase{}
_, err := xmlBase.FindRoot(p)
if err != nil { if err != nil {
return FeedTypeUnknown return FeedTypeUnknown
} }

View file

@ -14,6 +14,7 @@ type ITunesFeedExtension struct {
Image string `json:"image,omitempty"` Image string `json:"image,omitempty"`
Complete string `json:"complete,omitempty"` Complete string `json:"complete,omitempty"`
NewFeedURL string `json:"newFeedUrl,omitempty"` NewFeedURL string `json:"newFeedUrl,omitempty"`
Type string `json:"type,omitempty"`
} }
// ITunesItemExtension is a set of extension // ITunesItemExtension is a set of extension
@ -28,7 +29,10 @@ type ITunesItemExtension struct {
Summary string `json:"summary,omitempty"` Summary string `json:"summary,omitempty"`
Image string `json:"image,omitempty"` Image string `json:"image,omitempty"`
IsClosedCaptioned string `json:"isClosedCaptioned,omitempty"` IsClosedCaptioned string `json:"isClosedCaptioned,omitempty"`
Episode string `json:"episode,omitempty"`
Season string `json:"season,omitempty"`
Order string `json:"order,omitempty"` Order string `json:"order,omitempty"`
EpisodeType string `json:"episodeType,omitempty"`
} }
// ITunesCategory is a category element for itunes feeds. // ITunesCategory is a category element for itunes feeds.
@ -58,6 +62,7 @@ func NewITunesFeedExtension(extensions map[string][]Extension) *ITunesFeedExtens
feed.NewFeedURL = parseTextExtension("new-feed-url", extensions) feed.NewFeedURL = parseTextExtension("new-feed-url", extensions)
feed.Categories = parseCategories(extensions) feed.Categories = parseCategories(extensions)
feed.Owner = parseOwner(extensions) feed.Owner = parseOwner(extensions)
feed.Type = parseTextExtension("type", extensions)
return feed return feed
} }
@ -71,9 +76,13 @@ func NewITunesItemExtension(extensions map[string][]Extension) *ITunesItemExtens
entry.Explicit = parseTextExtension("explicit", extensions) entry.Explicit = parseTextExtension("explicit", extensions)
entry.Subtitle = parseTextExtension("subtitle", extensions) entry.Subtitle = parseTextExtension("subtitle", extensions)
entry.Summary = parseTextExtension("summary", extensions) entry.Summary = parseTextExtension("summary", extensions)
entry.Keywords = parseTextExtension("keywords", extensions)
entry.Image = parseImage(extensions) entry.Image = parseImage(extensions)
entry.IsClosedCaptioned = parseTextExtension("isClosedCaptioned", extensions) entry.IsClosedCaptioned = parseTextExtension("isClosedCaptioned", extensions)
entry.Episode = parseTextExtension("episode", extensions)
entry.Season = parseTextExtension("season", extensions)
entry.Order = parseTextExtension("order", extensions) entry.Order = parseTextExtension("order", extensions)
entry.EpisodeType = parseTextExtension("episodeType", extensions)
return entry return entry
} }

View file

@ -10,6 +10,8 @@ import (
// Feed is the universal Feed type that atom.Feed // Feed is the universal Feed type that atom.Feed
// and rss.Feed gets translated to. It represents // and rss.Feed gets translated to. It represents
// a web feed. // a web feed.
// Sorting with sort.Sort will order the Items by
// oldest to newest publish time.
type Feed struct { type Feed struct {
Title string `json:"title,omitempty"` Title string `json:"title,omitempty"`
Description string `json:"description,omitempty"` Description string `json:"description,omitempty"`
@ -82,3 +84,21 @@ type Enclosure struct {
Length string `json:"length,omitempty"` Length string `json:"length,omitempty"`
Type string `json:"type,omitempty"` Type string `json:"type,omitempty"`
} }
// Len returns the length of Items.
func (f Feed) Len() int {
return len(f.Items)
}
// Less compares PublishedParsed of Items[i], Items[k]
// and returns true if Items[i] is less than Items[k].
func (f Feed) Less(i, k int) bool {
return f.Items[i].PublishedParsed.Before(
*f.Items[k].PublishedParsed,
)
}
// Swap swaps Items[i] and Items[k].
func (f Feed) Swap(i, k int) {
f.Items[i], f.Items[k] = f.Items[k], f.Items[i]
}

12
vendor/github.com/mmcdole/gofeed/go.mod generated vendored Normal file
View file

@ -0,0 +1,12 @@
module github.com/mmcdole/gofeed
require (
github.com/PuerkitoBio/goquery v1.5.0
github.com/codegangsta/cli v1.20.0
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/stretchr/testify v1.2.2
golang.org/x/net v0.0.0-20181220203305-927f97764cc3
golang.org/x/text v0.3.0
)

20
vendor/github.com/mmcdole/gofeed/go.sum generated vendored Normal file
View file

@ -0,0 +1,20 @@
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/codegangsta/cli v1.20.0 h1:iX1FXEgwzd5+XN6wk5cVHOGQj6Q3Dcp20lUeS4lHNTw=
github.com/codegangsta/cli v1.20.0/go.mod h1:/qJNoX69yVSKu5o4jLyXAENLRyk1uhi7zkbQ3slBdOA=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3 h1:eH6Eip3UpmR+yM/qI9Ijluzb1bNv/cAU/n+6l8tRSis=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View file

@ -18,47 +18,27 @@ var dateFormats = []string{
time.RFC1123, time.RFC1123,
time.ANSIC, time.ANSIC,
"Mon, January 2 2006 15:04:05 -0700", "Mon, January 2 2006 15:04:05 -0700",
"Mon, January 02, 2006, 15:04:05 MST",
"Mon, January 02, 2006 15:04:05 MST",
"Mon, Jan 2, 2006 15:04 MST",
"Mon, Jan 2 2006 15:04 MST",
"Mon, Jan 2, 2006 15:04:05 MST",
"Mon, Jan 2 2006 15:04:05 -700", "Mon, Jan 2 2006 15:04:05 -700",
"Mon, Jan 2 2006 15:04:05 -0700", "Mon, Jan 2 2006 15:04:05 -0700",
"Mon Jan 2 15:04 2006", "Mon Jan 2 15:04 2006",
"Mon Jan 2 15:04:05 2006 MST",
"Mon Jan 02, 2006 3:04 pm", "Mon Jan 02, 2006 3:04 pm",
"Mon, Jan 02,2006 15:04:05 MST",
"Mon Jan 02 2006 15:04:05 -0700", "Mon Jan 02 2006 15:04:05 -0700",
"Monday, January 2, 2006 15:04:05 MST",
"Monday, January 2, 2006 03:04 PM", "Monday, January 2, 2006 03:04 PM",
"Monday, January 2, 2006", "Monday, January 2, 2006",
"Monday, January 02, 2006", "Monday, January 02, 2006",
"Monday, 2 January 2006 15:04:05 MST",
"Monday, 2 January 2006 15:04:05 -0700", "Monday, 2 January 2006 15:04:05 -0700",
"Monday, 2 Jan 2006 15:04:05 MST",
"Monday, 2 Jan 2006 15:04:05 -0700", "Monday, 2 Jan 2006 15:04:05 -0700",
"Monday, 02 January 2006 15:04:05 MST",
"Monday, 02 January 2006 15:04:05 -0700", "Monday, 02 January 2006 15:04:05 -0700",
"Monday, 02 January 2006 15:04:05", "Monday, 02 January 2006 15:04:05",
"Mon, 2 January 2006 15:04 MST",
"Mon, 2 January 2006, 15:04 -0700", "Mon, 2 January 2006, 15:04 -0700",
"Mon, 2 January 2006, 15:04:05 MST",
"Mon, 2 January 2006 15:04:05 MST",
"Mon, 2 January 2006 15:04:05 -0700", "Mon, 2 January 2006 15:04:05 -0700",
"Mon, 2 January 2006", "Mon, 2 January 2006",
"Mon, 2 Jan 2006 3:04:05 PM -0700", "Mon, 2 Jan 2006 3:04:05 PM -0700",
"Mon, 2 Jan 2006 15:4:5 MST",
"Mon, 2 Jan 2006 15:4:5 -0700 GMT", "Mon, 2 Jan 2006 15:4:5 -0700 GMT",
"Mon, 2, Jan 2006 15:4", "Mon, 2, Jan 2006 15:4",
"Mon, 2 Jan 2006 15:04 MST",
"Mon, 2 Jan 2006, 15:04 -0700", "Mon, 2 Jan 2006, 15:04 -0700",
"Mon, 2 Jan 2006 15:04 -0700", "Mon, 2 Jan 2006 15:04 -0700",
"Mon, 2 Jan 2006 15:04:05 UT", "Mon, 2 Jan 2006 15:04:05 UT",
"Mon, 2 Jan 2006 15:04:05MST",
"Mon, 2 Jan 2006 15:04:05 MST",
"Mon 2 Jan 2006 15:04:05 MST",
"mon,2 Jan 2006 15:04:05 MST",
"Mon, 2 Jan 2006 15:04:05 -0700 MST", "Mon, 2 Jan 2006 15:04:05 -0700 MST",
"Mon, 2 Jan 2006 15:04:05-0700", "Mon, 2 Jan 2006 15:04:05-0700",
"Mon, 2 Jan 2006 15:04:05 -0700", "Mon, 2 Jan 2006 15:04:05 -0700",
@ -66,25 +46,15 @@ var dateFormats = []string{
"Mon, 2 Jan 2006 15:04", "Mon, 2 Jan 2006 15:04",
"Mon,2 Jan 2006", "Mon,2 Jan 2006",
"Mon, 2 Jan 2006", "Mon, 2 Jan 2006",
"Mon, 2 Jan 15:04:05 MST",
"Mon, 2 Jan 06 15:04:05 MST",
"Mon, 2 Jan 06 15:04:05 -0700", "Mon, 2 Jan 06 15:04:05 -0700",
"Mon, 2006-01-02 15:04", "Mon, 2006-01-02 15:04",
"Mon,02 January 2006 14:04:05 MST",
"Mon, 02 January 2006", "Mon, 02 January 2006",
"Mon, 02 Jan 2006 3:04:05 PM MST",
"Mon, 02 Jan 2006 15 -0700", "Mon, 02 Jan 2006 15 -0700",
"Mon,02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006 15:04 -0700", "Mon, 02 Jan 2006 15:04 -0700",
"Mon, 02 Jan 2006 15:04:05 Z", "Mon, 02 Jan 2006 15:04:05 Z",
"Mon, 02 Jan 2006 15:04:05 UT", "Mon, 02 Jan 2006 15:04:05 UT",
"Mon, 02 Jan 2006 15:04:05 MST-07:00", "Mon, 02 Jan 2006 15:04:05 MST-07:00",
"Mon, 02 Jan 2006 15:04:05 MST -0700", "Mon, 02 Jan 2006 15:04:05 MST -0700",
"Mon, 02 Jan 2006, 15:04:05 MST",
"Mon, 02 Jan 2006 15:04:05MST",
"Mon, 02 Jan 2006 15:04:05 MST",
"Mon , 02 Jan 2006 15:04:05 MST",
"Mon, 02 Jan 2006 15:04:05 GMT-0700", "Mon, 02 Jan 2006 15:04:05 GMT-0700",
"Mon,02 Jan 2006 15:04:05 -0700", "Mon,02 Jan 2006 15:04:05 -0700",
"Mon, 02 Jan 2006 15:04:05 -0700", "Mon, 02 Jan 2006 15:04:05 -0700",
@ -95,30 +65,23 @@ var dateFormats = []string{
"Mon, 02 Jan 2006 15:04:05 00", "Mon, 02 Jan 2006 15:04:05 00",
"Mon, 02 Jan 2006 15:04:05", "Mon, 02 Jan 2006 15:04:05",
"Mon, 02 Jan 2006", "Mon, 02 Jan 2006",
"Mon, 02 Jan 06 15:04:05 MST",
"January 2, 2006 3:04 PM", "January 2, 2006 3:04 PM",
"January 2, 2006, 3:04 p.m.", "January 2, 2006, 3:04 p.m.",
"January 2, 2006 15:04:05 MST",
"January 2, 2006 15:04:05", "January 2, 2006 15:04:05",
"January 2, 2006 03:04 PM", "January 2, 2006 03:04 PM",
"January 2, 2006", "January 2, 2006",
"January 02, 2006 15:04:05 MST",
"January 02, 2006 15:04", "January 02, 2006 15:04",
"January 02, 2006 03:04 PM", "January 02, 2006 03:04 PM",
"January 02, 2006", "January 02, 2006",
"Jan 2, 2006 3:04:05 PM MST",
"Jan 2, 2006 3:04:05 PM", "Jan 2, 2006 3:04:05 PM",
"Jan 2, 2006 15:04:05 MST",
"Jan 2, 2006", "Jan 2, 2006",
"Jan 02 2006 03:04:05PM", "Jan 02 2006 03:04:05PM",
"Jan 02, 2006", "Jan 02, 2006",
"6/1/2 15:04", "6/1/2 15:04",
"6-1-2 15:04", "6-1-2 15:04",
"2 January 2006 15:04:05 MST",
"2 January 2006 15:04:05 -0700", "2 January 2006 15:04:05 -0700",
"2 January 2006", "2 January 2006",
"2 Jan 2006 15:04:05 Z", "2 Jan 2006 15:04:05 Z",
"2 Jan 2006 15:04:05 MST",
"2 Jan 2006 15:04:05 -0700", "2 Jan 2006 15:04:05 -0700",
"2 Jan 2006", "2 Jan 2006",
"2.1.2006 15:04:05", "2.1.2006 15:04:05",
@ -141,7 +104,6 @@ var dateFormats = []string{
"2006-01-02T15:04:05", "2006-01-02T15:04:05",
"2006-01-02 at 15:04:05", "2006-01-02 at 15:04:05",
"2006-01-02 15:04:05Z", "2006-01-02 15:04:05Z",
"2006-01-02 15:04:05 MST",
"2006-01-02 15:04:05-0700", "2006-01-02 15:04:05-0700",
"2006-01-02 15:04:05-07:00", "2006-01-02 15:04:05-07:00",
"2006-01-02 15:04:05 -0700", "2006-01-02 15:04:05 -0700",
@ -150,21 +112,15 @@ var dateFormats = []string{
"2006/01/02", "2006/01/02",
"2006-01-02", "2006-01-02",
"15:04 02.01.2006 -0700", "15:04 02.01.2006 -0700",
"1/2/2006 3:04:05 PM MST",
"1/2/2006 3:04:05 PM", "1/2/2006 3:04:05 PM",
"1/2/2006 15:04:05 MST",
"1/2/2006", "1/2/2006",
"06/1/2 15:04", "06/1/2 15:04",
"06-1-2 15:04", "06-1-2 15:04",
"02 Monday, Jan 2006 15:04", "02 Monday, Jan 2006 15:04",
"02 Jan 2006 15:04 MST",
"02 Jan 2006 15:04:05 UT", "02 Jan 2006 15:04:05 UT",
"02 Jan 2006 15:04:05 MST",
"02 Jan 2006 15:04:05 -0700", "02 Jan 2006 15:04:05 -0700",
"02 Jan 2006 15:04:05", "02 Jan 2006 15:04:05",
"02 Jan 2006", "02 Jan 2006",
"02/01/2006 15:04 MST",
"02-01-2006 15:04:05 MST",
"02.01.2006 15:04:05", "02.01.2006 15:04:05",
"02/01/2006 15:04:05", "02/01/2006 15:04:05",
"02.01.2006 15:04", "02.01.2006 15:04",
@ -173,12 +129,60 @@ var dateFormats = []string{
"02/01/2006", "02/01/2006",
"02-01-2006", "02-01-2006",
"01/02/2006 3:04 PM", "01/02/2006 3:04 PM",
"01/02/2006 15:04:05 MST",
"01/02/2006 - 15:04", "01/02/2006 - 15:04",
"01/02/2006", "01/02/2006",
"01-02-2006", "01-02-2006",
} }
// Named zone cannot be consistently loaded, so handle separately
var dateFormatsWithNamedZone = []string{
"Mon, January 02, 2006, 15:04:05 MST",
"Mon, January 02, 2006 15:04:05 MST",
"Mon, Jan 2, 2006 15:04 MST",
"Mon, Jan 2 2006 15:04 MST",
"Mon, Jan 2, 2006 15:04:05 MST",
"Mon Jan 2 15:04:05 2006 MST",
"Mon, Jan 02,2006 15:04:05 MST",
"Monday, January 2, 2006 15:04:05 MST",
"Monday, 2 January 2006 15:04:05 MST",
"Monday, 2 Jan 2006 15:04:05 MST",
"Monday, 02 January 2006 15:04:05 MST",
"Mon, 2 January 2006 15:04 MST",
"Mon, 2 January 2006, 15:04:05 MST",
"Mon, 2 January 2006 15:04:05 MST",
"Mon, 2 Jan 2006 15:4:5 MST",
"Mon, 2 Jan 2006 15:04 MST",
"Mon, 2 Jan 2006 15:04:05MST",
"Mon, 2 Jan 2006 15:04:05 MST",
"Mon 2 Jan 2006 15:04:05 MST",
"mon,2 Jan 2006 15:04:05 MST",
"Mon, 2 Jan 15:04:05 MST",
"Mon, 2 Jan 06 15:04:05 MST",
"Mon,02 January 2006 14:04:05 MST",
"Mon, 02 Jan 2006 3:04:05 PM MST",
"Mon,02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006, 15:04:05 MST",
"Mon, 02 Jan 2006 15:04:05MST",
"Mon, 02 Jan 2006 15:04:05 MST",
"Mon , 02 Jan 2006 15:04:05 MST",
"Mon, 02 Jan 06 15:04:05 MST",
"January 2, 2006 15:04:05 MST",
"January 02, 2006 15:04:05 MST",
"Jan 2, 2006 3:04:05 PM MST",
"Jan 2, 2006 15:04:05 MST",
"2 January 2006 15:04:05 MST",
"2 Jan 2006 15:04:05 MST",
"2006-01-02 15:04:05 MST",
"1/2/2006 3:04:05 PM MST",
"1/2/2006 15:04:05 MST",
"02 Jan 2006 15:04 MST",
"02 Jan 2006 15:04:05 MST",
"02/01/2006 15:04 MST",
"02-01-2006 15:04:05 MST",
"01/02/2006 15:04:05 MST",
}
// ParseDate parses a given date string using a large // ParseDate parses a given date string using a large
// list of commonly found feed date formats. // list of commonly found feed date formats.
func ParseDate(ds string) (t time.Time, err error) { func ParseDate(ds string) (t time.Time, err error) {
@ -191,6 +195,25 @@ func ParseDate(ds string) (t time.Time, err error) {
return return
} }
} }
for _, f := range dateFormatsWithNamedZone {
t, err = time.Parse(f, d)
if err != nil {
continue
}
// This is a format match! Now try to load the timezone name
loc, err := time.LoadLocation(t.Location().String())
if err != nil {
// We couldn't load the TZ name. Just use UTC instead...
return t, nil
}
if t, err = time.ParseInLocation(f, ds, loc); err == nil {
return t, nil
}
// This should not be reachable
}
err = fmt.Errorf("Failed to parse date: %s", ds) err = fmt.Errorf("Failed to parse date: %s", ds)
return return
} }

View file

@ -8,7 +8,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"github.com/mmcdole/goxpp" xpp "github.com/mmcdole/goxpp"
) )
var ( var (
@ -21,48 +21,8 @@ var (
InvalidNumericReference = errors.New("invalid numeric reference") InvalidNumericReference = errors.New("invalid numeric reference")
) )
// FindRoot iterates through the tokens of an xml document until const CDATA_START = "<![CDATA["
// it encounters its first StartTag event. It returns an error const CDATA_END = "]]>"
// if it reaches EndDocument before finding a tag.
func FindRoot(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
event, err = p.Next()
if err != nil {
return event, err
}
if event == xpp.StartTag {
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find root node before document end.")
}
}
return
}
// NextTag iterates through the tokens until it reaches a StartTag or EndTag
// It is similar to goxpp's NextTag method except it wont throw an error if
// the next immediate token isnt a Start/EndTag. Instead, it will continue to
// consume tokens until it hits a Start/EndTag or EndDocument.
func NextTag(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
event, err = p.Next()
if err != nil {
return event, err
}
if event == xpp.StartTag || event == xpp.EndTag {
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find NextTag before reaching the end of the document.")
}
}
return
}
// ParseText is a helper function for parsing the text // ParseText is a helper function for parsing the text
// from the current element of the XMLPullParser. // from the current element of the XMLPullParser.
@ -82,16 +42,46 @@ func ParseText(p *xpp.XMLPullParser) (string, error) {
result := text.InnerXML result := text.InnerXML
result = strings.TrimSpace(result) result = strings.TrimSpace(result)
if strings.HasPrefix(result, "<![CDATA[") && if strings.Contains(result, CDATA_START) {
strings.HasSuffix(result, "]]>") { return StripCDATA(result), nil
result = strings.TrimPrefix(result, "<![CDATA[")
result = strings.TrimSuffix(result, "]]>")
return result, nil
} }
return DecodeEntities(result) return DecodeEntities(result)
} }
// StripCDATA removes CDATA tags from the string
// content outside of CDATA tags is passed via DecodeEntities
func StripCDATA(str string) string {
buf := bytes.NewBuffer([]byte{})
curr := 0
for curr < len(str) {
start := indexAt(str, CDATA_START, curr)
if start == -1 {
dec, _ := DecodeEntities(str[curr:])
buf.Write([]byte(dec))
return buf.String()
}
end := indexAt(str, CDATA_END, start)
if end == -1 {
dec, _ := DecodeEntities(str[curr:])
buf.Write([]byte(dec))
return buf.String()
}
buf.Write([]byte(str[start+len(CDATA_START) : end]))
curr = curr + end + len(CDATA_END)
}
return buf.String()
}
// DecodeEntities decodes escaped XML entities // DecodeEntities decodes escaped XML entities
// in a string and returns the unescaped string // in a string and returns the unescaped string
func DecodeEntities(str string) (string, error) { func DecodeEntities(str string) (string, error) {
@ -106,30 +96,39 @@ func DecodeEntities(str string) (string, error) {
break break
} }
// Write and skip everything before it
buf.Write(data[:idx]) buf.Write(data[:idx])
data = data[idx+1:] data = data[idx:]
if len(data) == 0 { // If there is only the '&' left here
return "", TruncatedEntity if len(data) == 1 {
buf.Write(data)
return buf.String(), nil
} }
// Find the end of the entity // Find the end of the entity
end := bytes.IndexByte(data, ';') end := bytes.IndexByte(data, ';')
if end == -1 { if end == -1 {
return "", TruncatedEntity // it's not an entitiy. just a plain old '&' possibly with extra bytes
buf.Write(data)
return buf.String(), nil
} }
if data[0] == '#' { // Check if there is a space somewhere within the 'entitiy'.
// If there is then skip the whole thing since it's not a real entity.
if strings.Contains(string(data[1:end]), " ") {
buf.Write(data)
return buf.String(), nil
} else {
if data[1] == '#' {
// Numerical character reference // Numerical character reference
var str string var str string
base := 10 base := 10
if len(data) > 1 && data[1] == 'x' { if len(data) > 2 && data[2] == 'x' {
str = string(data[2:end]) str = string(data[3:end])
base = 16 base = 16
} else { } else {
str = string(data[1:end]) str = string(data[2:end])
} }
i, err := strconv.ParseUint(str, base, 32) i, err := strconv.ParseUint(str, base, 32)
@ -140,7 +139,7 @@ func DecodeEntities(str string) (string, error) {
buf.WriteRune(rune(i)) buf.WriteRune(rune(i))
} else { } else {
// Predefined entity // Predefined entity
name := string(data[:end]) name := string(data[1:end])
var c byte var c byte
switch name { switch name {
@ -161,6 +160,7 @@ func DecodeEntities(str string) (string, error) {
buf.WriteByte(c) buf.WriteByte(c)
} }
}
// Skip the entity // Skip the entity
data = data[end+1:] data = data[end+1:]
@ -194,3 +194,11 @@ func ParseNameAddress(nameAddressText string) (name string, address string) {
} }
return return
} }
func indexAt(str, substr string, start int) int {
idx := strings.Index(str[start:], substr)
if idx > -1 {
idx += start
}
return idx
}

View file

@ -0,0 +1,258 @@
package shared
import (
"bytes"
"fmt"
"golang.org/x/net/html"
"net/url"
"strings"
"github.com/mmcdole/goxpp"
)
var (
// HTML attributes which contain URIs
// https://pythonhosted.org/feedparser/resolving-relative-links.html
// To catch every possible URI attribute is non-trivial:
// https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
htmlURIAttrs = map[string]bool{
"action": true,
"background": true,
"cite": true,
"codebase": true,
"data": true,
"href": true,
"poster": true,
"profile": true,
"scheme": true,
"src": true,
"uri": true,
"usemap": true,
}
)
type urlStack []*url.URL
func (s *urlStack) push(u *url.URL) {
*s = append([]*url.URL{u}, *s...)
}
func (s *urlStack) pop() *url.URL {
if s == nil || len(*s) == 0 {
return nil
}
var top *url.URL
top, *s = (*s)[0], (*s)[1:]
return top
}
func (s *urlStack) top() *url.URL {
if s == nil || len(*s) == 0 {
return nil
}
return (*s)[0]
}
type XMLBase struct {
stack urlStack
URIAttrs map[string]bool
}
// FindRoot iterates through the tokens of an xml document until
// it encounters its first StartTag event. It returns an error
// if it reaches EndDocument before finding a tag.
func (b *XMLBase) FindRoot(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
event, err = b.NextTag(p)
if err != nil {
return event, err
}
if event == xpp.StartTag {
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find root node before document end.")
}
}
return
}
// XMLBase.NextTag iterates through the tokens until it reaches a StartTag or
// EndTag It maintains the urlStack upon encountering StartTag and EndTags, so
// that the top of the stack (accessible through the CurrentBase() and
// CurrentBaseURL() methods) is the absolute base URI by which relative URIs
// should be resolved.
//
// NextTag is similar to goxpp's NextTag method except it wont throw an error
// if the next immediate token isnt a Start/EndTag. Instead, it will continue
// to consume tokens until it hits a Start/EndTag or EndDocument.
func (b *XMLBase) NextTag(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
if p.Event == xpp.EndTag {
// Pop xml:base after each end tag
b.pop()
}
event, err = p.Next()
if err != nil {
return event, err
}
if event == xpp.EndTag {
break
}
if event == xpp.StartTag {
base := parseBase(p)
err = b.push(base)
if err != nil {
return
}
err = b.resolveAttrs(p)
if err != nil {
return
}
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find NextTag before reaching the end of the document.")
}
}
return
}
func parseBase(p *xpp.XMLPullParser) string {
xmlURI := "http://www.w3.org/XML/1998/namespace"
for _, attr := range p.Attrs {
if attr.Name.Local == "base" && attr.Name.Space == xmlURI {
return attr.Value
}
}
return ""
}
func (b *XMLBase) push(base string) error {
newURL, err := url.Parse(base)
if err != nil {
return err
}
topURL := b.CurrentBaseURL()
if topURL != nil {
newURL = topURL.ResolveReference(newURL)
}
b.stack.push(newURL)
return nil
}
// returns the popped base URL
func (b *XMLBase) pop() string {
url := b.stack.pop()
if url != nil {
return url.String()
}
return ""
}
func (b *XMLBase) CurrentBaseURL() *url.URL {
return b.stack.top()
}
func (b *XMLBase) CurrentBase() string {
if url := b.CurrentBaseURL(); url != nil {
return url.String()
}
return ""
}
// resolve the given string as a URL relative to current base
func (b *XMLBase) ResolveURL(u string) (string, error) {
if b.CurrentBase() == "" {
return u, nil
}
relURL, err := url.Parse(u)
if err != nil {
return u, err
}
curr := b.CurrentBaseURL()
if curr.Path != "" && u != "" && curr.Path[len(curr.Path)-1] != '/' {
// There's no reason someone would use a path in xml:base if they
// didn't mean for it to be a directory
curr.Path = curr.Path + "/"
}
absURL := b.CurrentBaseURL().ResolveReference(relURL)
return absURL.String(), nil
}
// resolve relative URI attributes according to xml:base
func (b *XMLBase) resolveAttrs(p *xpp.XMLPullParser) error {
for i, attr := range p.Attrs {
lowerName := strings.ToLower(attr.Name.Local)
if b.URIAttrs[lowerName] {
absURL, err := b.ResolveURL(attr.Value)
if err != nil {
return err
}
p.Attrs[i].Value = absURL
}
}
return nil
}
// Transforms html by resolving any relative URIs in attributes
// if an error occurs during parsing or serialization, then the original string
// is returned along with the error.
func (b *XMLBase) ResolveHTML(relHTML string) (string, error) {
if b.CurrentBase() == "" {
return relHTML, nil
}
htmlReader := strings.NewReader(relHTML)
doc, err := html.Parse(htmlReader)
if err != nil {
return relHTML, err
}
var visit func(*html.Node)
// recursively traverse HTML resolving any relative URIs in attributes
visit = func(n *html.Node) {
if n.Type == html.ElementNode {
for i, a := range n.Attr {
if htmlURIAttrs[a.Key] {
absVal, err := b.ResolveURL(a.Val)
if err == nil {
n.Attr[i].Val = absVal
}
break
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
visit(c)
}
}
visit(doc)
var w bytes.Buffer
err = html.Render(&w, doc)
if err != nil {
return relHTML, err
}
// html.Render() always writes a complete html5 document, so strip the html
// and body tags
absHTML := w.String()
absHTML = strings.TrimPrefix(absHTML, "<html><head></head><body>")
absHTML = strings.TrimSuffix(absHTML, "</body></html>")
return absHTML, err
}

View file

@ -2,6 +2,7 @@ package gofeed
import ( import (
"bytes" "bytes"
"context"
"errors" "errors"
"fmt" "fmt"
"io" "io"
@ -12,6 +13,10 @@ import (
"github.com/mmcdole/gofeed/rss" "github.com/mmcdole/gofeed/rss"
) )
// ErrFeedTypeNotDetected is returned when the detection system can not figure
// out the Feed format
var ErrFeedTypeNotDetected = errors.New("Failed to detect feed type")
// HTTPError represents an HTTP error returned by a server. // HTTPError represents an HTTP error returned by a server.
type HTTPError struct { type HTTPError struct {
StatusCode int StatusCode int
@ -65,14 +70,29 @@ func (f *Parser) Parse(feed io.Reader) (*Feed, error) {
case FeedTypeRSS: case FeedTypeRSS:
return f.parseRSSFeed(r) return f.parseRSSFeed(r)
} }
return nil, errors.New("Failed to detect feed type")
return nil, ErrFeedTypeNotDetected
} }
// ParseURL fetches the contents of a given url and // ParseURL fetches the contents of a given url and
// attempts to parse the response into the universal feed type. // attempts to parse the response into the universal feed type.
func (f *Parser) ParseURL(feedURL string) (feed *Feed, err error) { func (f *Parser) ParseURL(feedURL string) (feed *Feed, err error) {
return f.ParseURLWithContext(feedURL, context.Background())
}
// ParseURLWithContext fetches contents of a given url and
// attempts to parse the response into the universal feed type.
// Request could be canceled or timeout via given context
func (f *Parser) ParseURLWithContext(feedURL string, ctx context.Context) (feed *Feed, err error) {
client := f.httpClient() client := f.httpClient()
resp, err := client.Get(feedURL)
req, err := http.NewRequest("GET", feedURL, nil)
if err != nil {
return nil, err
}
req = req.WithContext(ctx)
req.Header.Set("User-Agent", "Gofeed/1.0")
resp, err := client.Do(req)
if err != nil { if err != nil {
return nil, err return nil, err

View file

@ -5,19 +5,22 @@ import (
"io" "io"
"strings" "strings"
"github.com/mmcdole/gofeed/extensions" ext "github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared" "github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/goxpp" xpp "github.com/mmcdole/goxpp"
) )
// Parser is a RSS Parser // Parser is a RSS Parser
type Parser struct{} type Parser struct {
base *shared.XMLBase
}
// Parse parses an xml feed into an rss.Feed // Parse parses an xml feed into an rss.Feed
func (rp *Parser) Parse(feed io.Reader) (*Feed, error) { func (rp *Parser) Parse(feed io.Reader) (*Feed, error) {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
rp.base = &shared.XMLBase{}
_, err := shared.FindRoot(p) _, err := rp.base.FindRoot(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -41,7 +44,7 @@ func (rp *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
ver := rp.parseVersion(p) ver := rp.parseVersion(p)
for { for {
tok, err := shared.NextTag(p) tok, err := rp.base.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -127,7 +130,7 @@ func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) {
categories := []*Category{} categories := []*Category{}
for { for {
tok, err := shared.NextTag(p) tok, err := rp.base.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -318,7 +321,7 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
categories := []*Category{} categories := []*Category{}
for { for {
tok, err := shared.NextTag(p) tok, err := rp.base.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -492,7 +495,7 @@ func (rp *Parser) parseImage(p *xpp.XMLPullParser) (image *Image, err error) {
image = &Image{} image = &Image{}
for { for {
tok, err := shared.NextTag(p) tok, err := rp.base.NextTag(p)
if err != nil { if err != nil {
return image, err return image, err
} }
@ -604,7 +607,7 @@ func (rp *Parser) parseTextInput(p *xpp.XMLPullParser) (*TextInput, error) {
ti := &TextInput{} ti := &TextInput{}
for { for {
tok, err := shared.NextTag(p) tok, err := rp.base.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -661,7 +664,7 @@ func (rp *Parser) parseSkipHours(p *xpp.XMLPullParser) ([]string, error) {
hours := []string{} hours := []string{}
for { for {
tok, err := shared.NextTag(p) tok, err := rp.base.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -699,7 +702,7 @@ func (rp *Parser) parseSkipDays(p *xpp.XMLPullParser) ([]string, error) {
days := []string{} days := []string{}
for { for {
tok, err := shared.NextTag(p) tok, err := rp.base.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -741,7 +744,7 @@ func (rp *Parser) parseCloud(p *xpp.XMLPullParser) (*Cloud, error) {
cloud.RegisterProcedure = p.Attribute("registerProcedure") cloud.RegisterProcedure = p.Attribute("registerProcedure")
cloud.Protocol = p.Attribute("protocol") cloud.Protocol = p.Attribute("protocol")
shared.NextTag(p) rp.base.NextTag(p)
if err := p.Expect(xpp.EndTag, "cloud"); err != nil { if err := p.Expect(xpp.EndTag, "cloud"); err != nil {
return nil, err return nil, err

View file

@ -6,7 +6,7 @@ import (
"time" "time"
"github.com/mmcdole/gofeed/atom" "github.com/mmcdole/gofeed/atom"
"github.com/mmcdole/gofeed/extensions" ext "github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared" "github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/gofeed/rss" "github.com/mmcdole/gofeed/rss"
) )
@ -103,8 +103,8 @@ func (t *DefaultRSSTranslator) translateFeedFeedLink(rss *rss.Feed) (link string
for _, ex := range atomExtensions { for _, ex := range atomExtensions {
if links, ok := ex["link"]; ok { if links, ok := ex["link"]; ok {
for _, l := range links { for _, l := range links {
if l.Attrs["Rel"] == "self" { if l.Attrs["rel"] == "self" {
link = l.Value link = l.Attrs["href"]
} }
} }
} }

15
vendor/modules.txt vendored
View file

@ -1,20 +1,22 @@
# github.com/PuerkitoBio/goquery v1.5.1 # github.com/PuerkitoBio/goquery v1.5.1
## explicit ## explicit
github.com/PuerkitoBio/goquery github.com/PuerkitoBio/goquery
# github.com/andybalholm/cascadia v1.1.0 # github.com/andybalholm/cascadia v1.2.0
## explicit
github.com/andybalholm/cascadia github.com/andybalholm/cascadia
# github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 # github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9
## explicit ## explicit
github.com/chilts/sid github.com/chilts/sid
# github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195 # github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7
## explicit ## explicit
github.com/jaytaylor/html2text github.com/jaytaylor/html2text
# github.com/mattn/go-runewidth v0.0.7 # github.com/mattn/go-runewidth v0.0.9
## explicit
github.com/mattn/go-runewidth github.com/mattn/go-runewidth
# github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2 # github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2
## explicit ## explicit
github.com/mattn/go-xmpp github.com/mattn/go-xmpp
# github.com/mmcdole/gofeed v1.0.0-beta2 # github.com/mmcdole/gofeed v1.0.0
## explicit ## explicit
github.com/mmcdole/gofeed github.com/mmcdole/gofeed
github.com/mmcdole/gofeed/atom github.com/mmcdole/gofeed/atom
@ -22,7 +24,6 @@ github.com/mmcdole/gofeed/extensions
github.com/mmcdole/gofeed/internal/shared github.com/mmcdole/gofeed/internal/shared
github.com/mmcdole/gofeed/rss github.com/mmcdole/gofeed/rss
# github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf # github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf
## explicit
github.com/mmcdole/goxpp github.com/mmcdole/goxpp
# github.com/olekukonko/tablewriter v0.0.4 # github.com/olekukonko/tablewriter v0.0.4
## explicit ## explicit
@ -30,7 +31,9 @@ github.com/olekukonko/tablewriter
# github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf # github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf
## explicit ## explicit
github.com/ssor/bom github.com/ssor/bom
# golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e # github.com/stretchr/testify v1.5.1
## explicit
# golang.org/x/net v0.0.0-20200513185701-a91f0712d120
## explicit ## explicit
golang.org/x/net/html golang.org/x/net/html
golang.org/x/net/html/atom golang.org/x/net/html/atom