Update vendored dependencies.

This commit is contained in:
Martin Dosch 2020-05-14 16:07:09 +02:00
parent 48690e232f
commit d6f44d987e
29 changed files with 1240 additions and 488 deletions

9
go.mod
View file

@ -4,14 +4,15 @@ go 1.14
require (
github.com/PuerkitoBio/goquery v1.5.1 // indirect
github.com/andybalholm/cascadia v1.2.0 // indirect
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9
github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7
github.com/mattn/go-runewidth v0.0.9 // indirect
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2
github.com/mmcdole/gofeed v1.0.0-beta2
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf // indirect
github.com/mmcdole/gofeed v1.0.0
github.com/olekukonko/tablewriter v0.0.4 // indirect
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/stretchr/testify v1.5.1 // indirect
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e // indirect
golang.org/x/net v0.0.0-20200513185701-a91f0712d120 // indirect
golang.org/x/text v0.3.2 // indirect
)

24
go.sum
View file

@ -1,19 +1,28 @@
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 h1:z0uK8UQqjMVYzvk4tiiu3obv2B44+XBsvgEJREQfnO8=
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9/go.mod h1:Jl2neWsQaDanWORdqZ4emBl50J4/aRBBS4FyyG9/PFo=
github.com/codegangsta/cli v1.20.0/go.mod h1:/qJNoX69yVSKu5o4jLyXAENLRyk1uhi7zkbQ3slBdOA=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195 h1:j0UEFmS7wSjAwKEIkgKBn8PRDfjcuggzr93R9wk53nQ=
github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 h1:g0fAGBisHaEQ0TRq1iBvemFRf+8AEWEmBESSiWB3Vsc=
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54=
github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2 h1:F544zRtDc/pMpFNHN46oeXV2jIAG4DoMH+6zlVSn0Q8=
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2/go.mod h1:Cs5mF0OsrRRmhkyOod//ldNPOwJsrBvJ+1WRspv0xoc=
github.com/mmcdole/gofeed v1.0.0-beta2 h1:CjQ0ADhAwNSb08zknAkGOEYqr8zfZKfrzgk9BxpWP2E=
github.com/mmcdole/gofeed v1.0.0-beta2/go.mod h1:/BF9JneEL2/flujm8XHoxUcghdTV6vvb3xx/vKyChFU=
github.com/mmcdole/gofeed v1.0.0 h1:PHqwr8fsEm8xarj9s53XeEAFYhRM3E9Ib7Ie766/LTE=
github.com/mmcdole/gofeed v1.0.0/go.mod h1:tkVcyzS3qVMlQrQxJoEH1hkTiuo9a8emDzkMi7TZBu0=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8=
@ -23,13 +32,16 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120 h1:EZ3cVSzKOlJxAd8e8YAJ7no8nNypTxexh/YE/xW3ZEY=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View file

@ -1,3 +1,5 @@
module "github.com/andybalholm/cascadia"
module github.com/andybalholm/cascadia
require "golang.org/x/net" v0.0.0-20180218175443-cbe0f9307d01
require golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01
go 1.13

View file

@ -13,6 +13,10 @@ import (
type parser struct {
s string // the source text
i int // the current position
// if `false`, parsing a pseudo-element
// returns an error.
acceptPseudoElements bool
}
// parseEscape parses a backslash escape.
@ -29,7 +33,7 @@ func (p *parser) parseEscape() (result string, err error) {
case hexDigit(c):
// unicode escape (hex)
var i int
for i = start; i < p.i+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
for i = start; i < start+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
// empty
}
v, _ := strconv.ParseUint(p.s[start:i], 16, 21)
@ -422,17 +426,25 @@ var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
var errUnmatchedParenthesis = errors.New("unmatched '('")
// parsePseudoclassSelector parses a pseudoclass selector like :not(p)
func (p *parser) parsePseudoclassSelector() (out Sel, err error) {
// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element
// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
// https://drafts.csswg.org/selectors-3/#pseudo-elements
// Returning a nil `Sel` (and a nil `error`) means we found a pseudo-element.
func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) {
if p.i >= len(p.s) {
return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
}
if p.s[p.i] != ':' {
return nil, fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i])
return nil, "", fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i])
}
p.i++
var mustBePseudoElement bool
if p.i >= len(p.s) {
return nil, "", fmt.Errorf("got empty pseudoclass (or pseudoelement)")
}
if p.s[p.i] == ':' { // we found a pseudo-element
mustBePseudoElement = true
p.i++
}
@ -441,27 +453,33 @@ func (p *parser) parsePseudoclassSelector() (out Sel, err error) {
return
}
name = toLowerASCII(name)
if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" &&
name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" &&
name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") {
return out, "", fmt.Errorf("unknown pseudoelement :%s", name)
}
switch name {
case "not", "has", "haschild":
if !p.consumeParenthesis() {
return out, errExpectedParenthesis
return out, "", errExpectedParenthesis
}
sel, parseErr := p.parseSelectorGroup()
if parseErr != nil {
return out, parseErr
return out, "", parseErr
}
if !p.consumeClosingParenthesis() {
return out, errExpectedClosingParenthesis
return out, "", errExpectedClosingParenthesis
}
out = relativePseudoClassSelector{name: name, match: sel}
case "contains", "containsown":
if !p.consumeParenthesis() {
return out, errExpectedParenthesis
return out, "", errExpectedParenthesis
}
if p.i == len(p.s) {
return out, errUnmatchedParenthesis
return out, "", errUnmatchedParenthesis
}
var val string
switch p.s[p.i] {
@ -471,46 +489,46 @@ func (p *parser) parsePseudoclassSelector() (out Sel, err error) {
val, err = p.parseIdentifier()
}
if err != nil {
return out, err
return out, "", err
}
val = strings.ToLower(val)
p.skipWhitespace()
if p.i >= len(p.s) {
return out, errors.New("unexpected EOF in pseudo selector")
return out, "", errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
return out, errExpectedClosingParenthesis
return out, "", errExpectedClosingParenthesis
}
out = containsPseudoClassSelector{own: name == "containsown", value: val}
case "matches", "matchesown":
if !p.consumeParenthesis() {
return out, errExpectedParenthesis
return out, "", errExpectedParenthesis
}
rx, err := p.parseRegex()
if err != nil {
return out, err
return out, "", err
}
if p.i >= len(p.s) {
return out, errors.New("unexpected EOF in pseudo selector")
return out, "", errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
return out, errExpectedClosingParenthesis
return out, "", errExpectedClosingParenthesis
}
out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx}
case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type":
if !p.consumeParenthesis() {
return out, errExpectedParenthesis
return out, "", errExpectedParenthesis
}
a, b, err := p.parseNth()
if err != nil {
return out, err
return out, "", err
}
if !p.consumeClosingParenthesis() {
return out, errExpectedClosingParenthesis
return out, "", errExpectedClosingParenthesis
}
last := name == "nth-last-child" || name == "nth-last-of-type"
ofType := name == "nth-of-type" || name == "nth-last-of-type"
@ -535,9 +553,9 @@ func (p *parser) parsePseudoclassSelector() (out Sel, err error) {
case "root":
out = rootPseudoClassSelector{}
case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error":
return out, errors.New("pseudo-elements are not yet supported")
return nil, name, nil
default:
return out, fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name)
return out, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name)
}
return
}
@ -706,11 +724,13 @@ func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
selectors = append(selectors, r)
}
var pseudoElement string
loop:
for p.i < len(p.s) {
var (
ns Sel
err error
ns Sel
newPseudoElement string
err error
)
switch p.s[p.i] {
case '#':
@ -720,20 +740,37 @@ loop:
case '[':
ns, err = p.parseAttributeSelector()
case ':':
ns, err = p.parsePseudoclassSelector()
ns, newPseudoElement, err = p.parsePseudoclassSelector()
default:
break loop
}
if err != nil {
return nil, err
}
// From https://drafts.csswg.org/selectors-3/#pseudo-elements :
// "Only one pseudo-element may appear per selector, and if present
// it must appear after the sequence of simple selectors that
// represents the subjects of the selector.""
if ns == nil { // we found a pseudo-element
if pseudoElement != "" {
return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement)
}
if !p.acceptPseudoElements {
return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement)
}
pseudoElement = newPseudoElement
} else {
if pseudoElement != "" {
return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement)
}
selectors = append(selectors, ns)
}
selectors = append(selectors, ns)
}
if len(selectors) == 1 { // no need wrap the selectors in compoundSelector
if len(selectors) == 1 && pseudoElement == "" { // no need wrap the selectors in compoundSelector
return selectors[0], nil
}
return compoundSelector{selectors: selectors}, nil
return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil
}
// parseSelector parses a selector that may include combinators.

View file

@ -16,14 +16,19 @@ type Matcher interface {
}
// Sel is the interface for all the functionality provided by selectors.
// It is currently the same as Matcher, but other methods may be added in the
// future.
type Sel interface {
Matcher
Specificity() Specificity
// Returns a CSS input compiling to this selector.
String() string
// Returns a pseudo-element, or an empty string.
PseudoElement() string
}
// Parse parses a selector.
// Parse parses a selector. Use `ParseWithPseudoElement`
// if you need support for pseudo-elements.
func Parse(sel string) (Sel, error) {
p := &parser{s: sel}
compiled, err := p.parseSelector()
@ -38,7 +43,25 @@ func Parse(sel string) (Sel, error) {
return compiled, nil
}
// ParseWithPseudoElement parses a single selector,
// with support for pseudo-element.
func ParseWithPseudoElement(sel string) (Sel, error) {
p := &parser{s: sel, acceptPseudoElements: true}
compiled, err := p.parseSelector()
if err != nil {
return nil, err
}
if p.i < len(sel) {
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
}
return compiled, nil
}
// ParseGroup parses a selector, or a group of selectors separated by commas.
// Use `ParseGroupWithPseudoElements`
// if you need support for pseudo-elements.
func ParseGroup(sel string) (SelectorGroup, error) {
p := &parser{s: sel}
compiled, err := p.parseSelectorGroup()
@ -53,6 +76,22 @@ func ParseGroup(sel string) (SelectorGroup, error) {
return compiled, nil
}
// ParseGroupWithPseudoElements parses a selector, or a group of selectors separated by commas.
// It supports pseudo-elements.
func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) {
p := &parser{s: sel, acceptPseudoElements: true}
compiled, err := p.parseSelectorGroup()
if err != nil {
return nil, err
}
if p.i < len(sel) {
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
}
return compiled, nil
}
// A Selector is a function which tells whether a node matches or not.
//
// This type is maintained for compatibility; I recommend using the newer and
@ -182,6 +221,10 @@ func (c tagSelector) Specificity() Specificity {
return Specificity{0, 0, 1}
}
func (c tagSelector) PseudoElement() string {
return ""
}
type classSelector struct {
class string
}
@ -197,6 +240,10 @@ func (c classSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c classSelector) PseudoElement() string {
return ""
}
type idSelector struct {
id string
}
@ -212,6 +259,10 @@ func (c idSelector) Specificity() Specificity {
return Specificity{1, 0, 0}
}
func (c idSelector) PseudoElement() string {
return ""
}
type attrSelector struct {
key, val, operation string
regexp *regexp.Regexp
@ -352,6 +403,10 @@ func (c attrSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c attrSelector) PseudoElement() string {
return ""
}
// ---------------- Pseudo class selectors ----------------
// we use severals concrete types of pseudo-class selectors
@ -415,6 +470,10 @@ func (s relativePseudoClassSelector) Specificity() Specificity {
return max
}
func (c relativePseudoClassSelector) PseudoElement() string {
return ""
}
type containsPseudoClassSelector struct {
own bool
value string
@ -436,6 +495,10 @@ func (s containsPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c containsPseudoClassSelector) PseudoElement() string {
return ""
}
type regexpPseudoClassSelector struct {
own bool
regexp *regexp.Regexp
@ -488,6 +551,10 @@ func (s regexpPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c regexpPseudoClassSelector) PseudoElement() string {
return ""
}
type nthPseudoClassSelector struct {
a, b int
last, ofType bool
@ -623,6 +690,10 @@ func (s nthPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c nthPseudoClassSelector) PseudoElement() string {
return ""
}
type onlyChildPseudoClassSelector struct {
ofType bool
}
@ -661,6 +732,10 @@ func (s onlyChildPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c onlyChildPseudoClassSelector) PseudoElement() string {
return ""
}
type inputPseudoClassSelector struct{}
// Matches input, select, textarea and button elements.
@ -672,6 +747,10 @@ func (s inputPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c inputPseudoClassSelector) PseudoElement() string {
return ""
}
type emptyElementPseudoClassSelector struct{}
// Matches empty elements.
@ -694,6 +773,10 @@ func (s emptyElementPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c emptyElementPseudoClassSelector) PseudoElement() string {
return ""
}
type rootPseudoClassSelector struct{}
// Match implements :root
@ -711,8 +794,13 @@ func (s rootPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c rootPseudoClassSelector) PseudoElement() string {
return ""
}
type compoundSelector struct {
selectors []Sel
selectors []Sel
pseudoElement string
}
// Matches elements if each sub-selectors matches.
@ -734,9 +822,17 @@ func (s compoundSelector) Specificity() Specificity {
for _, sel := range s.selectors {
out = out.Add(sel.Specificity())
}
if s.pseudoElement != "" {
// https://drafts.csswg.org/selectors-3/#specificity
out = out.Add(Specificity{0, 0, 1})
}
return out
}
func (c compoundSelector) PseudoElement() string {
return c.pseudoElement
}
type combinedSelector struct {
first Sel
combinator byte
@ -818,6 +914,15 @@ func (s combinedSelector) Specificity() Specificity {
return spec
}
// on combinedSelector, a pseudo-element only makes sens on the last
// selector, although others increase specificity.
func (c combinedSelector) PseudoElement() string {
if c.second == nil {
return ""
}
return c.second.PseudoElement()
}
// A SelectorGroup is a list of selectors, which matches if any of the
// individual selectors matches.
type SelectorGroup []Sel

120
vendor/github.com/andybalholm/cascadia/serialize.go generated vendored Normal file
View file

@ -0,0 +1,120 @@
package cascadia
import (
"fmt"
"strings"
)
// implements the reverse operation Sel -> string
func (c tagSelector) String() string {
return c.tag
}
func (c idSelector) String() string {
return "#" + c.id
}
func (c classSelector) String() string {
return "." + c.class
}
func (c attrSelector) String() string {
val := c.val
if c.operation == "#=" {
val = c.regexp.String()
} else if c.operation != "" {
val = fmt.Sprintf(`"%s"`, val)
}
return fmt.Sprintf(`[%s%s%s]`, c.key, c.operation, val)
}
func (c relativePseudoClassSelector) String() string {
return fmt.Sprintf(":%s(%s)", c.name, c.match.String())
}
func (c containsPseudoClassSelector) String() string {
s := "contains"
if c.own {
s += "Own"
}
return fmt.Sprintf(`:%s("%s")`, s, c.value)
}
func (c regexpPseudoClassSelector) String() string {
s := "matches"
if c.own {
s += "Own"
}
return fmt.Sprintf(":%s(%s)", s, c.regexp.String())
}
func (c nthPseudoClassSelector) String() string {
if c.a == 0 && c.b == 1 { // special cases
s := ":first-"
if c.last {
s = ":last-"
}
if c.ofType {
s += "of-type"
} else {
s += "child"
}
return s
}
var name string
switch [2]bool{c.last, c.ofType} {
case [2]bool{true, true}:
name = "nth-last-of-type"
case [2]bool{true, false}:
name = "nth-last-child"
case [2]bool{false, true}:
name = "nth-of-type"
case [2]bool{false, false}:
name = "nth-child"
}
return fmt.Sprintf(":%s(%dn+%d)", name, c.a, c.b)
}
func (c onlyChildPseudoClassSelector) String() string {
if c.ofType {
return ":only-of-type"
}
return ":only-child"
}
func (c inputPseudoClassSelector) String() string {
return ":input"
}
func (c emptyElementPseudoClassSelector) String() string {
return ":empty"
}
func (c rootPseudoClassSelector) String() string {
return ":root"
}
func (c compoundSelector) String() string {
if len(c.selectors) == 0 && c.pseudoElement == "" {
return "*"
}
chunks := make([]string, len(c.selectors))
for i, sel := range c.selectors {
chunks[i] = sel.String()
}
s := strings.Join(chunks, "")
if c.pseudoElement != "" {
s += "::" + c.pseudoElement
}
return s
}
func (c combinedSelector) String() string {
start := c.first.String()
if c.second != nil {
start += fmt.Sprintf(" %s %s", string(c.combinator), c.second.String())
}
return start
}
func (c SelectorGroup) String() string {
ck := make([]string, len(c))
for i, s := range c {
ck[i] = s.String()
}
return strings.Join(ck, ", ")
}

View file

@ -135,3 +135,6 @@ Email: jay at (my github username).com
Twitter: [@jtaylor](https://twitter.com/jtaylor)
# Alternatives
https://github.com/k3a/html2text - Lightweight

View file

@ -1,8 +1,16 @@
language: go
sudo: false
go:
- 1.13.x
- tip
before_install:
- go get github.com/mattn/goveralls
- go get golang.org/x/tools/cmd/cover
- go get -t -v ./...
script:
- $HOME/gopath/bin/goveralls -repotoken lAKAWPzcGsD3A8yBX3BGGtRUdJ6CaGERL
- go generate
- git diff --cached --exit-code
- ./go.test.sh
after_success:
- bash <(curl -s https://codecov.io/bash)

View file

@ -2,7 +2,7 @@ go-runewidth
============
[![Build Status](https://travis-ci.org/mattn/go-runewidth.png?branch=master)](https://travis-ci.org/mattn/go-runewidth)
[![Coverage Status](https://coveralls.io/repos/mattn/go-runewidth/badge.png?branch=HEAD)](https://coveralls.io/r/mattn/go-runewidth?branch=HEAD)
[![Codecov](https://codecov.io/gh/mattn/go-runewidth/branch/master/graph/badge.svg)](https://codecov.io/gh/mattn/go-runewidth)
[![GoDoc](https://godoc.org/github.com/mattn/go-runewidth?status.svg)](http://godoc.org/github.com/mattn/go-runewidth)
[![Go Report Card](https://goreportcard.com/badge/github.com/mattn/go-runewidth)](https://goreportcard.com/report/github.com/mattn/go-runewidth)

12
vendor/github.com/mattn/go-runewidth/go.test.sh generated vendored Normal file
View file

@ -0,0 +1,12 @@
#!/usr/bin/env bash
set -e
echo "" > coverage.txt
for d in $(go list ./... | grep -v vendor); do
go test -race -coverprofile=profile.out -covermode=atomic "$d"
if [ -f profile.out ]; then
cat profile.out >> coverage.txt
rm profile.out
fi
done

View file

@ -50,7 +50,6 @@ func inTables(r rune, ts ...table) bool {
}
func inTable(r rune, t table) bool {
// func (t table) IncludesRune(r rune) bool {
if r < t[0].first {
return false
}

View file

@ -62,7 +62,10 @@ func isEastAsian(locale string) bool {
// IsEastAsian return true if the current locale is CJK
func IsEastAsian() bool {
locale := os.Getenv("LC_CTYPE")
locale := os.Getenv("LC_ALL")
if locale == "" {
locale = os.Getenv("LC_CTYPE")
}
if locale == "" {
locale = os.Getenv("LANG")
}

View file

@ -1,20 +1,23 @@
// Code generated by script/generate.go. DO NOT EDIT.
package runewidth
var combining = table{
{0x0300, 0x036F}, {0x0483, 0x0489}, {0x07EB, 0x07F3},
{0x0C00, 0x0C00}, {0x0C04, 0x0C04}, {0x0D00, 0x0D01},
{0x135D, 0x135F}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1ABE},
{0x135D, 0x135F}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1AC0},
{0x1B6B, 0x1B73}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF},
{0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2DE0, 0x2DFF},
{0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D},
{0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA8E0, 0xA8F1},
{0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x10376, 0x1037A},
{0x10F46, 0x10F50}, {0x11300, 0x11301}, {0x1133B, 0x1133C},
{0x11366, 0x1136C}, {0x11370, 0x11374}, {0x16AF0, 0x16AF4},
{0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182},
{0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244},
{0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021},
{0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E8D0, 0x1E8D6},
{0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x11300, 0x11301},
{0x1133B, 0x1133C}, {0x11366, 0x1136C}, {0x11370, 0x11374},
{0x16AF0, 0x16AF4}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172},
{0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD},
{0x1D242, 0x1D244}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018},
{0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A},
{0x1E8D0, 0x1E8D6},
}
var doublewidth = table{
@ -32,29 +35,30 @@ var doublewidth = table{
{0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99},
{0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB},
{0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF},
{0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31BA},
{0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247},
{0x3250, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6},
{0xA960, 0xA97C}, {0xAC00, 0xD7A3}, {0xF900, 0xFAFF},
{0xFE10, 0xFE19}, {0xFE30, 0xFE52}, {0xFE54, 0xFE66},
{0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
{0x16FE0, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18AF2},
{0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167},
{0x1B170, 0x1B2FB}, {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF},
{0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A}, {0x1F200, 0x1F202},
{0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, {0x1F250, 0x1F251},
{0x1F260, 0x1F265}, {0x1F300, 0x1F320}, {0x1F32D, 0x1F335},
{0x1F337, 0x1F37C}, {0x1F37E, 0x1F393}, {0x1F3A0, 0x1F3CA},
{0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0}, {0x1F3F4, 0x1F3F4},
{0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440}, {0x1F442, 0x1F4FC},
{0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E}, {0x1F550, 0x1F567},
{0x1F57A, 0x1F57A}, {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A4},
{0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC},
{0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D5}, {0x1F6EB, 0x1F6EC},
{0x1F6F4, 0x1F6FA}, {0x1F7E0, 0x1F7EB}, {0x1F90D, 0x1F971},
{0x1F973, 0x1F976}, {0x1F97A, 0x1F9A2}, {0x1F9A5, 0x1F9AA},
{0x1F9AE, 0x1F9CA}, {0x1F9CD, 0x1F9FF}, {0x1FA70, 0x1FA73},
{0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA82}, {0x1FA90, 0x1FA95},
{0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31E3},
{0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x4DBF},
{0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
{0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19},
{0xFE30, 0xFE52}, {0xFE54, 0xFE66}, {0xFE68, 0xFE6B},
{0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4},
{0x16FF0, 0x16FF1}, {0x17000, 0x187F7}, {0x18800, 0x18CD5},
{0x18D00, 0x18D08}, {0x1B000, 0x1B11E}, {0x1B150, 0x1B152},
{0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1F004, 0x1F004},
{0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A},
{0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248},
{0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F320},
{0x1F32D, 0x1F335}, {0x1F337, 0x1F37C}, {0x1F37E, 0x1F393},
{0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0},
{0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440},
{0x1F442, 0x1F4FC}, {0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E},
{0x1F550, 0x1F567}, {0x1F57A, 0x1F57A}, {0x1F595, 0x1F596},
{0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5},
{0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D7},
{0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB},
{0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1F978},
{0x1F97A, 0x1F9CB}, {0x1F9CD, 0x1F9FF}, {0x1FA70, 0x1FA74},
{0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAA8},
{0x1FAB0, 0x1FAB6}, {0x1FAC0, 0x1FAC2}, {0x1FAD0, 0x1FAD6},
{0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
}
@ -151,7 +155,7 @@ var neutral = table{
{0x0600, 0x061C}, {0x061E, 0x070D}, {0x070F, 0x074A},
{0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D},
{0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E},
{0x0860, 0x086A}, {0x08A0, 0x08B4}, {0x08B6, 0x08BD},
{0x0860, 0x086A}, {0x08A0, 0x08B4}, {0x08B6, 0x08C7},
{0x08D3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
{0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
{0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8},
@ -170,7 +174,7 @@ var neutral = table{
{0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28},
{0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39},
{0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
{0x0B56, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63},
{0x0B55, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63},
{0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
{0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
{0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4},
@ -184,166 +188,169 @@ var neutral = table{
{0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9},
{0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
{0x0CD5, 0x0CD6}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE3},
{0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2}, {0x0D00, 0x0D03},
{0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44},
{0x0D46, 0x0D48}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63},
{0x0D66, 0x0D7F}, {0x0D82, 0x0D83}, {0x0D85, 0x0D96},
{0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD},
{0x0DC0, 0x0DC6}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4},
{0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF},
{0x0DF2, 0x0DF4}, {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B},
{0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A},
{0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD},
{0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD},
{0x0ED0, 0x0ED9}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47},
{0x0F49, 0x0F6C}, {0x0F71, 0x0F97}, {0x0F99, 0x0FBC},
{0x0FBE, 0x0FCC}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5},
{0x10C7, 0x10C7}, {0x10CD, 0x10CD}, {0x10D0, 0x10FF},
{0x1160, 0x1248}, {0x124A, 0x124D}, {0x1250, 0x1256},
{0x1258, 0x1258}, {0x125A, 0x125D}, {0x1260, 0x1288},
{0x128A, 0x128D}, {0x1290, 0x12B0}, {0x12B2, 0x12B5},
{0x12B8, 0x12BE}, {0x12C0, 0x12C0}, {0x12C2, 0x12C5},
{0x12C8, 0x12D6}, {0x12D8, 0x1310}, {0x1312, 0x1315},
{0x1318, 0x135A}, {0x135D, 0x137C}, {0x1380, 0x1399},
{0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1400, 0x169C},
{0x16A0, 0x16F8}, {0x1700, 0x170C}, {0x170E, 0x1714},
{0x1720, 0x1736}, {0x1740, 0x1753}, {0x1760, 0x176C},
{0x176E, 0x1770}, {0x1772, 0x1773}, {0x1780, 0x17DD},
{0x17E0, 0x17E9}, {0x17F0, 0x17F9}, {0x1800, 0x180E},
{0x1810, 0x1819}, {0x1820, 0x1878}, {0x1880, 0x18AA},
{0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1920, 0x192B},
{0x1930, 0x193B}, {0x1940, 0x1940}, {0x1944, 0x196D},
{0x1970, 0x1974}, {0x1980, 0x19AB}, {0x19B0, 0x19C9},
{0x19D0, 0x19DA}, {0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E},
{0x1A60, 0x1A7C}, {0x1A7F, 0x1A89}, {0x1A90, 0x1A99},
{0x1AA0, 0x1AAD}, {0x1AB0, 0x1ABE}, {0x1B00, 0x1B4B},
{0x1B50, 0x1B7C}, {0x1B80, 0x1BF3}, {0x1BFC, 0x1C37},
{0x1C3B, 0x1C49}, {0x1C4D, 0x1C88}, {0x1C90, 0x1CBA},
{0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA}, {0x1D00, 0x1DF9},
{0x1DFB, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45},
{0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
{0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D},
{0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3},
{0x1FD6, 0x1FDB}, {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4},
{0x1FF6, 0x1FFE}, {0x2000, 0x200F}, {0x2011, 0x2012},
{0x2017, 0x2017}, {0x201A, 0x201B}, {0x201E, 0x201F},
{0x2023, 0x2023}, {0x2028, 0x202F}, {0x2031, 0x2031},
{0x2034, 0x2034}, {0x2036, 0x203A}, {0x203C, 0x203D},
{0x203F, 0x2064}, {0x2066, 0x2071}, {0x2075, 0x207E},
{0x2080, 0x2080}, {0x2085, 0x208E}, {0x2090, 0x209C},
{0x20A0, 0x20A8}, {0x20AA, 0x20AB}, {0x20AD, 0x20BF},
{0x20D0, 0x20F0}, {0x2100, 0x2102}, {0x2104, 0x2104},
{0x2106, 0x2108}, {0x210A, 0x2112}, {0x2114, 0x2115},
{0x2117, 0x2120}, {0x2123, 0x2125}, {0x2127, 0x212A},
{0x212C, 0x2152}, {0x2155, 0x215A}, {0x215F, 0x215F},
{0x216C, 0x216F}, {0x217A, 0x2188}, {0x218A, 0x218B},
{0x219A, 0x21B7}, {0x21BA, 0x21D1}, {0x21D3, 0x21D3},
{0x21D5, 0x21E6}, {0x21E8, 0x21FF}, {0x2201, 0x2201},
{0x2204, 0x2206}, {0x2209, 0x220A}, {0x220C, 0x220E},
{0x2210, 0x2210}, {0x2212, 0x2214}, {0x2216, 0x2219},
{0x221B, 0x221C}, {0x2221, 0x2222}, {0x2224, 0x2224},
{0x2226, 0x2226}, {0x222D, 0x222D}, {0x222F, 0x2233},
{0x2238, 0x223B}, {0x223E, 0x2247}, {0x2249, 0x224B},
{0x224D, 0x2251}, {0x2253, 0x225F}, {0x2262, 0x2263},
{0x2268, 0x2269}, {0x226C, 0x226D}, {0x2270, 0x2281},
{0x2284, 0x2285}, {0x2288, 0x2294}, {0x2296, 0x2298},
{0x229A, 0x22A4}, {0x22A6, 0x22BE}, {0x22C0, 0x2311},
{0x2313, 0x2319}, {0x231C, 0x2328}, {0x232B, 0x23E8},
{0x23ED, 0x23EF}, {0x23F1, 0x23F2}, {0x23F4, 0x2426},
{0x2440, 0x244A}, {0x24EA, 0x24EA}, {0x254C, 0x254F},
{0x2574, 0x257F}, {0x2590, 0x2591}, {0x2596, 0x259F},
{0x25A2, 0x25A2}, {0x25AA, 0x25B1}, {0x25B4, 0x25B5},
{0x25B8, 0x25BB}, {0x25BE, 0x25BF}, {0x25C2, 0x25C5},
{0x25C9, 0x25CA}, {0x25CC, 0x25CD}, {0x25D2, 0x25E1},
{0x25E6, 0x25EE}, {0x25F0, 0x25FC}, {0x25FF, 0x2604},
{0x2607, 0x2608}, {0x260A, 0x260D}, {0x2610, 0x2613},
{0x2616, 0x261B}, {0x261D, 0x261D}, {0x261F, 0x263F},
{0x2641, 0x2641}, {0x2643, 0x2647}, {0x2654, 0x265F},
{0x2662, 0x2662}, {0x2666, 0x2666}, {0x266B, 0x266B},
{0x266E, 0x266E}, {0x2670, 0x267E}, {0x2680, 0x2692},
{0x2694, 0x269D}, {0x26A0, 0x26A0}, {0x26A2, 0x26A9},
{0x26AC, 0x26BC}, {0x26C0, 0x26C3}, {0x26E2, 0x26E2},
{0x26E4, 0x26E7}, {0x2700, 0x2704}, {0x2706, 0x2709},
{0x270C, 0x2727}, {0x2729, 0x273C}, {0x273E, 0x274B},
{0x274D, 0x274D}, {0x274F, 0x2752}, {0x2756, 0x2756},
{0x2758, 0x2775}, {0x2780, 0x2794}, {0x2798, 0x27AF},
{0x27B1, 0x27BE}, {0x27C0, 0x27E5}, {0x27EE, 0x2984},
{0x2987, 0x2B1A}, {0x2B1D, 0x2B4F}, {0x2B51, 0x2B54},
{0x2B5A, 0x2B73}, {0x2B76, 0x2B95}, {0x2B98, 0x2C2E},
{0x2C30, 0x2C5E}, {0x2C60, 0x2CF3}, {0x2CF9, 0x2D25},
{0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67},
{0x2D6F, 0x2D70}, {0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6},
{0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE},
{0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6},
{0x2DD8, 0x2DDE}, {0x2DE0, 0x2E4F}, {0x303F, 0x303F},
{0x4DC0, 0x4DFF}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7},
{0xA700, 0xA7BF}, {0xA7C2, 0xA7C6}, {0xA7F7, 0xA82B},
{0xA830, 0xA839}, {0xA840, 0xA877}, {0xA880, 0xA8C5},
{0xA8CE, 0xA8D9}, {0xA8E0, 0xA953}, {0xA95F, 0xA95F},
{0xA980, 0xA9CD}, {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE},
{0xAA00, 0xAA36}, {0xAA40, 0xAA4D}, {0xAA50, 0xAA59},
{0xAA5C, 0xAAC2}, {0xAADB, 0xAAF6}, {0xAB01, 0xAB06},
{0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26},
{0xAB28, 0xAB2E}, {0xAB30, 0xAB67}, {0xAB70, 0xABED},
{0xABF0, 0xABF9}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB},
{0xD800, 0xDFFF}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17},
{0xFB1D, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E},
{0xFB40, 0xFB41}, {0xFB43, 0xFB44}, {0xFB46, 0xFBC1},
{0xFBD3, 0xFD3F}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7},
{0xFDF0, 0xFDFD}, {0xFE20, 0xFE2F}, {0xFE70, 0xFE74},
{0xFE76, 0xFEFC}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFC},
{0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A},
{0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D},
{0x10080, 0x100FA}, {0x10100, 0x10102}, {0x10107, 0x10133},
{0x10137, 0x1018E}, {0x10190, 0x1019B}, {0x101A0, 0x101A0},
{0x101D0, 0x101FD}, {0x10280, 0x1029C}, {0x102A0, 0x102D0},
{0x102E0, 0x102FB}, {0x10300, 0x10323}, {0x1032D, 0x1034A},
{0x10350, 0x1037A}, {0x10380, 0x1039D}, {0x1039F, 0x103C3},
{0x103C8, 0x103D5}, {0x10400, 0x1049D}, {0x104A0, 0x104A9},
{0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527},
{0x10530, 0x10563}, {0x1056F, 0x1056F}, {0x10600, 0x10736},
{0x10740, 0x10755}, {0x10760, 0x10767}, {0x10800, 0x10805},
{0x10808, 0x10808}, {0x1080A, 0x10835}, {0x10837, 0x10838},
{0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10857, 0x1089E},
{0x108A7, 0x108AF}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5},
{0x108FB, 0x1091B}, {0x1091F, 0x10939}, {0x1093F, 0x1093F},
{0x10980, 0x109B7}, {0x109BC, 0x109CF}, {0x109D2, 0x10A03},
{0x10A05, 0x10A06}, {0x10A0C, 0x10A13}, {0x10A15, 0x10A17},
{0x10A19, 0x10A35}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48},
{0x10A50, 0x10A58}, {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6},
{0x10AEB, 0x10AF6}, {0x10B00, 0x10B35}, {0x10B39, 0x10B55},
{0x10B58, 0x10B72}, {0x10B78, 0x10B91}, {0x10B99, 0x10B9C},
{0x10BA9, 0x10BAF}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2},
{0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27}, {0x10D30, 0x10D39},
{0x10E60, 0x10E7E}, {0x10F00, 0x10F27}, {0x10F30, 0x10F59},
{0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2}, {0x0D00, 0x0D0C},
{0x0D0E, 0x0D10}, {0x0D12, 0x0D44}, {0x0D46, 0x0D48},
{0x0D4A, 0x0D4F}, {0x0D54, 0x0D63}, {0x0D66, 0x0D7F},
{0x0D81, 0x0D83}, {0x0D85, 0x0D96}, {0x0D9A, 0x0DB1},
{0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD}, {0x0DC0, 0x0DC6},
{0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4}, {0x0DD6, 0x0DD6},
{0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF}, {0x0DF2, 0x0DF4},
{0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B}, {0x0E81, 0x0E82},
{0x0E84, 0x0E84}, {0x0E86, 0x0E8A}, {0x0E8C, 0x0EA3},
{0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD}, {0x0EC0, 0x0EC4},
{0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9},
{0x0EDC, 0x0EDF}, {0x0F00, 0x0F47}, {0x0F49, 0x0F6C},
{0x0F71, 0x0F97}, {0x0F99, 0x0FBC}, {0x0FBE, 0x0FCC},
{0x0FCE, 0x0FDA}, {0x1000, 0x10C5}, {0x10C7, 0x10C7},
{0x10CD, 0x10CD}, {0x10D0, 0x10FF}, {0x1160, 0x1248},
{0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258},
{0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D},
{0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE},
{0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6},
{0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A},
{0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5},
{0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8},
{0x1700, 0x170C}, {0x170E, 0x1714}, {0x1720, 0x1736},
{0x1740, 0x1753}, {0x1760, 0x176C}, {0x176E, 0x1770},
{0x1772, 0x1773}, {0x1780, 0x17DD}, {0x17E0, 0x17E9},
{0x17F0, 0x17F9}, {0x1800, 0x180E}, {0x1810, 0x1819},
{0x1820, 0x1878}, {0x1880, 0x18AA}, {0x18B0, 0x18F5},
{0x1900, 0x191E}, {0x1920, 0x192B}, {0x1930, 0x193B},
{0x1940, 0x1940}, {0x1944, 0x196D}, {0x1970, 0x1974},
{0x1980, 0x19AB}, {0x19B0, 0x19C9}, {0x19D0, 0x19DA},
{0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C},
{0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD},
{0x1AB0, 0x1AC0}, {0x1B00, 0x1B4B}, {0x1B50, 0x1B7C},
{0x1B80, 0x1BF3}, {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49},
{0x1C4D, 0x1C88}, {0x1C90, 0x1CBA}, {0x1CBD, 0x1CC7},
{0x1CD0, 0x1CFA}, {0x1D00, 0x1DF9}, {0x1DFB, 0x1F15},
{0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
{0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B},
{0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
{0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3}, {0x1FD6, 0x1FDB},
{0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFE},
{0x2000, 0x200F}, {0x2011, 0x2012}, {0x2017, 0x2017},
{0x201A, 0x201B}, {0x201E, 0x201F}, {0x2023, 0x2023},
{0x2028, 0x202F}, {0x2031, 0x2031}, {0x2034, 0x2034},
{0x2036, 0x203A}, {0x203C, 0x203D}, {0x203F, 0x2064},
{0x2066, 0x2071}, {0x2075, 0x207E}, {0x2080, 0x2080},
{0x2085, 0x208E}, {0x2090, 0x209C}, {0x20A0, 0x20A8},
{0x20AA, 0x20AB}, {0x20AD, 0x20BF}, {0x20D0, 0x20F0},
{0x2100, 0x2102}, {0x2104, 0x2104}, {0x2106, 0x2108},
{0x210A, 0x2112}, {0x2114, 0x2115}, {0x2117, 0x2120},
{0x2123, 0x2125}, {0x2127, 0x212A}, {0x212C, 0x2152},
{0x2155, 0x215A}, {0x215F, 0x215F}, {0x216C, 0x216F},
{0x217A, 0x2188}, {0x218A, 0x218B}, {0x219A, 0x21B7},
{0x21BA, 0x21D1}, {0x21D3, 0x21D3}, {0x21D5, 0x21E6},
{0x21E8, 0x21FF}, {0x2201, 0x2201}, {0x2204, 0x2206},
{0x2209, 0x220A}, {0x220C, 0x220E}, {0x2210, 0x2210},
{0x2212, 0x2214}, {0x2216, 0x2219}, {0x221B, 0x221C},
{0x2221, 0x2222}, {0x2224, 0x2224}, {0x2226, 0x2226},
{0x222D, 0x222D}, {0x222F, 0x2233}, {0x2238, 0x223B},
{0x223E, 0x2247}, {0x2249, 0x224B}, {0x224D, 0x2251},
{0x2253, 0x225F}, {0x2262, 0x2263}, {0x2268, 0x2269},
{0x226C, 0x226D}, {0x2270, 0x2281}, {0x2284, 0x2285},
{0x2288, 0x2294}, {0x2296, 0x2298}, {0x229A, 0x22A4},
{0x22A6, 0x22BE}, {0x22C0, 0x2311}, {0x2313, 0x2319},
{0x231C, 0x2328}, {0x232B, 0x23E8}, {0x23ED, 0x23EF},
{0x23F1, 0x23F2}, {0x23F4, 0x2426}, {0x2440, 0x244A},
{0x24EA, 0x24EA}, {0x254C, 0x254F}, {0x2574, 0x257F},
{0x2590, 0x2591}, {0x2596, 0x259F}, {0x25A2, 0x25A2},
{0x25AA, 0x25B1}, {0x25B4, 0x25B5}, {0x25B8, 0x25BB},
{0x25BE, 0x25BF}, {0x25C2, 0x25C5}, {0x25C9, 0x25CA},
{0x25CC, 0x25CD}, {0x25D2, 0x25E1}, {0x25E6, 0x25EE},
{0x25F0, 0x25FC}, {0x25FF, 0x2604}, {0x2607, 0x2608},
{0x260A, 0x260D}, {0x2610, 0x2613}, {0x2616, 0x261B},
{0x261D, 0x261D}, {0x261F, 0x263F}, {0x2641, 0x2641},
{0x2643, 0x2647}, {0x2654, 0x265F}, {0x2662, 0x2662},
{0x2666, 0x2666}, {0x266B, 0x266B}, {0x266E, 0x266E},
{0x2670, 0x267E}, {0x2680, 0x2692}, {0x2694, 0x269D},
{0x26A0, 0x26A0}, {0x26A2, 0x26A9}, {0x26AC, 0x26BC},
{0x26C0, 0x26C3}, {0x26E2, 0x26E2}, {0x26E4, 0x26E7},
{0x2700, 0x2704}, {0x2706, 0x2709}, {0x270C, 0x2727},
{0x2729, 0x273C}, {0x273E, 0x274B}, {0x274D, 0x274D},
{0x274F, 0x2752}, {0x2756, 0x2756}, {0x2758, 0x2775},
{0x2780, 0x2794}, {0x2798, 0x27AF}, {0x27B1, 0x27BE},
{0x27C0, 0x27E5}, {0x27EE, 0x2984}, {0x2987, 0x2B1A},
{0x2B1D, 0x2B4F}, {0x2B51, 0x2B54}, {0x2B5A, 0x2B73},
{0x2B76, 0x2B95}, {0x2B97, 0x2C2E}, {0x2C30, 0x2C5E},
{0x2C60, 0x2CF3}, {0x2CF9, 0x2D25}, {0x2D27, 0x2D27},
{0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D70},
{0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE},
{0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6},
{0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE},
{0x2DE0, 0x2E52}, {0x303F, 0x303F}, {0x4DC0, 0x4DFF},
{0xA4D0, 0xA62B}, {0xA640, 0xA6F7}, {0xA700, 0xA7BF},
{0xA7C2, 0xA7CA}, {0xA7F5, 0xA82C}, {0xA830, 0xA839},
{0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA8CE, 0xA8D9},
{0xA8E0, 0xA953}, {0xA95F, 0xA95F}, {0xA980, 0xA9CD},
{0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36},
{0xAA40, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA5C, 0xAAC2},
{0xAADB, 0xAAF6}, {0xAB01, 0xAB06}, {0xAB09, 0xAB0E},
{0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E},
{0xAB30, 0xAB6B}, {0xAB70, 0xABED}, {0xABF0, 0xABF9},
{0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xD800, 0xDFFF},
{0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB36},
{0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41},
{0xFB43, 0xFB44}, {0xFB46, 0xFBC1}, {0xFBD3, 0xFD3F},
{0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFD},
{0xFE20, 0xFE2F}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC},
{0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFC}, {0x10000, 0x1000B},
{0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D},
{0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA},
{0x10100, 0x10102}, {0x10107, 0x10133}, {0x10137, 0x1018E},
{0x10190, 0x1019C}, {0x101A0, 0x101A0}, {0x101D0, 0x101FD},
{0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x102E0, 0x102FB},
{0x10300, 0x10323}, {0x1032D, 0x1034A}, {0x10350, 0x1037A},
{0x10380, 0x1039D}, {0x1039F, 0x103C3}, {0x103C8, 0x103D5},
{0x10400, 0x1049D}, {0x104A0, 0x104A9}, {0x104B0, 0x104D3},
{0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563},
{0x1056F, 0x1056F}, {0x10600, 0x10736}, {0x10740, 0x10755},
{0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808},
{0x1080A, 0x10835}, {0x10837, 0x10838}, {0x1083C, 0x1083C},
{0x1083F, 0x10855}, {0x10857, 0x1089E}, {0x108A7, 0x108AF},
{0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x108FB, 0x1091B},
{0x1091F, 0x10939}, {0x1093F, 0x1093F}, {0x10980, 0x109B7},
{0x109BC, 0x109CF}, {0x109D2, 0x10A03}, {0x10A05, 0x10A06},
{0x10A0C, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35},
{0x10A38, 0x10A3A}, {0x10A3F, 0x10A48}, {0x10A50, 0x10A58},
{0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6},
{0x10B00, 0x10B35}, {0x10B39, 0x10B55}, {0x10B58, 0x10B72},
{0x10B78, 0x10B91}, {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF},
{0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2},
{0x10CFA, 0x10D27}, {0x10D30, 0x10D39}, {0x10E60, 0x10E7E},
{0x10E80, 0x10EA9}, {0x10EAB, 0x10EAD}, {0x10EB0, 0x10EB1},
{0x10F00, 0x10F27}, {0x10F30, 0x10F59}, {0x10FB0, 0x10FCB},
{0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x1106F},
{0x1107F, 0x110C1}, {0x110CD, 0x110CD}, {0x110D0, 0x110E8},
{0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11146},
{0x11150, 0x11176}, {0x11180, 0x111CD}, {0x111D0, 0x111DF},
{0x111E1, 0x111F4}, {0x11200, 0x11211}, {0x11213, 0x1123E},
{0x11280, 0x11286}, {0x11288, 0x11288}, {0x1128A, 0x1128D},
{0x1128F, 0x1129D}, {0x1129F, 0x112A9}, {0x112B0, 0x112EA},
{0x112F0, 0x112F9}, {0x11300, 0x11303}, {0x11305, 0x1130C},
{0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330},
{0x11332, 0x11333}, {0x11335, 0x11339}, {0x1133B, 0x11344},
{0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11350, 0x11350},
{0x11357, 0x11357}, {0x1135D, 0x11363}, {0x11366, 0x1136C},
{0x11370, 0x11374}, {0x11400, 0x11459}, {0x1145B, 0x1145B},
{0x1145D, 0x1145F}, {0x11480, 0x114C7}, {0x114D0, 0x114D9},
{0x11580, 0x115B5}, {0x115B8, 0x115DD}, {0x11600, 0x11644},
{0x11650, 0x11659}, {0x11660, 0x1166C}, {0x11680, 0x116B8},
{0x116C0, 0x116C9}, {0x11700, 0x1171A}, {0x1171D, 0x1172B},
{0x11730, 0x1173F}, {0x11800, 0x1183B}, {0x118A0, 0x118F2},
{0x118FF, 0x118FF}, {0x119A0, 0x119A7}, {0x119AA, 0x119D7},
{0x119DA, 0x119E4}, {0x11A00, 0x11A47}, {0x11A50, 0x11AA2},
{0x11AC0, 0x11AF8}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C36},
{0x11C38, 0x11C45}, {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F},
{0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06},
{0x11D08, 0x11D09}, {0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A},
{0x11D3C, 0x11D3D}, {0x11D3F, 0x11D47}, {0x11D50, 0x11D59},
{0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E},
{0x11D90, 0x11D91}, {0x11D93, 0x11D98}, {0x11DA0, 0x11DA9},
{0x11EE0, 0x11EF8}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399},
{0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11147},
{0x11150, 0x11176}, {0x11180, 0x111DF}, {0x111E1, 0x111F4},
{0x11200, 0x11211}, {0x11213, 0x1123E}, {0x11280, 0x11286},
{0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D},
{0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x112F0, 0x112F9},
{0x11300, 0x11303}, {0x11305, 0x1130C}, {0x1130F, 0x11310},
{0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333},
{0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11347, 0x11348},
{0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x11357, 0x11357},
{0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374},
{0x11400, 0x1145B}, {0x1145D, 0x11461}, {0x11480, 0x114C7},
{0x114D0, 0x114D9}, {0x11580, 0x115B5}, {0x115B8, 0x115DD},
{0x11600, 0x11644}, {0x11650, 0x11659}, {0x11660, 0x1166C},
{0x11680, 0x116B8}, {0x116C0, 0x116C9}, {0x11700, 0x1171A},
{0x1171D, 0x1172B}, {0x11730, 0x1173F}, {0x11800, 0x1183B},
{0x118A0, 0x118F2}, {0x118FF, 0x11906}, {0x11909, 0x11909},
{0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x11935},
{0x11937, 0x11938}, {0x1193B, 0x11946}, {0x11950, 0x11959},
{0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x119DA, 0x119E4},
{0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11AC0, 0x11AF8},
{0x11C00, 0x11C08}, {0x11C0A, 0x11C36}, {0x11C38, 0x11C45},
{0x11C50, 0x11C6C}, {0x11C70, 0x11C8F}, {0x11C92, 0x11CA7},
{0x11CA9, 0x11CB6}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09},
{0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D},
{0x11D3F, 0x11D47}, {0x11D50, 0x11D59}, {0x11D60, 0x11D65},
{0x11D67, 0x11D68}, {0x11D6A, 0x11D8E}, {0x11D90, 0x11D91},
{0x11D93, 0x11D98}, {0x11DA0, 0x11DA9}, {0x11EE0, 0x11EF8},
{0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399},
{0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543},
{0x13000, 0x1342E}, {0x13430, 0x13438}, {0x14400, 0x14646},
{0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A60, 0x16A69},
@ -382,20 +389,22 @@ var neutral = table{
{0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x1EEF0, 0x1EEF1},
{0x1F000, 0x1F003}, {0x1F005, 0x1F02B}, {0x1F030, 0x1F093},
{0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF}, {0x1F0C1, 0x1F0CE},
{0x1F0D1, 0x1F0F5}, {0x1F10B, 0x1F10C}, {0x1F12E, 0x1F12F},
{0x1F16A, 0x1F16C}, {0x1F1E6, 0x1F1FF}, {0x1F321, 0x1F32C},
{0x1F336, 0x1F336}, {0x1F37D, 0x1F37D}, {0x1F394, 0x1F39F},
{0x1F3CB, 0x1F3CE}, {0x1F3D4, 0x1F3DF}, {0x1F3F1, 0x1F3F3},
{0x1F3F5, 0x1F3F7}, {0x1F43F, 0x1F43F}, {0x1F441, 0x1F441},
{0x1F4FD, 0x1F4FE}, {0x1F53E, 0x1F54A}, {0x1F54F, 0x1F54F},
{0x1F568, 0x1F579}, {0x1F57B, 0x1F594}, {0x1F597, 0x1F5A3},
{0x1F5A5, 0x1F5FA}, {0x1F650, 0x1F67F}, {0x1F6C6, 0x1F6CB},
{0x1F6CD, 0x1F6CF}, {0x1F6D3, 0x1F6D4}, {0x1F6E0, 0x1F6EA},
{0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F773}, {0x1F780, 0x1F7D8},
{0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859},
{0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F900, 0x1F90B},
{0x1FA00, 0x1FA53}, {0x1FA60, 0x1FA6D}, {0xE0001, 0xE0001},
{0xE0020, 0xE007F},
{0x1F0D1, 0x1F0F5}, {0x1F10B, 0x1F10F}, {0x1F12E, 0x1F12F},
{0x1F16A, 0x1F16F}, {0x1F1AD, 0x1F1AD}, {0x1F1E6, 0x1F1FF},
{0x1F321, 0x1F32C}, {0x1F336, 0x1F336}, {0x1F37D, 0x1F37D},
{0x1F394, 0x1F39F}, {0x1F3CB, 0x1F3CE}, {0x1F3D4, 0x1F3DF},
{0x1F3F1, 0x1F3F3}, {0x1F3F5, 0x1F3F7}, {0x1F43F, 0x1F43F},
{0x1F441, 0x1F441}, {0x1F4FD, 0x1F4FE}, {0x1F53E, 0x1F54A},
{0x1F54F, 0x1F54F}, {0x1F568, 0x1F579}, {0x1F57B, 0x1F594},
{0x1F597, 0x1F5A3}, {0x1F5A5, 0x1F5FA}, {0x1F650, 0x1F67F},
{0x1F6C6, 0x1F6CB}, {0x1F6CD, 0x1F6CF}, {0x1F6D3, 0x1F6D4},
{0x1F6E0, 0x1F6EA}, {0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F773},
{0x1F780, 0x1F7D8}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847},
{0x1F850, 0x1F859}, {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD},
{0x1F8B0, 0x1F8B1}, {0x1F900, 0x1F90B}, {0x1F93B, 0x1F93B},
{0x1F946, 0x1F946}, {0x1FA00, 0x1FA53}, {0x1FA60, 0x1FA6D},
{0x1FB00, 0x1FB92}, {0x1FB94, 0x1FBCA}, {0x1FBF0, 0x1FBF9},
{0xE0001, 0xE0001}, {0xE0020, 0xE007F},
}
var emoji = table{
@ -423,5 +432,6 @@ var emoji = table{
{0x1F546, 0x1F64F}, {0x1F680, 0x1F6FF}, {0x1F774, 0x1F77F},
{0x1F7D5, 0x1F7FF}, {0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F},
{0x1F85A, 0x1F85F}, {0x1F888, 0x1F88F}, {0x1F8AE, 0x1F8FF},
{0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1FFFD},
{0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1FAFF},
{0x1FC00, 0x1FFFD},
}

View file

@ -24,3 +24,8 @@ _testmain.go
*.prof
.DS_STORE
cmd/ftest/ftest
# Goland specific files
.idea

View file

@ -1,16 +1,24 @@
language: go
env:
global:
- GO111MODULE="on"
go:
- tip
- 1.6
- 1.5
- 1.4
install:
- go get -t -v ./...
- go get github.com/go-playground/overalls
- go get github.com/mattn/goveralls
- go get golang.org/x/tools/cmd/cover
- 1.11.x
- 1.12.x
- 1.13.x
- tip
matrix:
allow_failures:
- go: tip
fast_finish: true
before_install:
- go get github.com/mattn/goveralls
script:
- go test -v ./...
- $GOPATH/bin/overalls -project=github.com/mmcdole/gofeed -covermode=count -ignore=.git,vendor -debug
after_success:
- $GOPATH/bin/goveralls -coverprofile=overalls.coverprofile -service=travis-ci
- go install ./...
- go test -v ./...
- $GOPATH/bin/goveralls -service=travis-ci

View file

@ -2,9 +2,23 @@
[![Build Status](https://travis-ci.org/mmcdole/gofeed.svg?branch=master)](https://travis-ci.org/mmcdole/gofeed) [![Coverage Status](https://coveralls.io/repos/github/mmcdole/gofeed/badge.svg?branch=master)](https://coveralls.io/github/mmcdole/gofeed?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/mmcdole/gofeed)](https://goreportcard.com/report/github.com/mmcdole/gofeed) [![](https://godoc.org/github.com/mmcdole/gofeed?status.svg)](http://godoc.org/github.com/mmcdole/gofeed) [![License](http://img.shields.io/:license-mit-blue.svg)](http://doge.mit-license.org)
The `gofeed` library is a robust feed parser that supports parsing both [RSS](https://en.wikipedia.org/wiki/RSS) and [Atom](https://en.wikipedia.org/wiki/Atom_(standard)) feeds. The universal `gofeed.Parser` will parse and convert all feed types into a hybrid `gofeed.Feed` model. You also have the option of parsing them into their respective `atom.Feed` and `rss.Feed` models using the feed specific `atom.Parser` or `rss.Parser`.
The `gofeed` library is a robust feed parser that supports parsing both [RSS](https://en.wikipedia.org/wiki/RSS) and [Atom](https://en.wikipedia.org/wiki/Atom_(standard)) feeds. The library provides a universal `gofeed.Parser` that will parse and convert all feed types into a hybrid `gofeed.Feed` model. You also have the option of utilizing the feed specific `atom.Parser` or `rss.Parser` parsers which generate `atom.Feed` and `rss.Feed` respectively.
##### Supported feed types:
## Table of Contents
- [Features](#features)
- [Overview](#overview)
- [Basic Usage](#basic-usage)
- [Advanced Usage](#advanced-usage)
- [Extensions](#extensions)
- [Invalid Feeds](#invalid-feeds)
- [Default Mappings](#default-mappings)
- [Dependencies](#dependencies)
- [License](#license)
- [Credits](#credits)
## Features
#### Supported feed types:
* RSS 0.90
* Netscape RSS 0.91
* Userland RSS 0.91
@ -16,22 +30,26 @@ The `gofeed` library is a robust feed parser that supports parsing both [RSS](ht
* Atom 0.3
* Atom 1.0
It also provides support for parsing several popular predefined extension modules, including [Dublin Core](http://dublincore.org/documents/dces/) and [Apples iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390), as well as arbitrary extensions. See the [Extensions](#extensions) section for more details.
#### Extension Support
## Table of Contents
- [Overview](#overview)
- [Basic Usage](#basic-usage)
- [Advanced Usage](#advanced-usage)
- [Extensions](#extensions)
- [Invalid Feeds](#invalid-feeds)
- [Default Mappings](#default-mappings)
- [Dependencies](#dependencies)
- [License](#license)
- [Donate](#donate)
- [Credits](#credits)
The `gofeed` library provides support for parsing several popular predefined extensions into ready-made structs, including [Dublin Core](http://dublincore.org/documents/dces/) and [Apples iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390).
It parses all other feed extensions in a generic way (see the [Extensions](#extensions) section for more details).
#### Invalid Feeds
A best-effort attempt is made at parsing broken and invalid XML feeds. Currently, `gofeed` can succesfully parse feeds with the following issues:
- Unescaped/Naked Markup in feed elements
- Undeclared namespace prefixes
- Missing closing tags on certain elements
- Illegal tags within feed elements without namespace prefixes
- Missing "required" elements as specified by the respective feed specs.
- Incorrect date formats
## Overview
The `gofeed` library is comprised of a universal feed parser and several feed specific parsers. Which one you choose depends entirely on your usecase. If you will be handling both rss and atom feeds then it makes sense to use the `gofeed.Parser`. If you know ahead of time that you will only be parsing one feed type then it would make sense to use `rss.Parser` or `atom.Parser`.
#### Universal Feed Parser
The universal `gofeed.Parser` works in 3 stages: detection, parsing and translation. It first detects the feed type that it is currently parsing. Then it uses a feed specific parser to parse the feed into its true representation which will be either a `rss.Feed` or `atom.Feed`. These models cover every field possible for their respective feed types. Finally, they are *translated* into a `gofeed.Feed` model that is a hybrid of both feed types. Performing the universal feed parsing in these 3 stages allows for more flexibility and keeps the code base more maintainable by separating RSS and Atom parsing into seperate packages.
@ -44,8 +62,6 @@ The translation step is done by anything which adheres to the `gofeed.Translator
The `gofeed` library provides two feed specific parsers: `atom.Parser` and `rss.Parser`. If the hybrid `gofeed.Feed` model that the universal `gofeed.Parser` produces does not contain a field from the `atom.Feed` or `rss.Feed` model that you require, it might be beneficial to use the feed specific parsers. When using the `atom.Parser` or `rss.Parser` directly, you can access all of fields found in the `atom.Feed` and `rss.Feed` models. It is also marginally faster because you are able to skip the translation step.
However, for the *vast* majority of users, the universal `gofeed.Parser` is the best way to parse feeds. This allows the user of `gofeed` library to not care about the differences between RSS or Atom feeds.
## Basic Usage
#### Universal Feed Parser
@ -83,6 +99,16 @@ feed, _ := fp.Parse(file)
fmt.Println(feed.Title)
```
##### Parse a feed from an URL with a 60s timeout:
```go
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
fp := gofeed.NewParser()
feed, _ := fp.ParseURLWithContext("http://feeds.twit.tv/twit.xml", ctx)
fmt.Println(feed.Title)
```
#### Feed Specific Parsers
You can easily use the `rss.Parser` and `atom.Parser` directly if you have a usage scenario that requires it:
@ -185,16 +211,6 @@ Every element which does not belong to the feed's default namespace is considere
In addition to the generic handling of extensions, `gofeed` also has built in support for parsing certain popular extensions into their own structs for convenience. It currently supports the [Dublin Core](http://dublincore.org/documents/dces/) and [Apple iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390) extensions which you can access at `Feed.ItunesExt`, `feed.DublinCoreExt` and `Item.ITunesExt` and `Item.DublinCoreExt`
## Invalid Feeds
A best-effort attempt is made at parsing broken and invalid XML feeds. Currently, `gofeed` can succesfully parse feeds with the following issues:
- Unescaped/Naked Markup in feed elements
- Undeclared namespace prefixes
- Missing closing tags on certain elements
- Illegal tags within feed elements without namespace prefixes
- Missing "required" elements as specified by the respective feed specs.
- Incorrect date formats
## Default Mappings
The ```DefaultRSSTranslator``` and the ```DefaultAtomTranslator``` map the following ```rss.Feed``` and ```atom.Feed``` fields to their respective ```gofeed.Feed``` fields. They are listed in order of precedence (highest to lowest):
@ -220,12 +236,12 @@ Categories | /rss/channel/category<br>/rss/channel/itunes:category<br>/rss/chann
--- | --- | ---
Title | /rss/channel/item/title<br>/rdf:RDF/item/title<br>/rdf:RDF/item/dc:title<br>/rss/channel/item/dc:title | /feed/entry/title
Description | /rss/channel/item/description<br>/rdf:RDF/item/description<br>/rss/channel/item/dc:description<br>/rdf:RDF/item/dc:description | /feed/entry/summary
Content | | /feed/entry/content
Content | /rss/channel/item/content:encoded | /feed/entry/content
Link | /rss/channel/item/link<br>/rdf:RDF/item/link | /feed/entry/link[@rel=”alternate”]/@href<br>/feed/entry/link[not(@rel)]/@href
Updated | /rss/channel/item/dc:date<br>/rdf:RDF/rdf:item/dc:date | /feed/entry/modified<br>/feed/entry/updated
Published | /rss/channel/item/pubDate<br>/rss/channel/item/dc:date | /feed/entry/published<br>/feed/entry/issued
Author | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/author
Guid | /rss/channel/item/guid | /feed/entry/id
GUID | /rss/channel/item/guid | /feed/entry/id
Image | /rss/channel/item/itunes:image<br>/rss/channel/item/media:image |
Categories | /rss/channel/item/category<br>/rss/channel/item/dc:subject<br>/rss/channel/item/itunes:keywords<br>/rdf:RDF/channel/item/dc:subject | /feed/entry/category
Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”]
@ -240,14 +256,9 @@ Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”
This project is licensed under the [MIT License](https://raw.githubusercontent.com/mmcdole/gofeed/master/LICENSE)
## Donate
I write open source software for fun. However, if you want to buy me a beer because you found something I wrote useful, feel free!
Bitcoin: 1CXrjBBkxgVNgKXRAq5MnsR7zzZbHvUHkJ
## Credits
* [cristoper](https://github.com/cristoper) for his work on implementing xml:base relative URI handling.
* [Mark Pilgrim](https://en.wikipedia.org/wiki/Mark_Pilgrim) and [Kurt McKee](http://kurtmckee.org) for their work on the excellent [Universal Feed Parser](https://github.com/kurtmckee/feedparser) Python library. This library was the inspiration for the `gofeed` library.
* [Dan MacTough](http://blog.mact.me) for his work on [node-feedparser](https://github.com/danmactough/node-feedparser). It provided inspiration for the set of fields that should be covered in the hybrid `gofeed.Feed` model.
* [Matt Jibson](https://mattjibson.com/) for his date parsing function in the [goread](https://github.com/mjibson/goread) project.

View file

@ -6,19 +6,43 @@ import (
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/mmcdole/gofeed/extensions"
ext "github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/goxpp"
xpp "github.com/mmcdole/goxpp"
)
var (
// Atom elements which contain URIs
// https://tools.ietf.org/html/rfc4287
uriElements = map[string]bool{
"icon": true,
"id": true,
"logo": true,
"uri": true,
"url": true, // atom 0.3
}
// Atom attributes which contain URIs
// https://tools.ietf.org/html/rfc4287
atomURIAttrs = map[string]bool{
"href": true,
"scheme": true,
"src": true,
"uri": true,
}
)
// Parser is an Atom Parser
type Parser struct{}
type Parser struct {
base *shared.XMLBase
}
// Parse parses an xml feed into an atom.Feed
func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
ap.base = &shared.XMLBase{URIAttrs: atomURIAttrs}
_, err := shared.FindRoot(p)
_, err := ap.base.FindRoot(p)
if err != nil {
return nil, err
}
@ -43,7 +67,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
extensions := ext.Extensions{}
for {
tok, err := shared.NextTag(p)
tok, err := ap.base.NextTag(p)
if err != nil {
return nil, err
}
@ -197,7 +221,7 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
extensions := ext.Extensions{}
for {
tok, err := shared.NextTag(p)
tok, err := ap.base.NextTag(p)
if err != nil {
return nil, err
}
@ -352,7 +376,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
extensions := ext.Extensions{}
for {
tok, err := shared.NextTag(p)
tok, err := ap.base.NextTag(p)
if err != nil {
return nil, err
}
@ -510,7 +534,7 @@ func (ap *Parser) parsePerson(name string, p *xpp.XMLPullParser) (*Person, error
person := &Person{}
for {
tok, err := shared.NextTag(p)
tok, err := ap.base.NextTag(p)
if err != nil {
return nil, err
}
@ -654,29 +678,44 @@ func (ap *Parser) parseAtomText(p *xpp.XMLPullParser) (string, error) {
result := text.InnerXML
result = strings.TrimSpace(result)
if strings.HasPrefix(result, "<![CDATA[") &&
strings.HasSuffix(result, "]]>") {
result = strings.TrimPrefix(result, "<![CDATA[")
result = strings.TrimSuffix(result, "]]>")
return result, nil
}
lowerType := strings.ToLower(text.Type)
lowerMode := strings.ToLower(text.Mode)
if lowerType == "text" ||
strings.HasPrefix(lowerType, "text/") ||
(lowerType == "" && lowerMode == "") {
result, err = shared.DecodeEntities(result)
} else if strings.Contains(lowerType, "xhtml") {
result = ap.stripWrappingDiv(result)
} else if lowerType == "html" {
result = ap.stripWrappingDiv(result)
result, err = shared.DecodeEntities(result)
if strings.Contains(result, "<![CDATA[") {
result = shared.StripCDATA(result)
if lowerType == "html" || strings.Contains(lowerType, "xhtml") {
result, _ = ap.base.ResolveHTML(result)
}
} else {
decodedStr, err := base64.StdEncoding.DecodeString(result)
// decode non-CDATA contents depending on type
if lowerType == "text" ||
strings.HasPrefix(lowerType, "text/") ||
(lowerType == "" && lowerMode == "") {
result, err = shared.DecodeEntities(result)
} else if strings.Contains(lowerType, "xhtml") {
result = ap.stripWrappingDiv(result)
result, _ = ap.base.ResolveHTML(result)
} else if lowerType == "html" {
result = ap.stripWrappingDiv(result)
result, err = shared.DecodeEntities(result)
if err == nil {
result, _ = ap.base.ResolveHTML(result)
}
} else {
decodedStr, err := base64.StdEncoding.DecodeString(result)
if err == nil {
result = string(decodedStr)
}
}
}
// resolve relative URIs in URI-containing elements according to xml:base
name := strings.ToLower(p.Name)
if uriElements[name] {
resolved, err := ap.base.ResolveURL(result)
if err == nil {
result = string(decodedStr)
result = resolved
}
}

View file

@ -5,7 +5,7 @@ import (
"strings"
"github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/goxpp"
xpp "github.com/mmcdole/goxpp"
)
// FeedType represents one of the possible feed
@ -28,7 +28,8 @@ const (
func DetectFeedType(feed io.Reader) FeedType {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
_, err := shared.FindRoot(p)
xmlBase := shared.XMLBase{}
_, err := xmlBase.FindRoot(p)
if err != nil {
return FeedTypeUnknown
}

View file

@ -14,6 +14,7 @@ type ITunesFeedExtension struct {
Image string `json:"image,omitempty"`
Complete string `json:"complete,omitempty"`
NewFeedURL string `json:"newFeedUrl,omitempty"`
Type string `json:"type,omitempty"`
}
// ITunesItemExtension is a set of extension
@ -28,7 +29,10 @@ type ITunesItemExtension struct {
Summary string `json:"summary,omitempty"`
Image string `json:"image,omitempty"`
IsClosedCaptioned string `json:"isClosedCaptioned,omitempty"`
Episode string `json:"episode,omitempty"`
Season string `json:"season,omitempty"`
Order string `json:"order,omitempty"`
EpisodeType string `json:"episodeType,omitempty"`
}
// ITunesCategory is a category element for itunes feeds.
@ -58,6 +62,7 @@ func NewITunesFeedExtension(extensions map[string][]Extension) *ITunesFeedExtens
feed.NewFeedURL = parseTextExtension("new-feed-url", extensions)
feed.Categories = parseCategories(extensions)
feed.Owner = parseOwner(extensions)
feed.Type = parseTextExtension("type", extensions)
return feed
}
@ -71,9 +76,13 @@ func NewITunesItemExtension(extensions map[string][]Extension) *ITunesItemExtens
entry.Explicit = parseTextExtension("explicit", extensions)
entry.Subtitle = parseTextExtension("subtitle", extensions)
entry.Summary = parseTextExtension("summary", extensions)
entry.Keywords = parseTextExtension("keywords", extensions)
entry.Image = parseImage(extensions)
entry.IsClosedCaptioned = parseTextExtension("isClosedCaptioned", extensions)
entry.Episode = parseTextExtension("episode", extensions)
entry.Season = parseTextExtension("season", extensions)
entry.Order = parseTextExtension("order", extensions)
entry.EpisodeType = parseTextExtension("episodeType", extensions)
return entry
}

View file

@ -10,6 +10,8 @@ import (
// Feed is the universal Feed type that atom.Feed
// and rss.Feed gets translated to. It represents
// a web feed.
// Sorting with sort.Sort will order the Items by
// oldest to newest publish time.
type Feed struct {
Title string `json:"title,omitempty"`
Description string `json:"description,omitempty"`
@ -82,3 +84,21 @@ type Enclosure struct {
Length string `json:"length,omitempty"`
Type string `json:"type,omitempty"`
}
// Len returns the length of Items.
func (f Feed) Len() int {
return len(f.Items)
}
// Less compares PublishedParsed of Items[i], Items[k]
// and returns true if Items[i] is less than Items[k].
func (f Feed) Less(i, k int) bool {
return f.Items[i].PublishedParsed.Before(
*f.Items[k].PublishedParsed,
)
}
// Swap swaps Items[i] and Items[k].
func (f Feed) Swap(i, k int) {
f.Items[i], f.Items[k] = f.Items[k], f.Items[i]
}

12
vendor/github.com/mmcdole/gofeed/go.mod generated vendored Normal file
View file

@ -0,0 +1,12 @@
module github.com/mmcdole/gofeed
require (
github.com/PuerkitoBio/goquery v1.5.0
github.com/codegangsta/cli v1.20.0
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/stretchr/testify v1.2.2
golang.org/x/net v0.0.0-20181220203305-927f97764cc3
golang.org/x/text v0.3.0
)

20
vendor/github.com/mmcdole/gofeed/go.sum generated vendored Normal file
View file

@ -0,0 +1,20 @@
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/codegangsta/cli v1.20.0 h1:iX1FXEgwzd5+XN6wk5cVHOGQj6Q3Dcp20lUeS4lHNTw=
github.com/codegangsta/cli v1.20.0/go.mod h1:/qJNoX69yVSKu5o4jLyXAENLRyk1uhi7zkbQ3slBdOA=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3 h1:eH6Eip3UpmR+yM/qI9Ijluzb1bNv/cAU/n+6l8tRSis=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View file

@ -18,47 +18,27 @@ var dateFormats = []string{
time.RFC1123,
time.ANSIC,
"Mon, January 2 2006 15:04:05 -0700",
"Mon, January 02, 2006, 15:04:05 MST",
"Mon, January 02, 2006 15:04:05 MST",
"Mon, Jan 2, 2006 15:04 MST",
"Mon, Jan 2 2006 15:04 MST",
"Mon, Jan 2, 2006 15:04:05 MST",
"Mon, Jan 2 2006 15:04:05 -700",
"Mon, Jan 2 2006 15:04:05 -0700",
"Mon Jan 2 15:04 2006",
"Mon Jan 2 15:04:05 2006 MST",
"Mon Jan 02, 2006 3:04 pm",
"Mon, Jan 02,2006 15:04:05 MST",
"Mon Jan 02 2006 15:04:05 -0700",
"Monday, January 2, 2006 15:04:05 MST",
"Monday, January 2, 2006 03:04 PM",
"Monday, January 2, 2006",
"Monday, January 02, 2006",
"Monday, 2 January 2006 15:04:05 MST",
"Monday, 2 January 2006 15:04:05 -0700",
"Monday, 2 Jan 2006 15:04:05 MST",
"Monday, 2 Jan 2006 15:04:05 -0700",
"Monday, 02 January 2006 15:04:05 MST",
"Monday, 02 January 2006 15:04:05 -0700",
"Monday, 02 January 2006 15:04:05",
"Mon, 2 January 2006 15:04 MST",
"Mon, 2 January 2006, 15:04 -0700",
"Mon, 2 January 2006, 15:04:05 MST",
"Mon, 2 January 2006 15:04:05 MST",
"Mon, 2 January 2006 15:04:05 -0700",
"Mon, 2 January 2006",
"Mon, 2 Jan 2006 3:04:05 PM -0700",
"Mon, 2 Jan 2006 15:4:5 MST",
"Mon, 2 Jan 2006 15:4:5 -0700 GMT",
"Mon, 2, Jan 2006 15:4",
"Mon, 2 Jan 2006 15:04 MST",
"Mon, 2 Jan 2006, 15:04 -0700",
"Mon, 2 Jan 2006 15:04 -0700",
"Mon, 2 Jan 2006 15:04:05 UT",
"Mon, 2 Jan 2006 15:04:05MST",
"Mon, 2 Jan 2006 15:04:05 MST",
"Mon 2 Jan 2006 15:04:05 MST",
"mon,2 Jan 2006 15:04:05 MST",
"Mon, 2 Jan 2006 15:04:05 -0700 MST",
"Mon, 2 Jan 2006 15:04:05-0700",
"Mon, 2 Jan 2006 15:04:05 -0700",
@ -66,25 +46,15 @@ var dateFormats = []string{
"Mon, 2 Jan 2006 15:04",
"Mon,2 Jan 2006",
"Mon, 2 Jan 2006",
"Mon, 2 Jan 15:04:05 MST",
"Mon, 2 Jan 06 15:04:05 MST",
"Mon, 2 Jan 06 15:04:05 -0700",
"Mon, 2006-01-02 15:04",
"Mon,02 January 2006 14:04:05 MST",
"Mon, 02 January 2006",
"Mon, 02 Jan 2006 3:04:05 PM MST",
"Mon, 02 Jan 2006 15 -0700",
"Mon,02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006 15:04 -0700",
"Mon, 02 Jan 2006 15:04:05 Z",
"Mon, 02 Jan 2006 15:04:05 UT",
"Mon, 02 Jan 2006 15:04:05 MST-07:00",
"Mon, 02 Jan 2006 15:04:05 MST -0700",
"Mon, 02 Jan 2006, 15:04:05 MST",
"Mon, 02 Jan 2006 15:04:05MST",
"Mon, 02 Jan 2006 15:04:05 MST",
"Mon , 02 Jan 2006 15:04:05 MST",
"Mon, 02 Jan 2006 15:04:05 GMT-0700",
"Mon,02 Jan 2006 15:04:05 -0700",
"Mon, 02 Jan 2006 15:04:05 -0700",
@ -95,30 +65,23 @@ var dateFormats = []string{
"Mon, 02 Jan 2006 15:04:05 00",
"Mon, 02 Jan 2006 15:04:05",
"Mon, 02 Jan 2006",
"Mon, 02 Jan 06 15:04:05 MST",
"January 2, 2006 3:04 PM",
"January 2, 2006, 3:04 p.m.",
"January 2, 2006 15:04:05 MST",
"January 2, 2006 15:04:05",
"January 2, 2006 03:04 PM",
"January 2, 2006",
"January 02, 2006 15:04:05 MST",
"January 02, 2006 15:04",
"January 02, 2006 03:04 PM",
"January 02, 2006",
"Jan 2, 2006 3:04:05 PM MST",
"Jan 2, 2006 3:04:05 PM",
"Jan 2, 2006 15:04:05 MST",
"Jan 2, 2006",
"Jan 02 2006 03:04:05PM",
"Jan 02, 2006",
"6/1/2 15:04",
"6-1-2 15:04",
"2 January 2006 15:04:05 MST",
"2 January 2006 15:04:05 -0700",
"2 January 2006",
"2 Jan 2006 15:04:05 Z",
"2 Jan 2006 15:04:05 MST",
"2 Jan 2006 15:04:05 -0700",
"2 Jan 2006",
"2.1.2006 15:04:05",
@ -141,7 +104,6 @@ var dateFormats = []string{
"2006-01-02T15:04:05",
"2006-01-02 at 15:04:05",
"2006-01-02 15:04:05Z",
"2006-01-02 15:04:05 MST",
"2006-01-02 15:04:05-0700",
"2006-01-02 15:04:05-07:00",
"2006-01-02 15:04:05 -0700",
@ -150,21 +112,15 @@ var dateFormats = []string{
"2006/01/02",
"2006-01-02",
"15:04 02.01.2006 -0700",
"1/2/2006 3:04:05 PM MST",
"1/2/2006 3:04:05 PM",
"1/2/2006 15:04:05 MST",
"1/2/2006",
"06/1/2 15:04",
"06-1-2 15:04",
"02 Monday, Jan 2006 15:04",
"02 Jan 2006 15:04 MST",
"02 Jan 2006 15:04:05 UT",
"02 Jan 2006 15:04:05 MST",
"02 Jan 2006 15:04:05 -0700",
"02 Jan 2006 15:04:05",
"02 Jan 2006",
"02/01/2006 15:04 MST",
"02-01-2006 15:04:05 MST",
"02.01.2006 15:04:05",
"02/01/2006 15:04:05",
"02.01.2006 15:04",
@ -173,12 +129,60 @@ var dateFormats = []string{
"02/01/2006",
"02-01-2006",
"01/02/2006 3:04 PM",
"01/02/2006 15:04:05 MST",
"01/02/2006 - 15:04",
"01/02/2006",
"01-02-2006",
}
// Named zone cannot be consistently loaded, so handle separately
var dateFormatsWithNamedZone = []string{
"Mon, January 02, 2006, 15:04:05 MST",
"Mon, January 02, 2006 15:04:05 MST",
"Mon, Jan 2, 2006 15:04 MST",
"Mon, Jan 2 2006 15:04 MST",
"Mon, Jan 2, 2006 15:04:05 MST",
"Mon Jan 2 15:04:05 2006 MST",
"Mon, Jan 02,2006 15:04:05 MST",
"Monday, January 2, 2006 15:04:05 MST",
"Monday, 2 January 2006 15:04:05 MST",
"Monday, 2 Jan 2006 15:04:05 MST",
"Monday, 02 January 2006 15:04:05 MST",
"Mon, 2 January 2006 15:04 MST",
"Mon, 2 January 2006, 15:04:05 MST",
"Mon, 2 January 2006 15:04:05 MST",
"Mon, 2 Jan 2006 15:4:5 MST",
"Mon, 2 Jan 2006 15:04 MST",
"Mon, 2 Jan 2006 15:04:05MST",
"Mon, 2 Jan 2006 15:04:05 MST",
"Mon 2 Jan 2006 15:04:05 MST",
"mon,2 Jan 2006 15:04:05 MST",
"Mon, 2 Jan 15:04:05 MST",
"Mon, 2 Jan 06 15:04:05 MST",
"Mon,02 January 2006 14:04:05 MST",
"Mon, 02 Jan 2006 3:04:05 PM MST",
"Mon,02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006, 15:04:05 MST",
"Mon, 02 Jan 2006 15:04:05MST",
"Mon, 02 Jan 2006 15:04:05 MST",
"Mon , 02 Jan 2006 15:04:05 MST",
"Mon, 02 Jan 06 15:04:05 MST",
"January 2, 2006 15:04:05 MST",
"January 02, 2006 15:04:05 MST",
"Jan 2, 2006 3:04:05 PM MST",
"Jan 2, 2006 15:04:05 MST",
"2 January 2006 15:04:05 MST",
"2 Jan 2006 15:04:05 MST",
"2006-01-02 15:04:05 MST",
"1/2/2006 3:04:05 PM MST",
"1/2/2006 15:04:05 MST",
"02 Jan 2006 15:04 MST",
"02 Jan 2006 15:04:05 MST",
"02/01/2006 15:04 MST",
"02-01-2006 15:04:05 MST",
"01/02/2006 15:04:05 MST",
}
// ParseDate parses a given date string using a large
// list of commonly found feed date formats.
func ParseDate(ds string) (t time.Time, err error) {
@ -191,6 +195,25 @@ func ParseDate(ds string) (t time.Time, err error) {
return
}
}
for _, f := range dateFormatsWithNamedZone {
t, err = time.Parse(f, d)
if err != nil {
continue
}
// This is a format match! Now try to load the timezone name
loc, err := time.LoadLocation(t.Location().String())
if err != nil {
// We couldn't load the TZ name. Just use UTC instead...
return t, nil
}
if t, err = time.ParseInLocation(f, ds, loc); err == nil {
return t, nil
}
// This should not be reachable
}
err = fmt.Errorf("Failed to parse date: %s", ds)
return
}

View file

@ -8,7 +8,7 @@ import (
"strconv"
"strings"
"github.com/mmcdole/goxpp"
xpp "github.com/mmcdole/goxpp"
)
var (
@ -21,48 +21,8 @@ var (
InvalidNumericReference = errors.New("invalid numeric reference")
)
// FindRoot iterates through the tokens of an xml document until
// it encounters its first StartTag event. It returns an error
// if it reaches EndDocument before finding a tag.
func FindRoot(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
event, err = p.Next()
if err != nil {
return event, err
}
if event == xpp.StartTag {
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find root node before document end.")
}
}
return
}
// NextTag iterates through the tokens until it reaches a StartTag or EndTag
// It is similar to goxpp's NextTag method except it wont throw an error if
// the next immediate token isnt a Start/EndTag. Instead, it will continue to
// consume tokens until it hits a Start/EndTag or EndDocument.
func NextTag(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
event, err = p.Next()
if err != nil {
return event, err
}
if event == xpp.StartTag || event == xpp.EndTag {
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find NextTag before reaching the end of the document.")
}
}
return
}
const CDATA_START = "<![CDATA["
const CDATA_END = "]]>"
// ParseText is a helper function for parsing the text
// from the current element of the XMLPullParser.
@ -82,16 +42,46 @@ func ParseText(p *xpp.XMLPullParser) (string, error) {
result := text.InnerXML
result = strings.TrimSpace(result)
if strings.HasPrefix(result, "<![CDATA[") &&
strings.HasSuffix(result, "]]>") {
result = strings.TrimPrefix(result, "<![CDATA[")
result = strings.TrimSuffix(result, "]]>")
return result, nil
if strings.Contains(result, CDATA_START) {
return StripCDATA(result), nil
}
return DecodeEntities(result)
}
// StripCDATA removes CDATA tags from the string
// content outside of CDATA tags is passed via DecodeEntities
func StripCDATA(str string) string {
buf := bytes.NewBuffer([]byte{})
curr := 0
for curr < len(str) {
start := indexAt(str, CDATA_START, curr)
if start == -1 {
dec, _ := DecodeEntities(str[curr:])
buf.Write([]byte(dec))
return buf.String()
}
end := indexAt(str, CDATA_END, start)
if end == -1 {
dec, _ := DecodeEntities(str[curr:])
buf.Write([]byte(dec))
return buf.String()
}
buf.Write([]byte(str[start+len(CDATA_START) : end]))
curr = curr + end + len(CDATA_END)
}
return buf.String()
}
// DecodeEntities decodes escaped XML entities
// in a string and returns the unescaped string
func DecodeEntities(str string) (string, error) {
@ -106,60 +96,70 @@ func DecodeEntities(str string) (string, error) {
break
}
// Write and skip everything before it
buf.Write(data[:idx])
data = data[idx+1:]
data = data[idx:]
if len(data) == 0 {
return "", TruncatedEntity
// If there is only the '&' left here
if len(data) == 1 {
buf.Write(data)
return buf.String(), nil
}
// Find the end of the entity
end := bytes.IndexByte(data, ';')
if end == -1 {
return "", TruncatedEntity
// it's not an entitiy. just a plain old '&' possibly with extra bytes
buf.Write(data)
return buf.String(), nil
}
if data[0] == '#' {
// Numerical character reference
var str string
base := 10
if len(data) > 1 && data[1] == 'x' {
str = string(data[2:end])
base = 16
} else {
str = string(data[1:end])
}
i, err := strconv.ParseUint(str, base, 32)
if err != nil {
return "", InvalidNumericReference
}
buf.WriteRune(rune(i))
// Check if there is a space somewhere within the 'entitiy'.
// If there is then skip the whole thing since it's not a real entity.
if strings.Contains(string(data[1:end]), " ") {
buf.Write(data)
return buf.String(), nil
} else {
// Predefined entity
name := string(data[:end])
if data[1] == '#' {
// Numerical character reference
var str string
base := 10
var c byte
switch name {
case "lt":
c = '<'
case "gt":
c = '>'
case "quot":
c = '"'
case "apos":
c = '\''
case "amp":
c = '&'
default:
return "", fmt.Errorf("unknown predefined "+
"entity &%s;", name)
if len(data) > 2 && data[2] == 'x' {
str = string(data[3:end])
base = 16
} else {
str = string(data[2:end])
}
i, err := strconv.ParseUint(str, base, 32)
if err != nil {
return "", InvalidNumericReference
}
buf.WriteRune(rune(i))
} else {
// Predefined entity
name := string(data[1:end])
var c byte
switch name {
case "lt":
c = '<'
case "gt":
c = '>'
case "quot":
c = '"'
case "apos":
c = '\''
case "amp":
c = '&'
default:
return "", fmt.Errorf("unknown predefined "+
"entity &%s;", name)
}
buf.WriteByte(c)
}
buf.WriteByte(c)
}
// Skip the entity
@ -194,3 +194,11 @@ func ParseNameAddress(nameAddressText string) (name string, address string) {
}
return
}
func indexAt(str, substr string, start int) int {
idx := strings.Index(str[start:], substr)
if idx > -1 {
idx += start
}
return idx
}

View file

@ -0,0 +1,258 @@
package shared
import (
"bytes"
"fmt"
"golang.org/x/net/html"
"net/url"
"strings"
"github.com/mmcdole/goxpp"
)
var (
// HTML attributes which contain URIs
// https://pythonhosted.org/feedparser/resolving-relative-links.html
// To catch every possible URI attribute is non-trivial:
// https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
htmlURIAttrs = map[string]bool{
"action": true,
"background": true,
"cite": true,
"codebase": true,
"data": true,
"href": true,
"poster": true,
"profile": true,
"scheme": true,
"src": true,
"uri": true,
"usemap": true,
}
)
type urlStack []*url.URL
func (s *urlStack) push(u *url.URL) {
*s = append([]*url.URL{u}, *s...)
}
func (s *urlStack) pop() *url.URL {
if s == nil || len(*s) == 0 {
return nil
}
var top *url.URL
top, *s = (*s)[0], (*s)[1:]
return top
}
func (s *urlStack) top() *url.URL {
if s == nil || len(*s) == 0 {
return nil
}
return (*s)[0]
}
type XMLBase struct {
stack urlStack
URIAttrs map[string]bool
}
// FindRoot iterates through the tokens of an xml document until
// it encounters its first StartTag event. It returns an error
// if it reaches EndDocument before finding a tag.
func (b *XMLBase) FindRoot(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
event, err = b.NextTag(p)
if err != nil {
return event, err
}
if event == xpp.StartTag {
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find root node before document end.")
}
}
return
}
// XMLBase.NextTag iterates through the tokens until it reaches a StartTag or
// EndTag It maintains the urlStack upon encountering StartTag and EndTags, so
// that the top of the stack (accessible through the CurrentBase() and
// CurrentBaseURL() methods) is the absolute base URI by which relative URIs
// should be resolved.
//
// NextTag is similar to goxpp's NextTag method except it wont throw an error
// if the next immediate token isnt a Start/EndTag. Instead, it will continue
// to consume tokens until it hits a Start/EndTag or EndDocument.
func (b *XMLBase) NextTag(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
if p.Event == xpp.EndTag {
// Pop xml:base after each end tag
b.pop()
}
event, err = p.Next()
if err != nil {
return event, err
}
if event == xpp.EndTag {
break
}
if event == xpp.StartTag {
base := parseBase(p)
err = b.push(base)
if err != nil {
return
}
err = b.resolveAttrs(p)
if err != nil {
return
}
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find NextTag before reaching the end of the document.")
}
}
return
}
func parseBase(p *xpp.XMLPullParser) string {
xmlURI := "http://www.w3.org/XML/1998/namespace"
for _, attr := range p.Attrs {
if attr.Name.Local == "base" && attr.Name.Space == xmlURI {
return attr.Value
}
}
return ""
}
func (b *XMLBase) push(base string) error {
newURL, err := url.Parse(base)
if err != nil {
return err
}
topURL := b.CurrentBaseURL()
if topURL != nil {
newURL = topURL.ResolveReference(newURL)
}
b.stack.push(newURL)
return nil
}
// returns the popped base URL
func (b *XMLBase) pop() string {
url := b.stack.pop()
if url != nil {
return url.String()
}
return ""
}
func (b *XMLBase) CurrentBaseURL() *url.URL {
return b.stack.top()
}
func (b *XMLBase) CurrentBase() string {
if url := b.CurrentBaseURL(); url != nil {
return url.String()
}
return ""
}
// resolve the given string as a URL relative to current base
func (b *XMLBase) ResolveURL(u string) (string, error) {
if b.CurrentBase() == "" {
return u, nil
}
relURL, err := url.Parse(u)
if err != nil {
return u, err
}
curr := b.CurrentBaseURL()
if curr.Path != "" && u != "" && curr.Path[len(curr.Path)-1] != '/' {
// There's no reason someone would use a path in xml:base if they
// didn't mean for it to be a directory
curr.Path = curr.Path + "/"
}
absURL := b.CurrentBaseURL().ResolveReference(relURL)
return absURL.String(), nil
}
// resolve relative URI attributes according to xml:base
func (b *XMLBase) resolveAttrs(p *xpp.XMLPullParser) error {
for i, attr := range p.Attrs {
lowerName := strings.ToLower(attr.Name.Local)
if b.URIAttrs[lowerName] {
absURL, err := b.ResolveURL(attr.Value)
if err != nil {
return err
}
p.Attrs[i].Value = absURL
}
}
return nil
}
// Transforms html by resolving any relative URIs in attributes
// if an error occurs during parsing or serialization, then the original string
// is returned along with the error.
func (b *XMLBase) ResolveHTML(relHTML string) (string, error) {
if b.CurrentBase() == "" {
return relHTML, nil
}
htmlReader := strings.NewReader(relHTML)
doc, err := html.Parse(htmlReader)
if err != nil {
return relHTML, err
}
var visit func(*html.Node)
// recursively traverse HTML resolving any relative URIs in attributes
visit = func(n *html.Node) {
if n.Type == html.ElementNode {
for i, a := range n.Attr {
if htmlURIAttrs[a.Key] {
absVal, err := b.ResolveURL(a.Val)
if err == nil {
n.Attr[i].Val = absVal
}
break
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
visit(c)
}
}
visit(doc)
var w bytes.Buffer
err = html.Render(&w, doc)
if err != nil {
return relHTML, err
}
// html.Render() always writes a complete html5 document, so strip the html
// and body tags
absHTML := w.String()
absHTML = strings.TrimPrefix(absHTML, "<html><head></head><body>")
absHTML = strings.TrimSuffix(absHTML, "</body></html>")
return absHTML, err
}

View file

@ -2,6 +2,7 @@ package gofeed
import (
"bytes"
"context"
"errors"
"fmt"
"io"
@ -12,6 +13,10 @@ import (
"github.com/mmcdole/gofeed/rss"
)
// ErrFeedTypeNotDetected is returned when the detection system can not figure
// out the Feed format
var ErrFeedTypeNotDetected = errors.New("Failed to detect feed type")
// HTTPError represents an HTTP error returned by a server.
type HTTPError struct {
StatusCode int
@ -65,14 +70,29 @@ func (f *Parser) Parse(feed io.Reader) (*Feed, error) {
case FeedTypeRSS:
return f.parseRSSFeed(r)
}
return nil, errors.New("Failed to detect feed type")
return nil, ErrFeedTypeNotDetected
}
// ParseURL fetches the contents of a given url and
// attempts to parse the response into the universal feed type.
func (f *Parser) ParseURL(feedURL string) (feed *Feed, err error) {
return f.ParseURLWithContext(feedURL, context.Background())
}
// ParseURLWithContext fetches contents of a given url and
// attempts to parse the response into the universal feed type.
// Request could be canceled or timeout via given context
func (f *Parser) ParseURLWithContext(feedURL string, ctx context.Context) (feed *Feed, err error) {
client := f.httpClient()
resp, err := client.Get(feedURL)
req, err := http.NewRequest("GET", feedURL, nil)
if err != nil {
return nil, err
}
req = req.WithContext(ctx)
req.Header.Set("User-Agent", "Gofeed/1.0")
resp, err := client.Do(req)
if err != nil {
return nil, err

View file

@ -5,19 +5,22 @@ import (
"io"
"strings"
"github.com/mmcdole/gofeed/extensions"
ext "github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/goxpp"
xpp "github.com/mmcdole/goxpp"
)
// Parser is a RSS Parser
type Parser struct{}
type Parser struct {
base *shared.XMLBase
}
// Parse parses an xml feed into an rss.Feed
func (rp *Parser) Parse(feed io.Reader) (*Feed, error) {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
rp.base = &shared.XMLBase{}
_, err := shared.FindRoot(p)
_, err := rp.base.FindRoot(p)
if err != nil {
return nil, err
}
@ -41,7 +44,7 @@ func (rp *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
ver := rp.parseVersion(p)
for {
tok, err := shared.NextTag(p)
tok, err := rp.base.NextTag(p)
if err != nil {
return nil, err
}
@ -127,7 +130,7 @@ func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) {
categories := []*Category{}
for {
tok, err := shared.NextTag(p)
tok, err := rp.base.NextTag(p)
if err != nil {
return nil, err
}
@ -318,7 +321,7 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
categories := []*Category{}
for {
tok, err := shared.NextTag(p)
tok, err := rp.base.NextTag(p)
if err != nil {
return nil, err
}
@ -492,7 +495,7 @@ func (rp *Parser) parseImage(p *xpp.XMLPullParser) (image *Image, err error) {
image = &Image{}
for {
tok, err := shared.NextTag(p)
tok, err := rp.base.NextTag(p)
if err != nil {
return image, err
}
@ -604,7 +607,7 @@ func (rp *Parser) parseTextInput(p *xpp.XMLPullParser) (*TextInput, error) {
ti := &TextInput{}
for {
tok, err := shared.NextTag(p)
tok, err := rp.base.NextTag(p)
if err != nil {
return nil, err
}
@ -661,7 +664,7 @@ func (rp *Parser) parseSkipHours(p *xpp.XMLPullParser) ([]string, error) {
hours := []string{}
for {
tok, err := shared.NextTag(p)
tok, err := rp.base.NextTag(p)
if err != nil {
return nil, err
}
@ -699,7 +702,7 @@ func (rp *Parser) parseSkipDays(p *xpp.XMLPullParser) ([]string, error) {
days := []string{}
for {
tok, err := shared.NextTag(p)
tok, err := rp.base.NextTag(p)
if err != nil {
return nil, err
}
@ -741,7 +744,7 @@ func (rp *Parser) parseCloud(p *xpp.XMLPullParser) (*Cloud, error) {
cloud.RegisterProcedure = p.Attribute("registerProcedure")
cloud.Protocol = p.Attribute("protocol")
shared.NextTag(p)
rp.base.NextTag(p)
if err := p.Expect(xpp.EndTag, "cloud"); err != nil {
return nil, err

View file

@ -6,7 +6,7 @@ import (
"time"
"github.com/mmcdole/gofeed/atom"
"github.com/mmcdole/gofeed/extensions"
ext "github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/gofeed/rss"
)
@ -103,8 +103,8 @@ func (t *DefaultRSSTranslator) translateFeedFeedLink(rss *rss.Feed) (link string
for _, ex := range atomExtensions {
if links, ok := ex["link"]; ok {
for _, l := range links {
if l.Attrs["Rel"] == "self" {
link = l.Value
if l.Attrs["rel"] == "self" {
link = l.Attrs["href"]
}
}
}

15
vendor/modules.txt vendored
View file

@ -1,20 +1,22 @@
# github.com/PuerkitoBio/goquery v1.5.1
## explicit
github.com/PuerkitoBio/goquery
# github.com/andybalholm/cascadia v1.1.0
# github.com/andybalholm/cascadia v1.2.0
## explicit
github.com/andybalholm/cascadia
# github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9
## explicit
github.com/chilts/sid
# github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195
# github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7
## explicit
github.com/jaytaylor/html2text
# github.com/mattn/go-runewidth v0.0.7
# github.com/mattn/go-runewidth v0.0.9
## explicit
github.com/mattn/go-runewidth
# github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2
## explicit
github.com/mattn/go-xmpp
# github.com/mmcdole/gofeed v1.0.0-beta2
# github.com/mmcdole/gofeed v1.0.0
## explicit
github.com/mmcdole/gofeed
github.com/mmcdole/gofeed/atom
@ -22,7 +24,6 @@ github.com/mmcdole/gofeed/extensions
github.com/mmcdole/gofeed/internal/shared
github.com/mmcdole/gofeed/rss
# github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf
## explicit
github.com/mmcdole/goxpp
# github.com/olekukonko/tablewriter v0.0.4
## explicit
@ -30,7 +31,9 @@ github.com/olekukonko/tablewriter
# github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf
## explicit
github.com/ssor/bom
# golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
# github.com/stretchr/testify v1.5.1
## explicit
# golang.org/x/net v0.0.0-20200513185701-a91f0712d120
## explicit
golang.org/x/net/html
golang.org/x/net/html/atom