Set up go modules and updated dependencies.

This commit is contained in:
Martin Dosch 2020-03-28 13:44:36 +01:00
parent 27e7e0083c
commit 48690e232f
73 changed files with 2394 additions and 6503 deletions

17
go.mod Normal file
View file

@ -0,0 +1,17 @@
module salsa.debian.org/mdosch-guest/feed-to-muc
go 1.14
require (
github.com/PuerkitoBio/goquery v1.5.1 // indirect
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9
github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2
github.com/mmcdole/gofeed v1.0.0-beta2
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf // indirect
github.com/olekukonko/tablewriter v0.0.4 // indirect
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/stretchr/testify v1.5.1 // indirect
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e // indirect
golang.org/x/text v0.3.2 // indirect
)

42
go.sum Normal file
View file

@ -0,0 +1,42 @@
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 h1:z0uK8UQqjMVYzvk4tiiu3obv2B44+XBsvgEJREQfnO8=
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9/go.mod h1:Jl2neWsQaDanWORdqZ4emBl50J4/aRBBS4FyyG9/PFo=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195 h1:j0UEFmS7wSjAwKEIkgKBn8PRDfjcuggzr93R9wk53nQ=
github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54=
github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2 h1:F544zRtDc/pMpFNHN46oeXV2jIAG4DoMH+6zlVSn0Q8=
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2/go.mod h1:Cs5mF0OsrRRmhkyOod//ldNPOwJsrBvJ+1WRspv0xoc=
github.com/mmcdole/gofeed v1.0.0-beta2 h1:CjQ0ADhAwNSb08zknAkGOEYqr8zfZKfrzgk9BxpWP2E=
github.com/mmcdole/gofeed v1.0.0-beta2/go.mod h1:/BF9JneEL2/flujm8XHoxUcghdTV6vvb3xx/vKyChFU=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8=
github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

1
vendor/github.com/PuerkitoBio/goquery/.gitattributes generated vendored Normal file
View file

@ -0,0 +1 @@
testdata/* linguist-vendored

16
vendor/github.com/PuerkitoBio/goquery/.gitignore generated vendored Normal file
View file

@ -0,0 +1,16 @@
# editor temporary files
*.sublime-*
.DS_Store
*.swp
#*.*#
tags
# direnv config
.env*
# test binaries
*.test
# coverage and profilte outputs
*.out

17
vendor/github.com/PuerkitoBio/goquery/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,17 @@
language: go
go:
- 1.2.x
- 1.3.x
- 1.4.x
- 1.5.x
- 1.6.x
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- 1.11.x
- 1.12.x
- 1.13.x
- tip

View file

@ -37,6 +37,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.** **Note that goquery's API is now stable, and will not break.**
* **2020-02-04 (v1.5.1)** : Update module dependencies.
* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505). * **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505).
* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples. * **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples.
* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`. * **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`.
@ -138,11 +139,12 @@ func main() {
- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags. - [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags.
- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery. - [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors. - [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework - [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework
- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets. - [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping. - [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping.
- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins - [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins
- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers. - [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering.
## Support ## Support

View file

@ -1,6 +1,8 @@
module github.com/PuerkitoBio/goquery module github.com/PuerkitoBio/goquery
require ( require (
github.com/andybalholm/cascadia v1.0.0 github.com/andybalholm/cascadia v1.1.0
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a golang.org/x/net v0.0.0-20200202094626-16171245cfb2
) )
go 1.13

View file

@ -1,5 +1,8 @@
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U= golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

14
vendor/github.com/andybalholm/cascadia/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,14 @@
language: go
go:
- 1.3
- 1.4
install:
- go get github.com/andybalholm/cascadia
script:
- go test -v
notifications:
email: false

0
vendor/github.com/andybalholm/cascadia/LICENSE generated vendored Executable file → Normal file
View file

View file

@ -7,8 +7,6 @@ import (
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
"golang.org/x/net/html"
) )
// a parser for CSS selectors // a parser for CSS selectors
@ -56,6 +54,26 @@ func (p *parser) parseEscape() (result string, err error) {
return result, nil return result, nil
} }
// toLowerASCII returns s with all ASCII capital letters lowercased.
func toLowerASCII(s string) string {
var b []byte
for i := 0; i < len(s); i++ {
if c := s[i]; 'A' <= c && c <= 'Z' {
if b == nil {
b = make([]byte, len(s))
copy(b, s)
}
b[i] = s[i] + ('a' - 'A')
}
}
if b == nil {
return s
}
return string(b)
}
func hexDigit(c byte) bool { func hexDigit(c byte) bool {
return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
} }
@ -280,92 +298,92 @@ func (p *parser) consumeClosingParenthesis() bool {
} }
// parseTypeSelector parses a type selector (one that matches by tag name). // parseTypeSelector parses a type selector (one that matches by tag name).
func (p *parser) parseTypeSelector() (result Selector, err error) { func (p *parser) parseTypeSelector() (result tagSelector, err error) {
tag, err := p.parseIdentifier() tag, err := p.parseIdentifier()
if err != nil { if err != nil {
return nil, err return
} }
return tagSelector{tag: toLowerASCII(tag)}, nil
return typeSelector(tag), nil
} }
// parseIDSelector parses a selector that matches by id attribute. // parseIDSelector parses a selector that matches by id attribute.
func (p *parser) parseIDSelector() (Selector, error) { func (p *parser) parseIDSelector() (idSelector, error) {
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, fmt.Errorf("expected id selector (#id), found EOF instead") return idSelector{}, fmt.Errorf("expected id selector (#id), found EOF instead")
} }
if p.s[p.i] != '#' { if p.s[p.i] != '#' {
return nil, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i]) return idSelector{}, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
} }
p.i++ p.i++
id, err := p.parseName() id, err := p.parseName()
if err != nil { if err != nil {
return nil, err return idSelector{}, err
} }
return attributeEqualsSelector("id", id), nil return idSelector{id: id}, nil
} }
// parseClassSelector parses a selector that matches by class attribute. // parseClassSelector parses a selector that matches by class attribute.
func (p *parser) parseClassSelector() (Selector, error) { func (p *parser) parseClassSelector() (classSelector, error) {
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, fmt.Errorf("expected class selector (.class), found EOF instead") return classSelector{}, fmt.Errorf("expected class selector (.class), found EOF instead")
} }
if p.s[p.i] != '.' { if p.s[p.i] != '.' {
return nil, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i]) return classSelector{}, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
} }
p.i++ p.i++
class, err := p.parseIdentifier() class, err := p.parseIdentifier()
if err != nil { if err != nil {
return nil, err return classSelector{}, err
} }
return attributeIncludesSelector("class", class), nil return classSelector{class: class}, nil
} }
// parseAttributeSelector parses a selector that matches by attribute value. // parseAttributeSelector parses a selector that matches by attribute value.
func (p *parser) parseAttributeSelector() (Selector, error) { func (p *parser) parseAttributeSelector() (attrSelector, error) {
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead") return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead")
} }
if p.s[p.i] != '[' { if p.s[p.i] != '[' {
return nil, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i]) return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
} }
p.i++ p.i++
p.skipWhitespace() p.skipWhitespace()
key, err := p.parseIdentifier() key, err := p.parseIdentifier()
if err != nil { if err != nil {
return nil, err return attrSelector{}, err
} }
key = toLowerASCII(key)
p.skipWhitespace() p.skipWhitespace()
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector") return attrSelector{}, errors.New("unexpected EOF in attribute selector")
} }
if p.s[p.i] == ']' { if p.s[p.i] == ']' {
p.i++ p.i++
return attributeExistsSelector(key), nil return attrSelector{key: key, operation: ""}, nil
} }
if p.i+2 >= len(p.s) { if p.i+2 >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector") return attrSelector{}, errors.New("unexpected EOF in attribute selector")
} }
op := p.s[p.i : p.i+2] op := p.s[p.i : p.i+2]
if op[0] == '=' { if op[0] == '=' {
op = "=" op = "="
} else if op[1] != '=' { } else if op[1] != '=' {
return nil, fmt.Errorf(`expected equality operator, found "%s" instead`, op) return attrSelector{}, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
} }
p.i += len(op) p.i += len(op)
p.skipWhitespace() p.skipWhitespace()
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector") return attrSelector{}, errors.New("unexpected EOF in attribute selector")
} }
var val string var val string
var rx *regexp.Regexp var rx *regexp.Regexp
@ -380,46 +398,32 @@ func (p *parser) parseAttributeSelector() (Selector, error) {
} }
} }
if err != nil { if err != nil {
return nil, err return attrSelector{}, err
} }
p.skipWhitespace() p.skipWhitespace()
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in attribute selector") return attrSelector{}, errors.New("unexpected EOF in attribute selector")
} }
if p.s[p.i] != ']' { if p.s[p.i] != ']' {
return nil, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i]) return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
} }
p.i++ p.i++
switch op { switch op {
case "=": case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=":
return attributeEqualsSelector(key, val), nil return attrSelector{key: key, val: val, operation: op, regexp: rx}, nil
case "!=": default:
return attributeNotEqualSelector(key, val), nil return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op)
case "~=":
return attributeIncludesSelector(key, val), nil
case "|=":
return attributeDashmatchSelector(key, val), nil
case "^=":
return attributePrefixSelector(key, val), nil
case "$=":
return attributeSuffixSelector(key, val), nil
case "*=":
return attributeSubstringSelector(key, val), nil
case "#=":
return attributeRegexSelector(key, rx), nil
} }
return nil, fmt.Errorf("attribute operator %q is not supported", op)
} }
var errExpectedParenthesis = errors.New("expected '(' but didn't find it") var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it") var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
var errUnmatchedParenthesis = errors.New("unmatched '('") var errUnmatchedParenthesis = errors.New("unmatched '('")
// parsePseudoclassSelector parses a pseudoclass selector like :not(p). // parsePseudoclassSelector parses a pseudoclass selector like :not(p)
func (p *parser) parsePseudoclassSelector() (Selector, error) { func (p *parser) parsePseudoclassSelector() (out Sel, err error) {
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
} }
@ -428,40 +432,36 @@ func (p *parser) parsePseudoclassSelector() (Selector, error) {
} }
p.i++ p.i++
if p.s[p.i] == ':' { // we found a pseudo-element
p.i++
}
name, err := p.parseIdentifier() name, err := p.parseIdentifier()
if err != nil { if err != nil {
return nil, err return
} }
name = toLowerASCII(name) name = toLowerASCII(name)
switch name { switch name {
case "not", "has", "haschild": case "not", "has", "haschild":
if !p.consumeParenthesis() { if !p.consumeParenthesis() {
return nil, errExpectedParenthesis return out, errExpectedParenthesis
} }
sel, parseErr := p.parseSelectorGroup() sel, parseErr := p.parseSelectorGroup()
if parseErr != nil { if parseErr != nil {
return nil, parseErr return out, parseErr
} }
if !p.consumeClosingParenthesis() { if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis return out, errExpectedClosingParenthesis
} }
switch name { out = relativePseudoClassSelector{name: name, match: sel}
case "not":
return negatedSelector(sel), nil
case "has":
return hasDescendantSelector(sel), nil
case "haschild":
return hasChildSelector(sel), nil
}
case "contains", "containsown": case "contains", "containsown":
if !p.consumeParenthesis() { if !p.consumeParenthesis() {
return nil, errExpectedParenthesis return out, errExpectedParenthesis
} }
if p.i == len(p.s) { if p.i == len(p.s) {
return nil, errUnmatchedParenthesis return out, errUnmatchedParenthesis
} }
var val string var val string
switch p.s[p.i] { switch p.s[p.i] {
@ -471,95 +471,75 @@ func (p *parser) parsePseudoclassSelector() (Selector, error) {
val, err = p.parseIdentifier() val, err = p.parseIdentifier()
} }
if err != nil { if err != nil {
return nil, err return out, err
} }
val = strings.ToLower(val) val = strings.ToLower(val)
p.skipWhitespace() p.skipWhitespace()
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in pseudo selector") return out, errors.New("unexpected EOF in pseudo selector")
} }
if !p.consumeClosingParenthesis() { if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis return out, errExpectedClosingParenthesis
} }
switch name { out = containsPseudoClassSelector{own: name == "containsown", value: val}
case "contains":
return textSubstrSelector(val), nil
case "containsown":
return ownTextSubstrSelector(val), nil
}
case "matches", "matchesown": case "matches", "matchesown":
if !p.consumeParenthesis() { if !p.consumeParenthesis() {
return nil, errExpectedParenthesis return out, errExpectedParenthesis
} }
rx, err := p.parseRegex() rx, err := p.parseRegex()
if err != nil { if err != nil {
return nil, err return out, err
} }
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, errors.New("unexpected EOF in pseudo selector") return out, errors.New("unexpected EOF in pseudo selector")
} }
if !p.consumeClosingParenthesis() { if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis return out, errExpectedClosingParenthesis
} }
switch name { out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx}
case "matches":
return textRegexSelector(rx), nil
case "matchesown":
return ownTextRegexSelector(rx), nil
}
case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type": case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type":
if !p.consumeParenthesis() { if !p.consumeParenthesis() {
return nil, errExpectedParenthesis return out, errExpectedParenthesis
} }
a, b, err := p.parseNth() a, b, err := p.parseNth()
if err != nil { if err != nil {
return nil, err return out, err
} }
if !p.consumeClosingParenthesis() { if !p.consumeClosingParenthesis() {
return nil, errExpectedClosingParenthesis return out, errExpectedClosingParenthesis
} }
if a == 0 { last := name == "nth-last-child" || name == "nth-last-of-type"
switch name { ofType := name == "nth-of-type" || name == "nth-last-of-type"
case "nth-child": out = nthPseudoClassSelector{a: a, b: b, last: last, ofType: ofType}
return simpleNthChildSelector(b, false), nil
case "nth-of-type":
return simpleNthChildSelector(b, true), nil
case "nth-last-child":
return simpleNthLastChildSelector(b, false), nil
case "nth-last-of-type":
return simpleNthLastChildSelector(b, true), nil
}
}
return nthChildSelector(a, b,
name == "nth-last-child" || name == "nth-last-of-type",
name == "nth-of-type" || name == "nth-last-of-type"),
nil
case "first-child": case "first-child":
return simpleNthChildSelector(1, false), nil out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: false}
case "last-child": case "last-child":
return simpleNthLastChildSelector(1, false), nil out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: true}
case "first-of-type": case "first-of-type":
return simpleNthChildSelector(1, true), nil out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: false}
case "last-of-type": case "last-of-type":
return simpleNthLastChildSelector(1, true), nil out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: true}
case "only-child": case "only-child":
return onlyChildSelector(false), nil out = onlyChildPseudoClassSelector{ofType: false}
case "only-of-type": case "only-of-type":
return onlyChildSelector(true), nil out = onlyChildPseudoClassSelector{ofType: true}
case "input": case "input":
return inputSelector, nil out = inputPseudoClassSelector{}
case "empty": case "empty":
return emptyElementSelector, nil out = emptyElementPseudoClassSelector{}
case "root": case "root":
return rootSelector, nil out = rootPseudoClassSelector{}
case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error":
return out, errors.New("pseudo-elements are not yet supported")
default:
return out, fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name)
} }
return
return nil, fmt.Errorf("unknown pseudoclass :%s", name)
} }
// parseInteger parses a decimal integer. // parseInteger parses a decimal integer.
@ -705,8 +685,8 @@ invalid:
// parseSimpleSelectorSequence parses a selector sequence that applies to // parseSimpleSelectorSequence parses a selector sequence that applies to
// a single element. // a single element.
func (p *parser) parseSimpleSelectorSequence() (Selector, error) { func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
var result Selector var selectors []Sel
if p.i >= len(p.s) { if p.i >= len(p.s) {
return nil, errors.New("expected selector, found EOF instead") return nil, errors.New("expected selector, found EOF instead")
@ -723,13 +703,15 @@ func (p *parser) parseSimpleSelectorSequence() (Selector, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
result = r selectors = append(selectors, r)
} }
loop: loop:
for p.i < len(p.s) { for p.i < len(p.s) {
var ns Selector var (
var err error ns Sel
err error
)
switch p.s[p.i] { switch p.s[p.i] {
case '#': case '#':
ns, err = p.parseIDSelector() ns, err = p.parseIDSelector()
@ -745,37 +727,33 @@ loop:
if err != nil { if err != nil {
return nil, err return nil, err
} }
if result == nil {
result = ns
} else {
result = intersectionSelector(result, ns)
}
}
if result == nil { selectors = append(selectors, ns)
result = func(n *html.Node) bool {
return n.Type == html.ElementNode
} }
if len(selectors) == 1 { // no need wrap the selectors in compoundSelector
return selectors[0], nil
} }
return compoundSelector{selectors: selectors}, nil
return result, nil
} }
// parseSelector parses a selector that may include combinators. // parseSelector parses a selector that may include combinators.
func (p *parser) parseSelector() (result Selector, err error) { func (p *parser) parseSelector() (Sel, error) {
p.skipWhitespace() p.skipWhitespace()
result, err = p.parseSimpleSelectorSequence() result, err := p.parseSimpleSelectorSequence()
if err != nil { if err != nil {
return return nil, err
} }
for { for {
var combinator byte var (
combinator byte
c Sel
)
if p.skipWhitespace() { if p.skipWhitespace() {
combinator = ' ' combinator = ' '
} }
if p.i >= len(p.s) { if p.i >= len(p.s) {
return return result, nil
} }
switch p.s[p.i] { switch p.s[p.i] {
@ -785,51 +763,39 @@ func (p *parser) parseSelector() (result Selector, err error) {
p.skipWhitespace() p.skipWhitespace()
case ',', ')': case ',', ')':
// These characters can't begin a selector, but they can legally occur after one. // These characters can't begin a selector, but they can legally occur after one.
return return result, nil
} }
if combinator == 0 { if combinator == 0 {
return return result, nil
} }
c, err := p.parseSimpleSelectorSequence() c, err = p.parseSimpleSelectorSequence()
if err != nil { if err != nil {
return nil, err return nil, err
} }
result = combinedSelector{first: result, combinator: combinator, second: c}
switch combinator {
case ' ':
result = descendantSelector(result, c)
case '>':
result = childSelector(result, c)
case '+':
result = siblingSelector(result, c, true)
case '~':
result = siblingSelector(result, c, false)
} }
}
panic("unreachable")
} }
// parseSelectorGroup parses a group of selectors, separated by commas. // parseSelectorGroup parses a group of selectors, separated by commas.
func (p *parser) parseSelectorGroup() (result Selector, err error) { func (p *parser) parseSelectorGroup() (SelectorGroup, error) {
result, err = p.parseSelector() current, err := p.parseSelector()
if err != nil { if err != nil {
return return nil, err
} }
result := SelectorGroup{current}
for p.i < len(p.s) { for p.i < len(p.s) {
if p.s[p.i] != ',' { if p.s[p.i] != ',' {
return result, nil break
} }
p.i++ p.i++
c, err := p.parseSelector() c, err := p.parseSelector()
if err != nil { if err != nil {
return nil, err return nil, err
} }
result = unionSelector(result, c) result = append(result, c)
} }
return result, nil
return
} }

View file

@ -9,36 +9,37 @@ import (
"golang.org/x/net/html" "golang.org/x/net/html"
) )
// the Selector type, and functions for creating them // Matcher is the interface for basic selector functionality.
// Match returns whether a selector matches n.
// A Selector is a function which tells whether a node matches or not. type Matcher interface {
type Selector func(*html.Node) bool Match(n *html.Node) bool
// hasChildMatch returns whether n has any child that matches a.
func hasChildMatch(n *html.Node, a Selector) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a(c) {
return true
}
}
return false
} }
// hasDescendantMatch performs a depth-first search of n's descendants, // Sel is the interface for all the functionality provided by selectors.
// testing whether any of them match a. It returns true as soon as a match is // It is currently the same as Matcher, but other methods may be added in the
// found, or false if no match is found. // future.
func hasDescendantMatch(n *html.Node, a Selector) bool { type Sel interface {
for c := n.FirstChild; c != nil; c = c.NextSibling { Matcher
if a(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) { Specificity() Specificity
return true
}
}
return false
} }
// Compile parses a selector and returns, if successful, a Selector object // Parse parses a selector.
// that can be used to match against html.Node objects. func Parse(sel string) (Sel, error) {
func Compile(sel string) (Selector, error) { p := &parser{s: sel}
compiled, err := p.parseSelector()
if err != nil {
return nil, err
}
if p.i < len(sel) {
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
}
return compiled, nil
}
// ParseGroup parses a selector, or a group of selectors separated by commas.
func ParseGroup(sel string) (SelectorGroup, error) {
p := &parser{s: sel} p := &parser{s: sel}
compiled, err := p.parseSelectorGroup() compiled, err := p.parseSelectorGroup()
if err != nil { if err != nil {
@ -52,6 +53,23 @@ func Compile(sel string) (Selector, error) {
return compiled, nil return compiled, nil
} }
// A Selector is a function which tells whether a node matches or not.
//
// This type is maintained for compatibility; I recommend using the newer and
// more idiomatic interfaces Sel and Matcher.
type Selector func(*html.Node) bool
// Compile parses a selector and returns, if successful, a Selector object
// that can be used to match against html.Node objects.
func Compile(sel string) (Selector, error) {
compiled, err := ParseGroup(sel)
if err != nil {
return nil, err
}
return Selector(compiled.Match), nil
}
// MustCompile is like Compile, but panics instead of returning an error. // MustCompile is like Compile, but panics instead of returning an error.
func MustCompile(sel string) Selector { func MustCompile(sel string) Selector {
compiled, err := Compile(sel) compiled, err := Compile(sel)
@ -79,6 +97,23 @@ func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node
return storage return storage
} }
func queryInto(n *html.Node, m Matcher, storage []*html.Node) []*html.Node {
for child := n.FirstChild; child != nil; child = child.NextSibling {
if m.Match(child) {
storage = append(storage, child)
}
storage = queryInto(child, m, storage)
}
return storage
}
// QueryAll returns a slice of all the nodes that match m, from the descendants
// of n.
func QueryAll(n *html.Node, m Matcher) []*html.Node {
return queryInto(n, m, nil)
}
// Match returns true if the node matches the selector. // Match returns true if the node matches the selector.
func (s Selector) Match(n *html.Node) bool { func (s Selector) Match(n *html.Node) bool {
return s(n) return s(n)
@ -99,6 +134,21 @@ func (s Selector) MatchFirst(n *html.Node) *html.Node {
return nil return nil
} }
// Query returns the first node that matches m, from the descendants of n.
// If none matches, it returns nil.
func Query(n *html.Node, m Matcher) *html.Node {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if m.Match(c) {
return c
}
if matched := Query(c, m); matched != nil {
return matched
}
}
return nil
}
// Filter returns the nodes in nodes that match the selector. // Filter returns the nodes in nodes that match the selector.
func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) { func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
for _, n := range nodes { for _, n := range nodes {
@ -109,39 +159,93 @@ func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
return result return result
} }
// typeSelector returns a Selector that matches elements with a given tag name. // Filter returns the nodes that match m.
func typeSelector(tag string) Selector { func Filter(nodes []*html.Node, m Matcher) (result []*html.Node) {
tag = toLowerASCII(tag) for _, n := range nodes {
return func(n *html.Node) bool { if m.Match(n) {
return n.Type == html.ElementNode && n.Data == tag result = append(result, n)
}
}
return result
}
type tagSelector struct {
tag string
}
// Matches elements with a given tag name.
func (t tagSelector) Match(n *html.Node) bool {
return n.Type == html.ElementNode && n.Data == t.tag
}
func (c tagSelector) Specificity() Specificity {
return Specificity{0, 0, 1}
}
type classSelector struct {
class string
}
// Matches elements by class attribute.
func (t classSelector) Match(n *html.Node) bool {
return matchAttribute(n, "class", func(s string) bool {
return matchInclude(t.class, s)
})
}
func (c classSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
type idSelector struct {
id string
}
// Matches elements by id attribute.
func (t idSelector) Match(n *html.Node) bool {
return matchAttribute(n, "id", func(s string) bool {
return s == t.id
})
}
func (c idSelector) Specificity() Specificity {
return Specificity{1, 0, 0}
}
type attrSelector struct {
key, val, operation string
regexp *regexp.Regexp
}
// Matches elements by attribute value.
func (t attrSelector) Match(n *html.Node) bool {
switch t.operation {
case "":
return matchAttribute(n, t.key, func(string) bool { return true })
case "=":
return matchAttribute(n, t.key, func(s string) bool { return s == t.val })
case "!=":
return attributeNotEqualMatch(t.key, t.val, n)
case "~=":
// matches elements where the attribute named key is a whitespace-separated list that includes val.
return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s) })
case "|=":
return attributeDashMatch(t.key, t.val, n)
case "^=":
return attributePrefixMatch(t.key, t.val, n)
case "$=":
return attributeSuffixMatch(t.key, t.val, n)
case "*=":
return attributeSubstringMatch(t.key, t.val, n)
case "#=":
return attributeRegexMatch(t.key, t.regexp, n)
default:
panic(fmt.Sprintf("unsuported operation : %s", t.operation))
} }
} }
// toLowerASCII returns s with all ASCII capital letters lowercased. // matches elements where the attribute named key satisifes the function f.
func toLowerASCII(s string) string { func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
var b []byte
for i := 0; i < len(s); i++ {
if c := s[i]; 'A' <= c && c <= 'Z' {
if b == nil {
b = make([]byte, len(s))
copy(b, s)
}
b[i] = s[i] + ('a' - 'A')
}
}
if b == nil {
return s
}
return string(b)
}
// attributeSelector returns a Selector that matches elements
// where the attribute named key satisifes the function f.
func attributeSelector(key string, f func(string) bool) Selector {
key = toLowerASCII(key)
return func(n *html.Node) bool {
if n.Type != html.ElementNode { if n.Type != html.ElementNode {
return false return false
} }
@ -151,29 +255,11 @@ func attributeSelector(key string, f func(string) bool) Selector {
} }
} }
return false return false
}
} }
// attributeExistsSelector returns a Selector that matches elements that have // attributeNotEqualMatch matches elements where
// an attribute named key.
func attributeExistsSelector(key string) Selector {
return attributeSelector(key, func(string) bool { return true })
}
// attributeEqualsSelector returns a Selector that matches elements where
// the attribute named key has the value val.
func attributeEqualsSelector(key, val string) Selector {
return attributeSelector(key,
func(s string) bool {
return s == val
})
}
// attributeNotEqualSelector returns a Selector that matches elements where
// the attribute named key does not have the value val. // the attribute named key does not have the value val.
func attributeNotEqualSelector(key, val string) Selector { func attributeNotEqualMatch(key, val string, n *html.Node) bool {
key = toLowerASCII(key)
return func(n *html.Node) bool {
if n.Type != html.ElementNode { if n.Type != html.ElementNode {
return false return false
} }
@ -183,14 +269,10 @@ func attributeNotEqualSelector(key, val string) Selector {
} }
} }
return true return true
}
} }
// attributeIncludesSelector returns a Selector that matches elements where // returns true if s is a whitespace-separated list that includes val.
// the attribute named key is a whitespace-separated list that includes val. func matchInclude(val, s string) bool {
func attributeIncludesSelector(key, val string) Selector {
return attributeSelector(key,
func(s string) bool {
for s != "" { for s != "" {
i := strings.IndexAny(s, " \t\r\n\f") i := strings.IndexAny(s, " \t\r\n\f")
if i == -1 { if i == -1 {
@ -202,13 +284,11 @@ func attributeIncludesSelector(key, val string) Selector {
s = s[i+1:] s = s[i+1:]
} }
return false return false
})
} }
// attributeDashmatchSelector returns a Selector that matches elements where // matches elements where the attribute named key equals val or starts with val plus a hyphen.
// the attribute named key equals val or starts with val plus a hyphen. func attributeDashMatch(key, val string, n *html.Node) bool {
func attributeDashmatchSelector(key, val string) Selector { return matchAttribute(n, key,
return attributeSelector(key,
func(s string) bool { func(s string) bool {
if s == val { if s == val {
return true return true
@ -223,10 +303,10 @@ func attributeDashmatchSelector(key, val string) Selector {
}) })
} }
// attributePrefixSelector returns a Selector that matches elements where // attributePrefixMatch returns a Selector that matches elements where
// the attribute named key starts with val. // the attribute named key starts with val.
func attributePrefixSelector(key, val string) Selector { func attributePrefixMatch(key, val string, n *html.Node) bool {
return attributeSelector(key, return matchAttribute(n, key,
func(s string) bool { func(s string) bool {
if strings.TrimSpace(s) == "" { if strings.TrimSpace(s) == "" {
return false return false
@ -235,10 +315,10 @@ func attributePrefixSelector(key, val string) Selector {
}) })
} }
// attributeSuffixSelector returns a Selector that matches elements where // attributeSuffixMatch matches elements where
// the attribute named key ends with val. // the attribute named key ends with val.
func attributeSuffixSelector(key, val string) Selector { func attributeSuffixMatch(key, val string, n *html.Node) bool {
return attributeSelector(key, return matchAttribute(n, key,
func(s string) bool { func(s string) bool {
if strings.TrimSpace(s) == "" { if strings.TrimSpace(s) == "" {
return false return false
@ -247,10 +327,10 @@ func attributeSuffixSelector(key, val string) Selector {
}) })
} }
// attributeSubstringSelector returns a Selector that matches nodes where // attributeSubstringMatch matches nodes where
// the attribute named key contains val. // the attribute named key contains val.
func attributeSubstringSelector(key, val string) Selector { func attributeSubstringMatch(key, val string, n *html.Node) bool {
return attributeSelector(key, return matchAttribute(n, key,
func(s string) bool { func(s string) bool {
if strings.TrimSpace(s) == "" { if strings.TrimSpace(s) == "" {
return false return false
@ -259,41 +339,120 @@ func attributeSubstringSelector(key, val string) Selector {
}) })
} }
// attributeRegexSelector returns a Selector that matches nodes where // attributeRegexMatch matches nodes where
// the attribute named key matches the regular expression rx // the attribute named key matches the regular expression rx
func attributeRegexSelector(key string, rx *regexp.Regexp) Selector { func attributeRegexMatch(key string, rx *regexp.Regexp, n *html.Node) bool {
return attributeSelector(key, return matchAttribute(n, key,
func(s string) bool { func(s string) bool {
return rx.MatchString(s) return rx.MatchString(s)
}) })
} }
// intersectionSelector returns a selector that matches nodes that match func (c attrSelector) Specificity() Specificity {
// both a and b. return Specificity{0, 1, 0}
func intersectionSelector(a, b Selector) Selector {
return func(n *html.Node) bool {
return a(n) && b(n)
}
} }
// unionSelector returns a selector that matches elements that match // ---------------- Pseudo class selectors ----------------
// either a or b. // we use severals concrete types of pseudo-class selectors
func unionSelector(a, b Selector) Selector {
return func(n *html.Node) bool { type relativePseudoClassSelector struct {
return a(n) || b(n) name string // one of "not", "has", "haschild"
} match SelectorGroup
} }
// negatedSelector returns a selector that matches elements that do not match a. func (s relativePseudoClassSelector) Match(n *html.Node) bool {
func negatedSelector(a Selector) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode { if n.Type != html.ElementNode {
return false return false
} }
return !a(n) switch s.name {
case "not":
// matches elements that do not match a.
return !s.match.Match(n)
case "has":
// matches elements with any descendant that matches a.
return hasDescendantMatch(n, s.match)
case "haschild":
// matches elements with a child that matches a.
return hasChildMatch(n, s.match)
default:
panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name))
} }
} }
// hasChildMatch returns whether n has any child that matches a.
func hasChildMatch(n *html.Node, a Matcher) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a.Match(c) {
return true
}
}
return false
}
// hasDescendantMatch performs a depth-first search of n's descendants,
// testing whether any of them match a. It returns true as soon as a match is
// found, or false if no match is found.
func hasDescendantMatch(n *html.Node, a Matcher) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
return true
}
}
return false
}
// Specificity returns the specificity of the most specific selectors
// in the pseudo-class arguments.
// See https://www.w3.org/TR/selectors/#specificity-rules
func (s relativePseudoClassSelector) Specificity() Specificity {
var max Specificity
for _, sel := range s.match {
newSpe := sel.Specificity()
if max.Less(newSpe) {
max = newSpe
}
}
return max
}
type containsPseudoClassSelector struct {
own bool
value string
}
func (s containsPseudoClassSelector) Match(n *html.Node) bool {
var text string
if s.own {
// matches nodes that directly contain the given text
text = strings.ToLower(nodeOwnText(n))
} else {
// matches nodes that contain the given text.
text = strings.ToLower(nodeText(n))
}
return strings.Contains(text, s.value)
}
func (s containsPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
type regexpPseudoClassSelector struct {
own bool
regexp *regexp.Regexp
}
func (s regexpPseudoClassSelector) Match(n *html.Node) bool {
var text string
if s.own {
// matches nodes whose text directly matches the specified regular expression
text = nodeOwnText(n)
} else {
// matches nodes whose text matches the specified regular expression
text = nodeText(n)
}
return s.regexp.MatchString(text)
}
// writeNodeText writes the text contained in n and its descendants to b. // writeNodeText writes the text contained in n and its descendants to b.
func writeNodeText(n *html.Node, b *bytes.Buffer) { func writeNodeText(n *html.Node, b *bytes.Buffer) {
switch n.Type { switch n.Type {
@ -325,67 +484,30 @@ func nodeOwnText(n *html.Node) string {
return b.String() return b.String()
} }
// textSubstrSelector returns a selector that matches nodes that func (s regexpPseudoClassSelector) Specificity() Specificity {
// contain the given text. return Specificity{0, 1, 0}
func textSubstrSelector(val string) Selector {
return func(n *html.Node) bool {
text := strings.ToLower(nodeText(n))
return strings.Contains(text, val)
}
} }
// ownTextSubstrSelector returns a selector that matches nodes that type nthPseudoClassSelector struct {
// directly contain the given text a, b int
func ownTextSubstrSelector(val string) Selector { last, ofType bool
return func(n *html.Node) bool {
text := strings.ToLower(nodeOwnText(n))
return strings.Contains(text, val)
}
} }
// textRegexSelector returns a selector that matches nodes whose text matches func (s nthPseudoClassSelector) Match(n *html.Node) bool {
// the specified regular expression if s.a == 0 {
func textRegexSelector(rx *regexp.Regexp) Selector { if s.last {
return func(n *html.Node) bool { return simpleNthLastChildMatch(s.b, s.ofType, n)
return rx.MatchString(nodeText(n)) } else {
return simpleNthChildMatch(s.b, s.ofType, n)
} }
}
return nthChildMatch(s.a, s.b, s.last, s.ofType, n)
} }
// ownTextRegexSelector returns a selector that matches nodes whose text // nthChildMatch implements :nth-child(an+b).
// directly matches the specified regular expression
func ownTextRegexSelector(rx *regexp.Regexp) Selector {
return func(n *html.Node) bool {
return rx.MatchString(nodeOwnText(n))
}
}
// hasChildSelector returns a selector that matches elements
// with a child that matches a.
func hasChildSelector(a Selector) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
return hasChildMatch(n, a)
}
}
// hasDescendantSelector returns a selector that matches elements
// with any descendant that matches a.
func hasDescendantSelector(a Selector) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
return hasDescendantMatch(n, a)
}
}
// nthChildSelector returns a selector that implements :nth-child(an+b).
// If last is true, implements :nth-last-child instead. // If last is true, implements :nth-last-child instead.
// If ofType is true, implements :nth-of-type instead. // If ofType is true, implements :nth-of-type instead.
func nthChildSelector(a, b int, last, ofType bool) Selector { func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool {
return func(n *html.Node) bool {
if n.Type != html.ElementNode { if n.Type != html.ElementNode {
return false return false
} }
@ -429,13 +551,11 @@ func nthChildSelector(a, b int, last, ofType bool) Selector {
} }
return i%a == 0 && i/a >= 0 return i%a == 0 && i/a >= 0
}
} }
// simpleNthChildSelector returns a selector that implements :nth-child(b). // simpleNthChildMatch implements :nth-child(b).
// If ofType is true, implements :nth-of-type instead. // If ofType is true, implements :nth-of-type instead.
func simpleNthChildSelector(b int, ofType bool) Selector { func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool {
return func(n *html.Node) bool {
if n.Type != html.ElementNode { if n.Type != html.ElementNode {
return false return false
} }
@ -463,14 +583,11 @@ func simpleNthChildSelector(b int, ofType bool) Selector {
} }
} }
return false return false
}
} }
// simpleNthLastChildSelector returns a selector that implements // simpleNthLastChildMatch implements :nth-last-child(b).
// :nth-last-child(b). If ofType is true, implements :nth-last-of-type // If ofType is true, implements :nth-last-of-type instead.
// instead. func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool {
func simpleNthLastChildSelector(b int, ofType bool) Selector {
return func(n *html.Node) bool {
if n.Type != html.ElementNode { if n.Type != html.ElementNode {
return false return false
} }
@ -498,13 +615,21 @@ func simpleNthLastChildSelector(b int, ofType bool) Selector {
} }
} }
return false return false
}
} }
// onlyChildSelector returns a selector that implements :only-child. // Specificity for nth-child pseudo-class.
// If ofType is true, it implements :only-of-type instead. // Does not support a list of selectors
func onlyChildSelector(ofType bool) Selector { func (s nthPseudoClassSelector) Specificity() Specificity {
return func(n *html.Node) bool { return Specificity{0, 1, 0}
}
type onlyChildPseudoClassSelector struct {
ofType bool
}
// Match implements :only-child.
// If `ofType` is true, it implements :only-of-type instead.
func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode { if n.Type != html.ElementNode {
return false return false
} }
@ -520,7 +645,7 @@ func onlyChildSelector(ofType bool) Selector {
count := 0 count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling { for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) {
continue continue
} }
count++ count++
@ -530,16 +655,27 @@ func onlyChildSelector(ofType bool) Selector {
} }
return count == 1 return count == 1
}
} }
// inputSelector is a Selector that matches input, select, textarea and button elements. func (s onlyChildPseudoClassSelector) Specificity() Specificity {
func inputSelector(n *html.Node) bool { return Specificity{0, 1, 0}
}
type inputPseudoClassSelector struct{}
// Matches input, select, textarea and button elements.
func (s inputPseudoClassSelector) Match(n *html.Node) bool {
return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button") return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button")
} }
// emptyElementSelector is a Selector that matches empty elements. func (s inputPseudoClassSelector) Specificity() Specificity {
func emptyElementSelector(n *html.Node) bool { return Specificity{0, 1, 0}
}
type emptyElementPseudoClassSelector struct{}
// Matches empty elements.
func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode { if n.Type != html.ElementNode {
return false return false
} }
@ -554,64 +690,14 @@ func emptyElementSelector(n *html.Node) bool {
return true return true
} }
// descendantSelector returns a Selector that matches an element if func (s emptyElementPseudoClassSelector) Specificity() Specificity {
// it matches d and has an ancestor that matches a. return Specificity{0, 1, 0}
func descendantSelector(a, d Selector) Selector {
return func(n *html.Node) bool {
if !d(n) {
return false
}
for p := n.Parent; p != nil; p = p.Parent {
if a(p) {
return true
}
}
return false
}
} }
// childSelector returns a Selector that matches an element if type rootPseudoClassSelector struct{}
// it matches d and its parent matches a.
func childSelector(a, d Selector) Selector {
return func(n *html.Node) bool {
return d(n) && n.Parent != nil && a(n.Parent)
}
}
// siblingSelector returns a Selector that matches an element // Match implements :root
// if it matches s2 and in is preceded by an element that matches s1. func (s rootPseudoClassSelector) Match(n *html.Node) bool {
// If adjacent is true, the sibling must be immediately before the element.
func siblingSelector(s1, s2 Selector, adjacent bool) Selector {
return func(n *html.Node) bool {
if !s2(n) {
return false
}
if adjacent {
for n = n.PrevSibling; n != nil; n = n.PrevSibling {
if n.Type == html.TextNode || n.Type == html.CommentNode {
continue
}
return s1(n)
}
return false
}
// Walk backwards looking for element that matches s1
for c := n.PrevSibling; c != nil; c = c.PrevSibling {
if s1(c) {
return true
}
}
return false
}
}
// rootSelector implements :root
func rootSelector(n *html.Node) bool {
if n.Type != html.ElementNode { if n.Type != html.ElementNode {
return false return false
} }
@ -620,3 +706,128 @@ func rootSelector(n *html.Node) bool {
} }
return n.Parent.Type == html.DocumentNode return n.Parent.Type == html.DocumentNode
} }
func (s rootPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
type compoundSelector struct {
selectors []Sel
}
// Matches elements if each sub-selectors matches.
func (t compoundSelector) Match(n *html.Node) bool {
if len(t.selectors) == 0 {
return n.Type == html.ElementNode
}
for _, sel := range t.selectors {
if !sel.Match(n) {
return false
}
}
return true
}
func (s compoundSelector) Specificity() Specificity {
var out Specificity
for _, sel := range s.selectors {
out = out.Add(sel.Specificity())
}
return out
}
type combinedSelector struct {
first Sel
combinator byte
second Sel
}
func (t combinedSelector) Match(n *html.Node) bool {
if t.first == nil {
return false // maybe we should panic
}
switch t.combinator {
case 0:
return t.first.Match(n)
case ' ':
return descendantMatch(t.first, t.second, n)
case '>':
return childMatch(t.first, t.second, n)
case '+':
return siblingMatch(t.first, t.second, true, n)
case '~':
return siblingMatch(t.first, t.second, false, n)
default:
panic("unknown combinator")
}
}
// matches an element if it matches d and has an ancestor that matches a.
func descendantMatch(a, d Matcher, n *html.Node) bool {
if !d.Match(n) {
return false
}
for p := n.Parent; p != nil; p = p.Parent {
if a.Match(p) {
return true
}
}
return false
}
// matches an element if it matches d and its parent matches a.
func childMatch(a, d Matcher, n *html.Node) bool {
return d.Match(n) && n.Parent != nil && a.Match(n.Parent)
}
// matches an element if it matches s2 and is preceded by an element that matches s1.
// If adjacent is true, the sibling must be immediately before the element.
func siblingMatch(s1, s2 Matcher, adjacent bool, n *html.Node) bool {
if !s2.Match(n) {
return false
}
if adjacent {
for n = n.PrevSibling; n != nil; n = n.PrevSibling {
if n.Type == html.TextNode || n.Type == html.CommentNode {
continue
}
return s1.Match(n)
}
return false
}
// Walk backwards looking for element that matches s1
for c := n.PrevSibling; c != nil; c = c.PrevSibling {
if s1.Match(c) {
return true
}
}
return false
}
func (s combinedSelector) Specificity() Specificity {
spec := s.first.Specificity()
if s.second != nil {
spec = spec.Add(s.second.Specificity())
}
return spec
}
// A SelectorGroup is a list of selectors, which matches if any of the
// individual selectors matches.
type SelectorGroup []Sel
// Match returns true if the node matches one of the single selectors.
func (s SelectorGroup) Match(n *html.Node) bool {
for _, sel := range s {
if sel.Match(n) {
return true
}
}
return false
}

26
vendor/github.com/andybalholm/cascadia/specificity.go generated vendored Normal file
View file

@ -0,0 +1,26 @@
package cascadia
// Specificity is the CSS specificity as defined in
// https://www.w3.org/TR/selectors/#specificity-rules
// with the convention Specificity = [A,B,C].
type Specificity [3]int
// returns `true` if s < other (strictly), false otherwise
func (s Specificity) Less(other Specificity) bool {
for i := range s {
if s[i] < other[i] {
return true
}
if s[i] > other[i] {
return false
}
}
return false
}
func (s Specificity) Add(other Specificity) Specificity {
for i, sp := range other {
s[i] += sp
}
return s
}

15
vendor/github.com/chilts/sid/.codeclimate.yml generated vendored Normal file
View file

@ -0,0 +1,15 @@
---
engines:
golint:
enabled: true
govet:
enabled: true
gofmt:
enabled: true
fixme:
enabled: true
ratings:
paths:
- "**.go"
exclude_paths:
- "*_test.go"

11
vendor/github.com/chilts/sid/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,11 @@
language: go
sudo: false
go:
- 1.7.x
- 1.8.x
- 1.x
- master
matrix:
allow_failures:
- go: 'master'
fast_finish: true

24
vendor/github.com/jaytaylor/html2text/.gitignore generated vendored Normal file
View file

@ -0,0 +1,24 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof

13
vendor/github.com/jaytaylor/html2text/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,13 @@
language: go
go:
# n.b. For golang release history, see https://golang.org/doc/devel/release.html
- tip
- "1.13.8"
- "1.12.17"
- "1.11.13"
- "1.10.8"
- "1.9.7"
notifications:
email:
on_success: change
on_failure: always

8
vendor/github.com/mattn/go-runewidth/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,8 @@
language: go
go:
- tip
before_install:
- go get github.com/mattn/goveralls
- go get golang.org/x/tools/cmd/cover
script:
- $HOME/gopath/bin/goveralls -repotoken lAKAWPzcGsD3A8yBX3BGGtRUdJ6CaGERL

View file

@ -4,6 +4,8 @@ import (
"os" "os"
) )
//go:generate go run script/generate.go
var ( var (
// EastAsianWidth will be set true if the current locale is CJK // EastAsianWidth will be set true if the current locale is CJK
EastAsianWidth bool EastAsianWidth bool
@ -82,728 +84,6 @@ var nonprint = table{
{0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, {0xFFFE, 0xFFFF}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, {0xFFFE, 0xFFFF},
} }
var combining = table{
{0x0300, 0x036F}, {0x0483, 0x0489}, {0x0591, 0x05BD},
{0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C5},
{0x05C7, 0x05C7}, {0x0610, 0x061A}, {0x064B, 0x065F},
{0x0670, 0x0670}, {0x06D6, 0x06DC}, {0x06DF, 0x06E4},
{0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0711},
{0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x07EB, 0x07F3},
{0x0816, 0x0819}, {0x081B, 0x0823}, {0x0825, 0x0827},
{0x0829, 0x082D}, {0x0859, 0x085B}, {0x08D4, 0x08E1},
{0x08E3, 0x0903}, {0x093A, 0x093C}, {0x093E, 0x094F},
{0x0951, 0x0957}, {0x0962, 0x0963}, {0x0981, 0x0983},
{0x09BC, 0x09BC}, {0x09BE, 0x09C4}, {0x09C7, 0x09C8},
{0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},
{0x0A01, 0x0A03}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42},
{0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51},
{0x0A70, 0x0A71}, {0x0A75, 0x0A75}, {0x0A81, 0x0A83},
{0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9},
{0x0ACB, 0x0ACD}, {0x0AE2, 0x0AE3}, {0x0B01, 0x0B03},
{0x0B3C, 0x0B3C}, {0x0B3E, 0x0B44}, {0x0B47, 0x0B48},
{0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B62, 0x0B63},
{0x0B82, 0x0B82}, {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8},
{0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7}, {0x0C00, 0x0C03},
{0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
{0x0C55, 0x0C56}, {0x0C62, 0x0C63}, {0x0C81, 0x0C83},
{0x0CBC, 0x0CBC}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8},
{0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0CE2, 0x0CE3},
{0x0D01, 0x0D03}, {0x0D3E, 0x0D44}, {0x0D46, 0x0D48},
{0x0D4A, 0x0D4D}, {0x0D57, 0x0D57}, {0x0D62, 0x0D63},
{0x0D82, 0x0D83}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4},
{0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DF2, 0x0DF3},
{0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
{0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
{0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35},
{0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3F},
{0x0F71, 0x0F84}, {0x0F86, 0x0F87}, {0x0F8D, 0x0F97},
{0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102B, 0x103E},
{0x1056, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064},
{0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x108D},
{0x108F, 0x108F}, {0x109A, 0x109D}, {0x135D, 0x135F},
{0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753},
{0x1772, 0x1773}, {0x17B4, 0x17D3}, {0x17DD, 0x17DD},
{0x180B, 0x180D}, {0x1885, 0x1886}, {0x18A9, 0x18A9},
{0x1920, 0x192B}, {0x1930, 0x193B}, {0x1A17, 0x1A1B},
{0x1A55, 0x1A5E}, {0x1A60, 0x1A7C}, {0x1A7F, 0x1A7F},
{0x1AB0, 0x1ABE}, {0x1B00, 0x1B04}, {0x1B34, 0x1B44},
{0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, {0x1BA1, 0x1BAD},
{0x1BE6, 0x1BF3}, {0x1C24, 0x1C37}, {0x1CD0, 0x1CD2},
{0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF2, 0x1CF4},
{0x1CF8, 0x1CF9}, {0x1DC0, 0x1DF5}, {0x1DFB, 0x1DFF},
{0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F},
{0x2DE0, 0x2DFF}, {0x302A, 0x302F}, {0x3099, 0x309A},
{0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F},
{0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806},
{0xA80B, 0xA80B}, {0xA823, 0xA827}, {0xA880, 0xA881},
{0xA8B4, 0xA8C5}, {0xA8E0, 0xA8F1}, {0xA926, 0xA92D},
{0xA947, 0xA953}, {0xA980, 0xA983}, {0xA9B3, 0xA9C0},
{0xA9E5, 0xA9E5}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43},
{0xAA4C, 0xAA4D}, {0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0},
{0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF},
{0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6},
{0xABE3, 0xABEA}, {0xABEC, 0xABED}, {0xFB1E, 0xFB1E},
{0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD},
{0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x10A01, 0x10A03},
{0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A},
{0x10A3F, 0x10A3F}, {0x10AE5, 0x10AE6}, {0x11000, 0x11002},
{0x11038, 0x11046}, {0x1107F, 0x11082}, {0x110B0, 0x110BA},
{0x11100, 0x11102}, {0x11127, 0x11134}, {0x11173, 0x11173},
{0x11180, 0x11182}, {0x111B3, 0x111C0}, {0x111CA, 0x111CC},
{0x1122C, 0x11237}, {0x1123E, 0x1123E}, {0x112DF, 0x112EA},
{0x11300, 0x11303}, {0x1133C, 0x1133C}, {0x1133E, 0x11344},
{0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357},
{0x11362, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374},
{0x11435, 0x11446}, {0x114B0, 0x114C3}, {0x115AF, 0x115B5},
{0x115B8, 0x115C0}, {0x115DC, 0x115DD}, {0x11630, 0x11640},
{0x116AB, 0x116B7}, {0x1171D, 0x1172B}, {0x11C2F, 0x11C36},
{0x11C38, 0x11C3F}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6},
{0x16AF0, 0x16AF4}, {0x16B30, 0x16B36}, {0x16F51, 0x16F7E},
{0x16F8F, 0x16F92}, {0x1BC9D, 0x1BC9E}, {0x1D165, 0x1D169},
{0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B},
{0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1DA00, 0x1DA36},
{0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84},
{0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006},
{0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024},
{0x1E026, 0x1E02A}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},
{0xE0100, 0xE01EF},
}
var doublewidth = table{
{0x1100, 0x115F}, {0x231A, 0x231B}, {0x2329, 0x232A},
{0x23E9, 0x23EC}, {0x23F0, 0x23F0}, {0x23F3, 0x23F3},
{0x25FD, 0x25FE}, {0x2614, 0x2615}, {0x2648, 0x2653},
{0x267F, 0x267F}, {0x2693, 0x2693}, {0x26A1, 0x26A1},
{0x26AA, 0x26AB}, {0x26BD, 0x26BE}, {0x26C4, 0x26C5},
{0x26CE, 0x26CE}, {0x26D4, 0x26D4}, {0x26EA, 0x26EA},
{0x26F2, 0x26F3}, {0x26F5, 0x26F5}, {0x26FA, 0x26FA},
{0x26FD, 0x26FD}, {0x2705, 0x2705}, {0x270A, 0x270B},
{0x2728, 0x2728}, {0x274C, 0x274C}, {0x274E, 0x274E},
{0x2753, 0x2755}, {0x2757, 0x2757}, {0x2795, 0x2797},
{0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2B1B, 0x2B1C},
{0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99},
{0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB},
{0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF},
{0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA},
{0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247},
{0x3250, 0x32FE}, {0x3300, 0x4DBF}, {0x4E00, 0xA48C},
{0xA490, 0xA4C6}, {0xA960, 0xA97C}, {0xAC00, 0xD7A3},
{0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
{0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60},
{0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE0}, {0x17000, 0x187EC},
{0x18800, 0x18AF2}, {0x1B000, 0x1B001}, {0x1F004, 0x1F004},
{0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A},
{0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248},
{0x1F250, 0x1F251}, {0x1F300, 0x1F320}, {0x1F32D, 0x1F335},
{0x1F337, 0x1F37C}, {0x1F37E, 0x1F393}, {0x1F3A0, 0x1F3CA},
{0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0}, {0x1F3F4, 0x1F3F4},
{0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440}, {0x1F442, 0x1F4FC},
{0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E}, {0x1F550, 0x1F567},
{0x1F57A, 0x1F57A}, {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A4},
{0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC},
{0x1F6D0, 0x1F6D2}, {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6F6},
{0x1F910, 0x1F91E}, {0x1F920, 0x1F927}, {0x1F930, 0x1F930},
{0x1F933, 0x1F93E}, {0x1F940, 0x1F94B}, {0x1F950, 0x1F95E},
{0x1F980, 0x1F991}, {0x1F9C0, 0x1F9C0}, {0x20000, 0x2FFFD},
{0x30000, 0x3FFFD},
}
var ambiguous = table{
{0x00A1, 0x00A1}, {0x00A4, 0x00A4}, {0x00A7, 0x00A8},
{0x00AA, 0x00AA}, {0x00AD, 0x00AE}, {0x00B0, 0x00B4},
{0x00B6, 0x00BA}, {0x00BC, 0x00BF}, {0x00C6, 0x00C6},
{0x00D0, 0x00D0}, {0x00D7, 0x00D8}, {0x00DE, 0x00E1},
{0x00E6, 0x00E6}, {0x00E8, 0x00EA}, {0x00EC, 0x00ED},
{0x00F0, 0x00F0}, {0x00F2, 0x00F3}, {0x00F7, 0x00FA},
{0x00FC, 0x00FC}, {0x00FE, 0x00FE}, {0x0101, 0x0101},
{0x0111, 0x0111}, {0x0113, 0x0113}, {0x011B, 0x011B},
{0x0126, 0x0127}, {0x012B, 0x012B}, {0x0131, 0x0133},
{0x0138, 0x0138}, {0x013F, 0x0142}, {0x0144, 0x0144},
{0x0148, 0x014B}, {0x014D, 0x014D}, {0x0152, 0x0153},
{0x0166, 0x0167}, {0x016B, 0x016B}, {0x01CE, 0x01CE},
{0x01D0, 0x01D0}, {0x01D2, 0x01D2}, {0x01D4, 0x01D4},
{0x01D6, 0x01D6}, {0x01D8, 0x01D8}, {0x01DA, 0x01DA},
{0x01DC, 0x01DC}, {0x0251, 0x0251}, {0x0261, 0x0261},
{0x02C4, 0x02C4}, {0x02C7, 0x02C7}, {0x02C9, 0x02CB},
{0x02CD, 0x02CD}, {0x02D0, 0x02D0}, {0x02D8, 0x02DB},
{0x02DD, 0x02DD}, {0x02DF, 0x02DF}, {0x0300, 0x036F},
{0x0391, 0x03A1}, {0x03A3, 0x03A9}, {0x03B1, 0x03C1},
{0x03C3, 0x03C9}, {0x0401, 0x0401}, {0x0410, 0x044F},
{0x0451, 0x0451}, {0x2010, 0x2010}, {0x2013, 0x2016},
{0x2018, 0x2019}, {0x201C, 0x201D}, {0x2020, 0x2022},
{0x2024, 0x2027}, {0x2030, 0x2030}, {0x2032, 0x2033},
{0x2035, 0x2035}, {0x203B, 0x203B}, {0x203E, 0x203E},
{0x2074, 0x2074}, {0x207F, 0x207F}, {0x2081, 0x2084},
{0x20AC, 0x20AC}, {0x2103, 0x2103}, {0x2105, 0x2105},
{0x2109, 0x2109}, {0x2113, 0x2113}, {0x2116, 0x2116},
{0x2121, 0x2122}, {0x2126, 0x2126}, {0x212B, 0x212B},
{0x2153, 0x2154}, {0x215B, 0x215E}, {0x2160, 0x216B},
{0x2170, 0x2179}, {0x2189, 0x2189}, {0x2190, 0x2199},
{0x21B8, 0x21B9}, {0x21D2, 0x21D2}, {0x21D4, 0x21D4},
{0x21E7, 0x21E7}, {0x2200, 0x2200}, {0x2202, 0x2203},
{0x2207, 0x2208}, {0x220B, 0x220B}, {0x220F, 0x220F},
{0x2211, 0x2211}, {0x2215, 0x2215}, {0x221A, 0x221A},
{0x221D, 0x2220}, {0x2223, 0x2223}, {0x2225, 0x2225},
{0x2227, 0x222C}, {0x222E, 0x222E}, {0x2234, 0x2237},
{0x223C, 0x223D}, {0x2248, 0x2248}, {0x224C, 0x224C},
{0x2252, 0x2252}, {0x2260, 0x2261}, {0x2264, 0x2267},
{0x226A, 0x226B}, {0x226E, 0x226F}, {0x2282, 0x2283},
{0x2286, 0x2287}, {0x2295, 0x2295}, {0x2299, 0x2299},
{0x22A5, 0x22A5}, {0x22BF, 0x22BF}, {0x2312, 0x2312},
{0x2460, 0x24E9}, {0x24EB, 0x254B}, {0x2550, 0x2573},
{0x2580, 0x258F}, {0x2592, 0x2595}, {0x25A0, 0x25A1},
{0x25A3, 0x25A9}, {0x25B2, 0x25B3}, {0x25B6, 0x25B7},
{0x25BC, 0x25BD}, {0x25C0, 0x25C1}, {0x25C6, 0x25C8},
{0x25CB, 0x25CB}, {0x25CE, 0x25D1}, {0x25E2, 0x25E5},
{0x25EF, 0x25EF}, {0x2605, 0x2606}, {0x2609, 0x2609},
{0x260E, 0x260F}, {0x261C, 0x261C}, {0x261E, 0x261E},
{0x2640, 0x2640}, {0x2642, 0x2642}, {0x2660, 0x2661},
{0x2663, 0x2665}, {0x2667, 0x266A}, {0x266C, 0x266D},
{0x266F, 0x266F}, {0x269E, 0x269F}, {0x26BF, 0x26BF},
{0x26C6, 0x26CD}, {0x26CF, 0x26D3}, {0x26D5, 0x26E1},
{0x26E3, 0x26E3}, {0x26E8, 0x26E9}, {0x26EB, 0x26F1},
{0x26F4, 0x26F4}, {0x26F6, 0x26F9}, {0x26FB, 0x26FC},
{0x26FE, 0x26FF}, {0x273D, 0x273D}, {0x2776, 0x277F},
{0x2B56, 0x2B59}, {0x3248, 0x324F}, {0xE000, 0xF8FF},
{0xFE00, 0xFE0F}, {0xFFFD, 0xFFFD}, {0x1F100, 0x1F10A},
{0x1F110, 0x1F12D}, {0x1F130, 0x1F169}, {0x1F170, 0x1F18D},
{0x1F18F, 0x1F190}, {0x1F19B, 0x1F1AC}, {0xE0100, 0xE01EF},
{0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD},
}
var emoji = table{
{0x203C, 0x203C}, {0x2049, 0x2049}, {0x2122, 0x2122},
{0x2139, 0x2139}, {0x2194, 0x2199}, {0x21A9, 0x21AA},
{0x231A, 0x231B}, {0x2328, 0x2328}, {0x23CF, 0x23CF},
{0x23E9, 0x23F3}, {0x23F8, 0x23FA}, {0x24C2, 0x24C2},
{0x25AA, 0x25AB}, {0x25B6, 0x25B6}, {0x25C0, 0x25C0},
{0x25FB, 0x25FE}, {0x2600, 0x2604}, {0x260E, 0x260E},
{0x2611, 0x2611}, {0x2614, 0x2615}, {0x2618, 0x2618},
{0x261D, 0x261D}, {0x2620, 0x2620}, {0x2622, 0x2623},
{0x2626, 0x2626}, {0x262A, 0x262A}, {0x262E, 0x262F},
{0x2638, 0x263A}, {0x2640, 0x2640}, {0x2642, 0x2642},
{0x2648, 0x2653}, {0x265F, 0x2660}, {0x2663, 0x2663},
{0x2665, 0x2666}, {0x2668, 0x2668}, {0x267B, 0x267B},
{0x267E, 0x267F}, {0x2692, 0x2697}, {0x2699, 0x2699},
{0x269B, 0x269C}, {0x26A0, 0x26A1}, {0x26AA, 0x26AB},
{0x26B0, 0x26B1}, {0x26BD, 0x26BE}, {0x26C4, 0x26C5},
{0x26C8, 0x26C8}, {0x26CE, 0x26CF}, {0x26D1, 0x26D1},
{0x26D3, 0x26D4}, {0x26E9, 0x26EA}, {0x26F0, 0x26F5},
{0x26F7, 0x26FA}, {0x26FD, 0x26FD}, {0x2702, 0x2702},
{0x2705, 0x2705}, {0x2708, 0x270D}, {0x270F, 0x270F},
{0x2712, 0x2712}, {0x2714, 0x2714}, {0x2716, 0x2716},
{0x271D, 0x271D}, {0x2721, 0x2721}, {0x2728, 0x2728},
{0x2733, 0x2734}, {0x2744, 0x2744}, {0x2747, 0x2747},
{0x274C, 0x274C}, {0x274E, 0x274E}, {0x2753, 0x2755},
{0x2757, 0x2757}, {0x2763, 0x2764}, {0x2795, 0x2797},
{0x27A1, 0x27A1}, {0x27B0, 0x27B0}, {0x27BF, 0x27BF},
{0x2934, 0x2935}, {0x2B05, 0x2B07}, {0x2B1B, 0x2B1C},
{0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x3030, 0x3030},
{0x303D, 0x303D}, {0x3297, 0x3297}, {0x3299, 0x3299},
{0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1F170, 0x1F171},
{0x1F17E, 0x1F17F}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A},
{0x1F1E6, 0x1F1FF}, {0x1F201, 0x1F202}, {0x1F21A, 0x1F21A},
{0x1F22F, 0x1F22F}, {0x1F232, 0x1F23A}, {0x1F250, 0x1F251},
{0x1F300, 0x1F321}, {0x1F324, 0x1F393}, {0x1F396, 0x1F397},
{0x1F399, 0x1F39B}, {0x1F39E, 0x1F3F0}, {0x1F3F3, 0x1F3F5},
{0x1F3F7, 0x1F4FD}, {0x1F4FF, 0x1F53D}, {0x1F549, 0x1F54E},
{0x1F550, 0x1F567}, {0x1F56F, 0x1F570}, {0x1F573, 0x1F57A},
{0x1F587, 0x1F587}, {0x1F58A, 0x1F58D}, {0x1F590, 0x1F590},
{0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A5}, {0x1F5A8, 0x1F5A8},
{0x1F5B1, 0x1F5B2}, {0x1F5BC, 0x1F5BC}, {0x1F5C2, 0x1F5C4},
{0x1F5D1, 0x1F5D3}, {0x1F5DC, 0x1F5DE}, {0x1F5E1, 0x1F5E1},
{0x1F5E3, 0x1F5E3}, {0x1F5E8, 0x1F5E8}, {0x1F5EF, 0x1F5EF},
{0x1F5F3, 0x1F5F3}, {0x1F5FA, 0x1F64F}, {0x1F680, 0x1F6C5},
{0x1F6CB, 0x1F6D2}, {0x1F6E0, 0x1F6E5}, {0x1F6E9, 0x1F6E9},
{0x1F6EB, 0x1F6EC}, {0x1F6F0, 0x1F6F0}, {0x1F6F3, 0x1F6F9},
{0x1F910, 0x1F93A}, {0x1F93C, 0x1F93E}, {0x1F940, 0x1F945},
{0x1F947, 0x1F970}, {0x1F973, 0x1F976}, {0x1F97A, 0x1F97A},
{0x1F97C, 0x1F9A2}, {0x1F9B0, 0x1F9B9}, {0x1F9C0, 0x1F9C2},
{0x1F9D0, 0x1F9FF},
}
var notassigned = table{
{0x0378, 0x0379}, {0x0380, 0x0383}, {0x038B, 0x038B},
{0x038D, 0x038D}, {0x03A2, 0x03A2}, {0x0530, 0x0530},
{0x0557, 0x0558}, {0x0560, 0x0560}, {0x0588, 0x0588},
{0x058B, 0x058C}, {0x0590, 0x0590}, {0x05C8, 0x05CF},
{0x05EB, 0x05EF}, {0x05F5, 0x05FF}, {0x061D, 0x061D},
{0x070E, 0x070E}, {0x074B, 0x074C}, {0x07B2, 0x07BF},
{0x07FB, 0x07FF}, {0x082E, 0x082F}, {0x083F, 0x083F},
{0x085C, 0x085D}, {0x085F, 0x089F}, {0x08B5, 0x08B5},
{0x08BE, 0x08D3}, {0x0984, 0x0984}, {0x098D, 0x098E},
{0x0991, 0x0992}, {0x09A9, 0x09A9}, {0x09B1, 0x09B1},
{0x09B3, 0x09B5}, {0x09BA, 0x09BB}, {0x09C5, 0x09C6},
{0x09C9, 0x09CA}, {0x09CF, 0x09D6}, {0x09D8, 0x09DB},
{0x09DE, 0x09DE}, {0x09E4, 0x09E5}, {0x09FC, 0x0A00},
{0x0A04, 0x0A04}, {0x0A0B, 0x0A0E}, {0x0A11, 0x0A12},
{0x0A29, 0x0A29}, {0x0A31, 0x0A31}, {0x0A34, 0x0A34},
{0x0A37, 0x0A37}, {0x0A3A, 0x0A3B}, {0x0A3D, 0x0A3D},
{0x0A43, 0x0A46}, {0x0A49, 0x0A4A}, {0x0A4E, 0x0A50},
{0x0A52, 0x0A58}, {0x0A5D, 0x0A5D}, {0x0A5F, 0x0A65},
{0x0A76, 0x0A80}, {0x0A84, 0x0A84}, {0x0A8E, 0x0A8E},
{0x0A92, 0x0A92}, {0x0AA9, 0x0AA9}, {0x0AB1, 0x0AB1},
{0x0AB4, 0x0AB4}, {0x0ABA, 0x0ABB}, {0x0AC6, 0x0AC6},
{0x0ACA, 0x0ACA}, {0x0ACE, 0x0ACF}, {0x0AD1, 0x0ADF},
{0x0AE4, 0x0AE5}, {0x0AF2, 0x0AF8}, {0x0AFA, 0x0B00},
{0x0B04, 0x0B04}, {0x0B0D, 0x0B0E}, {0x0B11, 0x0B12},
{0x0B29, 0x0B29}, {0x0B31, 0x0B31}, {0x0B34, 0x0B34},
{0x0B3A, 0x0B3B}, {0x0B45, 0x0B46}, {0x0B49, 0x0B4A},
{0x0B4E, 0x0B55}, {0x0B58, 0x0B5B}, {0x0B5E, 0x0B5E},
{0x0B64, 0x0B65}, {0x0B78, 0x0B81}, {0x0B84, 0x0B84},
{0x0B8B, 0x0B8D}, {0x0B91, 0x0B91}, {0x0B96, 0x0B98},
{0x0B9B, 0x0B9B}, {0x0B9D, 0x0B9D}, {0x0BA0, 0x0BA2},
{0x0BA5, 0x0BA7}, {0x0BAB, 0x0BAD}, {0x0BBA, 0x0BBD},
{0x0BC3, 0x0BC5}, {0x0BC9, 0x0BC9}, {0x0BCE, 0x0BCF},
{0x0BD1, 0x0BD6}, {0x0BD8, 0x0BE5}, {0x0BFB, 0x0BFF},
{0x0C04, 0x0C04}, {0x0C0D, 0x0C0D}, {0x0C11, 0x0C11},
{0x0C29, 0x0C29}, {0x0C3A, 0x0C3C}, {0x0C45, 0x0C45},
{0x0C49, 0x0C49}, {0x0C4E, 0x0C54}, {0x0C57, 0x0C57},
{0x0C5B, 0x0C5F}, {0x0C64, 0x0C65}, {0x0C70, 0x0C77},
{0x0C84, 0x0C84}, {0x0C8D, 0x0C8D}, {0x0C91, 0x0C91},
{0x0CA9, 0x0CA9}, {0x0CB4, 0x0CB4}, {0x0CBA, 0x0CBB},
{0x0CC5, 0x0CC5}, {0x0CC9, 0x0CC9}, {0x0CCE, 0x0CD4},
{0x0CD7, 0x0CDD}, {0x0CDF, 0x0CDF}, {0x0CE4, 0x0CE5},
{0x0CF0, 0x0CF0}, {0x0CF3, 0x0D00}, {0x0D04, 0x0D04},
{0x0D0D, 0x0D0D}, {0x0D11, 0x0D11}, {0x0D3B, 0x0D3C},
{0x0D45, 0x0D45}, {0x0D49, 0x0D49}, {0x0D50, 0x0D53},
{0x0D64, 0x0D65}, {0x0D80, 0x0D81}, {0x0D84, 0x0D84},
{0x0D97, 0x0D99}, {0x0DB2, 0x0DB2}, {0x0DBC, 0x0DBC},
{0x0DBE, 0x0DBF}, {0x0DC7, 0x0DC9}, {0x0DCB, 0x0DCE},
{0x0DD5, 0x0DD5}, {0x0DD7, 0x0DD7}, {0x0DE0, 0x0DE5},
{0x0DF0, 0x0DF1}, {0x0DF5, 0x0E00}, {0x0E3B, 0x0E3E},
{0x0E5C, 0x0E80}, {0x0E83, 0x0E83}, {0x0E85, 0x0E86},
{0x0E89, 0x0E89}, {0x0E8B, 0x0E8C}, {0x0E8E, 0x0E93},
{0x0E98, 0x0E98}, {0x0EA0, 0x0EA0}, {0x0EA4, 0x0EA4},
{0x0EA6, 0x0EA6}, {0x0EA8, 0x0EA9}, {0x0EAC, 0x0EAC},
{0x0EBA, 0x0EBA}, {0x0EBE, 0x0EBF}, {0x0EC5, 0x0EC5},
{0x0EC7, 0x0EC7}, {0x0ECE, 0x0ECF}, {0x0EDA, 0x0EDB},
{0x0EE0, 0x0EFF}, {0x0F48, 0x0F48}, {0x0F6D, 0x0F70},
{0x0F98, 0x0F98}, {0x0FBD, 0x0FBD}, {0x0FCD, 0x0FCD},
{0x0FDB, 0x0FFF}, {0x10C6, 0x10C6}, {0x10C8, 0x10CC},
{0x10CE, 0x10CF}, {0x1249, 0x1249}, {0x124E, 0x124F},
{0x1257, 0x1257}, {0x1259, 0x1259}, {0x125E, 0x125F},
{0x1289, 0x1289}, {0x128E, 0x128F}, {0x12B1, 0x12B1},
{0x12B6, 0x12B7}, {0x12BF, 0x12BF}, {0x12C1, 0x12C1},
{0x12C6, 0x12C7}, {0x12D7, 0x12D7}, {0x1311, 0x1311},
{0x1316, 0x1317}, {0x135B, 0x135C}, {0x137D, 0x137F},
{0x139A, 0x139F}, {0x13F6, 0x13F7}, {0x13FE, 0x13FF},
{0x169D, 0x169F}, {0x16F9, 0x16FF}, {0x170D, 0x170D},
{0x1715, 0x171F}, {0x1737, 0x173F}, {0x1754, 0x175F},
{0x176D, 0x176D}, {0x1771, 0x1771}, {0x1774, 0x177F},
{0x17DE, 0x17DF}, {0x17EA, 0x17EF}, {0x17FA, 0x17FF},
{0x180F, 0x180F}, {0x181A, 0x181F}, {0x1878, 0x187F},
{0x18AB, 0x18AF}, {0x18F6, 0x18FF}, {0x191F, 0x191F},
{0x192C, 0x192F}, {0x193C, 0x193F}, {0x1941, 0x1943},
{0x196E, 0x196F}, {0x1975, 0x197F}, {0x19AC, 0x19AF},
{0x19CA, 0x19CF}, {0x19DB, 0x19DD}, {0x1A1C, 0x1A1D},
{0x1A5F, 0x1A5F}, {0x1A7D, 0x1A7E}, {0x1A8A, 0x1A8F},
{0x1A9A, 0x1A9F}, {0x1AAE, 0x1AAF}, {0x1ABF, 0x1AFF},
{0x1B4C, 0x1B4F}, {0x1B7D, 0x1B7F}, {0x1BF4, 0x1BFB},
{0x1C38, 0x1C3A}, {0x1C4A, 0x1C4C}, {0x1C89, 0x1CBF},
{0x1CC8, 0x1CCF}, {0x1CF7, 0x1CF7}, {0x1CFA, 0x1CFF},
{0x1DF6, 0x1DFA}, {0x1F16, 0x1F17}, {0x1F1E, 0x1F1F},
{0x1F46, 0x1F47}, {0x1F4E, 0x1F4F}, {0x1F58, 0x1F58},
{0x1F5A, 0x1F5A}, {0x1F5C, 0x1F5C}, {0x1F5E, 0x1F5E},
{0x1F7E, 0x1F7F}, {0x1FB5, 0x1FB5}, {0x1FC5, 0x1FC5},
{0x1FD4, 0x1FD5}, {0x1FDC, 0x1FDC}, {0x1FF0, 0x1FF1},
{0x1FF5, 0x1FF5}, {0x1FFF, 0x1FFF}, {0x2065, 0x2065},
{0x2072, 0x2073}, {0x208F, 0x208F}, {0x209D, 0x209F},
{0x20BF, 0x20CF}, {0x20F1, 0x20FF}, {0x218C, 0x218F},
{0x23FF, 0x23FF}, {0x2427, 0x243F}, {0x244B, 0x245F},
{0x2B74, 0x2B75}, {0x2B96, 0x2B97}, {0x2BBA, 0x2BBC},
{0x2BC9, 0x2BC9}, {0x2BD2, 0x2BEB}, {0x2BF0, 0x2BFF},
{0x2C2F, 0x2C2F}, {0x2C5F, 0x2C5F}, {0x2CF4, 0x2CF8},
{0x2D26, 0x2D26}, {0x2D28, 0x2D2C}, {0x2D2E, 0x2D2F},
{0x2D68, 0x2D6E}, {0x2D71, 0x2D7E}, {0x2D97, 0x2D9F},
{0x2DA7, 0x2DA7}, {0x2DAF, 0x2DAF}, {0x2DB7, 0x2DB7},
{0x2DBF, 0x2DBF}, {0x2DC7, 0x2DC7}, {0x2DCF, 0x2DCF},
{0x2DD7, 0x2DD7}, {0x2DDF, 0x2DDF}, {0x2E45, 0x2E7F},
{0x2E9A, 0x2E9A}, {0x2EF4, 0x2EFF}, {0x2FD6, 0x2FEF},
{0x2FFC, 0x2FFF}, {0x3040, 0x3040}, {0x3097, 0x3098},
{0x3100, 0x3104}, {0x312E, 0x3130}, {0x318F, 0x318F},
{0x31BB, 0x31BF}, {0x31E4, 0x31EF}, {0x321F, 0x321F},
{0x32FF, 0x32FF}, {0x4DB6, 0x4DBF}, {0x9FD6, 0x9FFF},
{0xA48D, 0xA48F}, {0xA4C7, 0xA4CF}, {0xA62C, 0xA63F},
{0xA6F8, 0xA6FF}, {0xA7AF, 0xA7AF}, {0xA7B8, 0xA7F6},
{0xA82C, 0xA82F}, {0xA83A, 0xA83F}, {0xA878, 0xA87F},
{0xA8C6, 0xA8CD}, {0xA8DA, 0xA8DF}, {0xA8FE, 0xA8FF},
{0xA954, 0xA95E}, {0xA97D, 0xA97F}, {0xA9CE, 0xA9CE},
{0xA9DA, 0xA9DD}, {0xA9FF, 0xA9FF}, {0xAA37, 0xAA3F},
{0xAA4E, 0xAA4F}, {0xAA5A, 0xAA5B}, {0xAAC3, 0xAADA},
{0xAAF7, 0xAB00}, {0xAB07, 0xAB08}, {0xAB0F, 0xAB10},
{0xAB17, 0xAB1F}, {0xAB27, 0xAB27}, {0xAB2F, 0xAB2F},
{0xAB66, 0xAB6F}, {0xABEE, 0xABEF}, {0xABFA, 0xABFF},
{0xD7A4, 0xD7AF}, {0xD7C7, 0xD7CA}, {0xD7FC, 0xD7FF},
{0xFA6E, 0xFA6F}, {0xFADA, 0xFAFF}, {0xFB07, 0xFB12},
{0xFB18, 0xFB1C}, {0xFB37, 0xFB37}, {0xFB3D, 0xFB3D},
{0xFB3F, 0xFB3F}, {0xFB42, 0xFB42}, {0xFB45, 0xFB45},
{0xFBC2, 0xFBD2}, {0xFD40, 0xFD4F}, {0xFD90, 0xFD91},
{0xFDC8, 0xFDEF}, {0xFDFE, 0xFDFF}, {0xFE1A, 0xFE1F},
{0xFE53, 0xFE53}, {0xFE67, 0xFE67}, {0xFE6C, 0xFE6F},
{0xFE75, 0xFE75}, {0xFEFD, 0xFEFE}, {0xFF00, 0xFF00},
{0xFFBF, 0xFFC1}, {0xFFC8, 0xFFC9}, {0xFFD0, 0xFFD1},
{0xFFD8, 0xFFD9}, {0xFFDD, 0xFFDF}, {0xFFE7, 0xFFE7},
{0xFFEF, 0xFFF8}, {0xFFFE, 0xFFFF}, {0x1000C, 0x1000C},
{0x10027, 0x10027}, {0x1003B, 0x1003B}, {0x1003E, 0x1003E},
{0x1004E, 0x1004F}, {0x1005E, 0x1007F}, {0x100FB, 0x100FF},
{0x10103, 0x10106}, {0x10134, 0x10136}, {0x1018F, 0x1018F},
{0x1019C, 0x1019F}, {0x101A1, 0x101CF}, {0x101FE, 0x1027F},
{0x1029D, 0x1029F}, {0x102D1, 0x102DF}, {0x102FC, 0x102FF},
{0x10324, 0x1032F}, {0x1034B, 0x1034F}, {0x1037B, 0x1037F},
{0x1039E, 0x1039E}, {0x103C4, 0x103C7}, {0x103D6, 0x103FF},
{0x1049E, 0x1049F}, {0x104AA, 0x104AF}, {0x104D4, 0x104D7},
{0x104FC, 0x104FF}, {0x10528, 0x1052F}, {0x10564, 0x1056E},
{0x10570, 0x105FF}, {0x10737, 0x1073F}, {0x10756, 0x1075F},
{0x10768, 0x107FF}, {0x10806, 0x10807}, {0x10809, 0x10809},
{0x10836, 0x10836}, {0x10839, 0x1083B}, {0x1083D, 0x1083E},
{0x10856, 0x10856}, {0x1089F, 0x108A6}, {0x108B0, 0x108DF},
{0x108F3, 0x108F3}, {0x108F6, 0x108FA}, {0x1091C, 0x1091E},
{0x1093A, 0x1093E}, {0x10940, 0x1097F}, {0x109B8, 0x109BB},
{0x109D0, 0x109D1}, {0x10A04, 0x10A04}, {0x10A07, 0x10A0B},
{0x10A14, 0x10A14}, {0x10A18, 0x10A18}, {0x10A34, 0x10A37},
{0x10A3B, 0x10A3E}, {0x10A48, 0x10A4F}, {0x10A59, 0x10A5F},
{0x10AA0, 0x10ABF}, {0x10AE7, 0x10AEA}, {0x10AF7, 0x10AFF},
{0x10B36, 0x10B38}, {0x10B56, 0x10B57}, {0x10B73, 0x10B77},
{0x10B92, 0x10B98}, {0x10B9D, 0x10BA8}, {0x10BB0, 0x10BFF},
{0x10C49, 0x10C7F}, {0x10CB3, 0x10CBF}, {0x10CF3, 0x10CF9},
{0x10D00, 0x10E5F}, {0x10E7F, 0x10FFF}, {0x1104E, 0x11051},
{0x11070, 0x1107E}, {0x110C2, 0x110CF}, {0x110E9, 0x110EF},
{0x110FA, 0x110FF}, {0x11135, 0x11135}, {0x11144, 0x1114F},
{0x11177, 0x1117F}, {0x111CE, 0x111CF}, {0x111E0, 0x111E0},
{0x111F5, 0x111FF}, {0x11212, 0x11212}, {0x1123F, 0x1127F},
{0x11287, 0x11287}, {0x11289, 0x11289}, {0x1128E, 0x1128E},
{0x1129E, 0x1129E}, {0x112AA, 0x112AF}, {0x112EB, 0x112EF},
{0x112FA, 0x112FF}, {0x11304, 0x11304}, {0x1130D, 0x1130E},
{0x11311, 0x11312}, {0x11329, 0x11329}, {0x11331, 0x11331},
{0x11334, 0x11334}, {0x1133A, 0x1133B}, {0x11345, 0x11346},
{0x11349, 0x1134A}, {0x1134E, 0x1134F}, {0x11351, 0x11356},
{0x11358, 0x1135C}, {0x11364, 0x11365}, {0x1136D, 0x1136F},
{0x11375, 0x113FF}, {0x1145A, 0x1145A}, {0x1145C, 0x1145C},
{0x1145E, 0x1147F}, {0x114C8, 0x114CF}, {0x114DA, 0x1157F},
{0x115B6, 0x115B7}, {0x115DE, 0x115FF}, {0x11645, 0x1164F},
{0x1165A, 0x1165F}, {0x1166D, 0x1167F}, {0x116B8, 0x116BF},
{0x116CA, 0x116FF}, {0x1171A, 0x1171C}, {0x1172C, 0x1172F},
{0x11740, 0x1189F}, {0x118F3, 0x118FE}, {0x11900, 0x11ABF},
{0x11AF9, 0x11BFF}, {0x11C09, 0x11C09}, {0x11C37, 0x11C37},
{0x11C46, 0x11C4F}, {0x11C6D, 0x11C6F}, {0x11C90, 0x11C91},
{0x11CA8, 0x11CA8}, {0x11CB7, 0x11FFF}, {0x1239A, 0x123FF},
{0x1246F, 0x1246F}, {0x12475, 0x1247F}, {0x12544, 0x12FFF},
{0x1342F, 0x143FF}, {0x14647, 0x167FF}, {0x16A39, 0x16A3F},
{0x16A5F, 0x16A5F}, {0x16A6A, 0x16A6D}, {0x16A70, 0x16ACF},
{0x16AEE, 0x16AEF}, {0x16AF6, 0x16AFF}, {0x16B46, 0x16B4F},
{0x16B5A, 0x16B5A}, {0x16B62, 0x16B62}, {0x16B78, 0x16B7C},
{0x16B90, 0x16EFF}, {0x16F45, 0x16F4F}, {0x16F7F, 0x16F8E},
{0x16FA0, 0x16FDF}, {0x16FE1, 0x16FFF}, {0x187ED, 0x187FF},
{0x18AF3, 0x1AFFF}, {0x1B002, 0x1BBFF}, {0x1BC6B, 0x1BC6F},
{0x1BC7D, 0x1BC7F}, {0x1BC89, 0x1BC8F}, {0x1BC9A, 0x1BC9B},
{0x1BCA4, 0x1CFFF}, {0x1D0F6, 0x1D0FF}, {0x1D127, 0x1D128},
{0x1D1E9, 0x1D1FF}, {0x1D246, 0x1D2FF}, {0x1D357, 0x1D35F},
{0x1D372, 0x1D3FF}, {0x1D455, 0x1D455}, {0x1D49D, 0x1D49D},
{0x1D4A0, 0x1D4A1}, {0x1D4A3, 0x1D4A4}, {0x1D4A7, 0x1D4A8},
{0x1D4AD, 0x1D4AD}, {0x1D4BA, 0x1D4BA}, {0x1D4BC, 0x1D4BC},
{0x1D4C4, 0x1D4C4}, {0x1D506, 0x1D506}, {0x1D50B, 0x1D50C},
{0x1D515, 0x1D515}, {0x1D51D, 0x1D51D}, {0x1D53A, 0x1D53A},
{0x1D53F, 0x1D53F}, {0x1D545, 0x1D545}, {0x1D547, 0x1D549},
{0x1D551, 0x1D551}, {0x1D6A6, 0x1D6A7}, {0x1D7CC, 0x1D7CD},
{0x1DA8C, 0x1DA9A}, {0x1DAA0, 0x1DAA0}, {0x1DAB0, 0x1DFFF},
{0x1E007, 0x1E007}, {0x1E019, 0x1E01A}, {0x1E022, 0x1E022},
{0x1E025, 0x1E025}, {0x1E02B, 0x1E7FF}, {0x1E8C5, 0x1E8C6},
{0x1E8D7, 0x1E8FF}, {0x1E94B, 0x1E94F}, {0x1E95A, 0x1E95D},
{0x1E960, 0x1EDFF}, {0x1EE04, 0x1EE04}, {0x1EE20, 0x1EE20},
{0x1EE23, 0x1EE23}, {0x1EE25, 0x1EE26}, {0x1EE28, 0x1EE28},
{0x1EE33, 0x1EE33}, {0x1EE38, 0x1EE38}, {0x1EE3A, 0x1EE3A},
{0x1EE3C, 0x1EE41}, {0x1EE43, 0x1EE46}, {0x1EE48, 0x1EE48},
{0x1EE4A, 0x1EE4A}, {0x1EE4C, 0x1EE4C}, {0x1EE50, 0x1EE50},
{0x1EE53, 0x1EE53}, {0x1EE55, 0x1EE56}, {0x1EE58, 0x1EE58},
{0x1EE5A, 0x1EE5A}, {0x1EE5C, 0x1EE5C}, {0x1EE5E, 0x1EE5E},
{0x1EE60, 0x1EE60}, {0x1EE63, 0x1EE63}, {0x1EE65, 0x1EE66},
{0x1EE6B, 0x1EE6B}, {0x1EE73, 0x1EE73}, {0x1EE78, 0x1EE78},
{0x1EE7D, 0x1EE7D}, {0x1EE7F, 0x1EE7F}, {0x1EE8A, 0x1EE8A},
{0x1EE9C, 0x1EEA0}, {0x1EEA4, 0x1EEA4}, {0x1EEAA, 0x1EEAA},
{0x1EEBC, 0x1EEEF}, {0x1EEF2, 0x1EFFF}, {0x1F02C, 0x1F02F},
{0x1F094, 0x1F09F}, {0x1F0AF, 0x1F0B0}, {0x1F0C0, 0x1F0C0},
{0x1F0D0, 0x1F0D0}, {0x1F0F6, 0x1F0FF}, {0x1F10D, 0x1F10F},
{0x1F12F, 0x1F12F}, {0x1F16C, 0x1F16F}, {0x1F1AD, 0x1F1E5},
{0x1F203, 0x1F20F}, {0x1F23C, 0x1F23F}, {0x1F249, 0x1F24F},
{0x1F252, 0x1F2FF}, {0x1F6D3, 0x1F6DF}, {0x1F6ED, 0x1F6EF},
{0x1F6F7, 0x1F6FF}, {0x1F774, 0x1F77F}, {0x1F7D5, 0x1F7FF},
{0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F}, {0x1F85A, 0x1F85F},
{0x1F888, 0x1F88F}, {0x1F8AE, 0x1F90F}, {0x1F91F, 0x1F91F},
{0x1F928, 0x1F92F}, {0x1F931, 0x1F932}, {0x1F93F, 0x1F93F},
{0x1F94C, 0x1F94F}, {0x1F95F, 0x1F97F}, {0x1F992, 0x1F9BF},
{0x1F9C1, 0x1FFFF}, {0x2A6D7, 0x2A6FF}, {0x2B735, 0x2B73F},
{0x2B81E, 0x2B81F}, {0x2CEA2, 0x2F7FF}, {0x2FA1E, 0xE0000},
{0xE0002, 0xE001F}, {0xE0080, 0xE00FF}, {0xE01F0, 0xEFFFF},
{0xFFFFE, 0xFFFFF},
}
var neutral = table{
{0x0000, 0x001F}, {0x007F, 0x00A0}, {0x00A9, 0x00A9},
{0x00AB, 0x00AB}, {0x00B5, 0x00B5}, {0x00BB, 0x00BB},
{0x00C0, 0x00C5}, {0x00C7, 0x00CF}, {0x00D1, 0x00D6},
{0x00D9, 0x00DD}, {0x00E2, 0x00E5}, {0x00E7, 0x00E7},
{0x00EB, 0x00EB}, {0x00EE, 0x00EF}, {0x00F1, 0x00F1},
{0x00F4, 0x00F6}, {0x00FB, 0x00FB}, {0x00FD, 0x00FD},
{0x00FF, 0x0100}, {0x0102, 0x0110}, {0x0112, 0x0112},
{0x0114, 0x011A}, {0x011C, 0x0125}, {0x0128, 0x012A},
{0x012C, 0x0130}, {0x0134, 0x0137}, {0x0139, 0x013E},
{0x0143, 0x0143}, {0x0145, 0x0147}, {0x014C, 0x014C},
{0x014E, 0x0151}, {0x0154, 0x0165}, {0x0168, 0x016A},
{0x016C, 0x01CD}, {0x01CF, 0x01CF}, {0x01D1, 0x01D1},
{0x01D3, 0x01D3}, {0x01D5, 0x01D5}, {0x01D7, 0x01D7},
{0x01D9, 0x01D9}, {0x01DB, 0x01DB}, {0x01DD, 0x0250},
{0x0252, 0x0260}, {0x0262, 0x02C3}, {0x02C5, 0x02C6},
{0x02C8, 0x02C8}, {0x02CC, 0x02CC}, {0x02CE, 0x02CF},
{0x02D1, 0x02D7}, {0x02DC, 0x02DC}, {0x02DE, 0x02DE},
{0x02E0, 0x02FF}, {0x0370, 0x0377}, {0x037A, 0x037F},
{0x0384, 0x038A}, {0x038C, 0x038C}, {0x038E, 0x0390},
{0x03AA, 0x03B0}, {0x03C2, 0x03C2}, {0x03CA, 0x0400},
{0x0402, 0x040F}, {0x0450, 0x0450}, {0x0452, 0x052F},
{0x0531, 0x0556}, {0x0559, 0x055F}, {0x0561, 0x0587},
{0x0589, 0x058A}, {0x058D, 0x058F}, {0x0591, 0x05C7},
{0x05D0, 0x05EA}, {0x05F0, 0x05F4}, {0x0600, 0x061C},
{0x061E, 0x070D}, {0x070F, 0x074A}, {0x074D, 0x07B1},
{0x07C0, 0x07FA}, {0x0800, 0x082D}, {0x0830, 0x083E},
{0x0840, 0x085B}, {0x085E, 0x085E}, {0x08A0, 0x08B4},
{0x08B6, 0x08BD}, {0x08D4, 0x0983}, {0x0985, 0x098C},
{0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0},
{0x09B2, 0x09B2}, {0x09B6, 0x09B9}, {0x09BC, 0x09C4},
{0x09C7, 0x09C8}, {0x09CB, 0x09CE}, {0x09D7, 0x09D7},
{0x09DC, 0x09DD}, {0x09DF, 0x09E3}, {0x09E6, 0x09FB},
{0x0A01, 0x0A03}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10},
{0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, {0x0A32, 0x0A33},
{0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A3C, 0x0A3C},
{0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D},
{0x0A51, 0x0A51}, {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E},
{0x0A66, 0x0A75}, {0x0A81, 0x0A83}, {0x0A85, 0x0A8D},
{0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0},
{0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABC, 0x0AC5},
{0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0AD0, 0x0AD0},
{0x0AE0, 0x0AE3}, {0x0AE6, 0x0AF1}, {0x0AF9, 0x0AF9},
{0x0B01, 0x0B03}, {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10},
{0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
{0x0B35, 0x0B39}, {0x0B3C, 0x0B44}, {0x0B47, 0x0B48},
{0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B5C, 0x0B5D},
{0x0B5F, 0x0B63}, {0x0B66, 0x0B77}, {0x0B82, 0x0B83},
{0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95},
{0x0B99, 0x0B9A}, {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F},
{0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9},
{0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD},
{0x0BD0, 0x0BD0}, {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BFA},
{0x0C00, 0x0C03}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10},
{0x0C12, 0x0C28}, {0x0C2A, 0x0C39}, {0x0C3D, 0x0C44},
{0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
{0x0C58, 0x0C5A}, {0x0C60, 0x0C63}, {0x0C66, 0x0C6F},
{0x0C78, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
{0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9},
{0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
{0x0CD5, 0x0CD6}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE3},
{0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2}, {0x0D01, 0x0D03},
{0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D3A},
{0x0D3D, 0x0D44}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4F},
{0x0D54, 0x0D63}, {0x0D66, 0x0D7F}, {0x0D82, 0x0D83},
{0x0D85, 0x0D96}, {0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB},
{0x0DBD, 0x0DBD}, {0x0DC0, 0x0DC6}, {0x0DCA, 0x0DCA},
{0x0DCF, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF},
{0x0DE6, 0x0DEF}, {0x0DF2, 0x0DF4}, {0x0E01, 0x0E3A},
{0x0E3F, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E84, 0x0E84},
{0x0E87, 0x0E88}, {0x0E8A, 0x0E8A}, {0x0E8D, 0x0E8D},
{0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3},
{0x0EA5, 0x0EA5}, {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB},
{0x0EAD, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
{0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9},
{0x0EDC, 0x0EDF}, {0x0F00, 0x0F47}, {0x0F49, 0x0F6C},
{0x0F71, 0x0F97}, {0x0F99, 0x0FBC}, {0x0FBE, 0x0FCC},
{0x0FCE, 0x0FDA}, {0x1000, 0x10C5}, {0x10C7, 0x10C7},
{0x10CD, 0x10CD}, {0x10D0, 0x10FF}, {0x1160, 0x1248},
{0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258},
{0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D},
{0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE},
{0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6},
{0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A},
{0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5},
{0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8},
{0x1700, 0x170C}, {0x170E, 0x1714}, {0x1720, 0x1736},
{0x1740, 0x1753}, {0x1760, 0x176C}, {0x176E, 0x1770},
{0x1772, 0x1773}, {0x1780, 0x17DD}, {0x17E0, 0x17E9},
{0x17F0, 0x17F9}, {0x1800, 0x180E}, {0x1810, 0x1819},
{0x1820, 0x1877}, {0x1880, 0x18AA}, {0x18B0, 0x18F5},
{0x1900, 0x191E}, {0x1920, 0x192B}, {0x1930, 0x193B},
{0x1940, 0x1940}, {0x1944, 0x196D}, {0x1970, 0x1974},
{0x1980, 0x19AB}, {0x19B0, 0x19C9}, {0x19D0, 0x19DA},
{0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C},
{0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD},
{0x1AB0, 0x1ABE}, {0x1B00, 0x1B4B}, {0x1B50, 0x1B7C},
{0x1B80, 0x1BF3}, {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49},
{0x1C4D, 0x1C88}, {0x1CC0, 0x1CC7}, {0x1CD0, 0x1CF6},
{0x1CF8, 0x1CF9}, {0x1D00, 0x1DF5}, {0x1DFB, 0x1F15},
{0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
{0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B},
{0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
{0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3}, {0x1FD6, 0x1FDB},
{0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFE},
{0x2000, 0x200F}, {0x2011, 0x2012}, {0x2017, 0x2017},
{0x201A, 0x201B}, {0x201E, 0x201F}, {0x2023, 0x2023},
{0x2028, 0x202F}, {0x2031, 0x2031}, {0x2034, 0x2034},
{0x2036, 0x203A}, {0x203C, 0x203D}, {0x203F, 0x2064},
{0x2066, 0x2071}, {0x2075, 0x207E}, {0x2080, 0x2080},
{0x2085, 0x208E}, {0x2090, 0x209C}, {0x20A0, 0x20A8},
{0x20AA, 0x20AB}, {0x20AD, 0x20BE}, {0x20D0, 0x20F0},
{0x2100, 0x2102}, {0x2104, 0x2104}, {0x2106, 0x2108},
{0x210A, 0x2112}, {0x2114, 0x2115}, {0x2117, 0x2120},
{0x2123, 0x2125}, {0x2127, 0x212A}, {0x212C, 0x2152},
{0x2155, 0x215A}, {0x215F, 0x215F}, {0x216C, 0x216F},
{0x217A, 0x2188}, {0x218A, 0x218B}, {0x219A, 0x21B7},
{0x21BA, 0x21D1}, {0x21D3, 0x21D3}, {0x21D5, 0x21E6},
{0x21E8, 0x21FF}, {0x2201, 0x2201}, {0x2204, 0x2206},
{0x2209, 0x220A}, {0x220C, 0x220E}, {0x2210, 0x2210},
{0x2212, 0x2214}, {0x2216, 0x2219}, {0x221B, 0x221C},
{0x2221, 0x2222}, {0x2224, 0x2224}, {0x2226, 0x2226},
{0x222D, 0x222D}, {0x222F, 0x2233}, {0x2238, 0x223B},
{0x223E, 0x2247}, {0x2249, 0x224B}, {0x224D, 0x2251},
{0x2253, 0x225F}, {0x2262, 0x2263}, {0x2268, 0x2269},
{0x226C, 0x226D}, {0x2270, 0x2281}, {0x2284, 0x2285},
{0x2288, 0x2294}, {0x2296, 0x2298}, {0x229A, 0x22A4},
{0x22A6, 0x22BE}, {0x22C0, 0x2311}, {0x2313, 0x2319},
{0x231C, 0x2328}, {0x232B, 0x23E8}, {0x23ED, 0x23EF},
{0x23F1, 0x23F2}, {0x23F4, 0x23FE}, {0x2400, 0x2426},
{0x2440, 0x244A}, {0x24EA, 0x24EA}, {0x254C, 0x254F},
{0x2574, 0x257F}, {0x2590, 0x2591}, {0x2596, 0x259F},
{0x25A2, 0x25A2}, {0x25AA, 0x25B1}, {0x25B4, 0x25B5},
{0x25B8, 0x25BB}, {0x25BE, 0x25BF}, {0x25C2, 0x25C5},
{0x25C9, 0x25CA}, {0x25CC, 0x25CD}, {0x25D2, 0x25E1},
{0x25E6, 0x25EE}, {0x25F0, 0x25FC}, {0x25FF, 0x2604},
{0x2607, 0x2608}, {0x260A, 0x260D}, {0x2610, 0x2613},
{0x2616, 0x261B}, {0x261D, 0x261D}, {0x261F, 0x263F},
{0x2641, 0x2641}, {0x2643, 0x2647}, {0x2654, 0x265F},
{0x2662, 0x2662}, {0x2666, 0x2666}, {0x266B, 0x266B},
{0x266E, 0x266E}, {0x2670, 0x267E}, {0x2680, 0x2692},
{0x2694, 0x269D}, {0x26A0, 0x26A0}, {0x26A2, 0x26A9},
{0x26AC, 0x26BC}, {0x26C0, 0x26C3}, {0x26E2, 0x26E2},
{0x26E4, 0x26E7}, {0x2700, 0x2704}, {0x2706, 0x2709},
{0x270C, 0x2727}, {0x2729, 0x273C}, {0x273E, 0x274B},
{0x274D, 0x274D}, {0x274F, 0x2752}, {0x2756, 0x2756},
{0x2758, 0x2775}, {0x2780, 0x2794}, {0x2798, 0x27AF},
{0x27B1, 0x27BE}, {0x27C0, 0x27E5}, {0x27EE, 0x2984},
{0x2987, 0x2B1A}, {0x2B1D, 0x2B4F}, {0x2B51, 0x2B54},
{0x2B5A, 0x2B73}, {0x2B76, 0x2B95}, {0x2B98, 0x2BB9},
{0x2BBD, 0x2BC8}, {0x2BCA, 0x2BD1}, {0x2BEC, 0x2BEF},
{0x2C00, 0x2C2E}, {0x2C30, 0x2C5E}, {0x2C60, 0x2CF3},
{0x2CF9, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D},
{0x2D30, 0x2D67}, {0x2D6F, 0x2D70}, {0x2D7F, 0x2D96},
{0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6},
{0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE},
{0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2DE0, 0x2E44},
{0x303F, 0x303F}, {0x4DC0, 0x4DFF}, {0xA4D0, 0xA62B},
{0xA640, 0xA6F7}, {0xA700, 0xA7AE}, {0xA7B0, 0xA7B7},
{0xA7F7, 0xA82B}, {0xA830, 0xA839}, {0xA840, 0xA877},
{0xA880, 0xA8C5}, {0xA8CE, 0xA8D9}, {0xA8E0, 0xA8FD},
{0xA900, 0xA953}, {0xA95F, 0xA95F}, {0xA980, 0xA9CD},
{0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36},
{0xAA40, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA5C, 0xAAC2},
{0xAADB, 0xAAF6}, {0xAB01, 0xAB06}, {0xAB09, 0xAB0E},
{0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E},
{0xAB30, 0xAB65}, {0xAB70, 0xABED}, {0xABF0, 0xABF9},
{0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xD800, 0xDFFF},
{0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB36},
{0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41},
{0xFB43, 0xFB44}, {0xFB46, 0xFBC1}, {0xFBD3, 0xFD3F},
{0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFD},
{0xFE20, 0xFE2F}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC},
{0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFC}, {0x10000, 0x1000B},
{0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D},
{0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA},
{0x10100, 0x10102}, {0x10107, 0x10133}, {0x10137, 0x1018E},
{0x10190, 0x1019B}, {0x101A0, 0x101A0}, {0x101D0, 0x101FD},
{0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x102E0, 0x102FB},
{0x10300, 0x10323}, {0x10330, 0x1034A}, {0x10350, 0x1037A},
{0x10380, 0x1039D}, {0x1039F, 0x103C3}, {0x103C8, 0x103D5},
{0x10400, 0x1049D}, {0x104A0, 0x104A9}, {0x104B0, 0x104D3},
{0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563},
{0x1056F, 0x1056F}, {0x10600, 0x10736}, {0x10740, 0x10755},
{0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808},
{0x1080A, 0x10835}, {0x10837, 0x10838}, {0x1083C, 0x1083C},
{0x1083F, 0x10855}, {0x10857, 0x1089E}, {0x108A7, 0x108AF},
{0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x108FB, 0x1091B},
{0x1091F, 0x10939}, {0x1093F, 0x1093F}, {0x10980, 0x109B7},
{0x109BC, 0x109CF}, {0x109D2, 0x10A03}, {0x10A05, 0x10A06},
{0x10A0C, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A33},
{0x10A38, 0x10A3A}, {0x10A3F, 0x10A47}, {0x10A50, 0x10A58},
{0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6},
{0x10B00, 0x10B35}, {0x10B39, 0x10B55}, {0x10B58, 0x10B72},
{0x10B78, 0x10B91}, {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF},
{0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2},
{0x10CFA, 0x10CFF}, {0x10E60, 0x10E7E}, {0x11000, 0x1104D},
{0x11052, 0x1106F}, {0x1107F, 0x110C1}, {0x110D0, 0x110E8},
{0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11143},
{0x11150, 0x11176}, {0x11180, 0x111CD}, {0x111D0, 0x111DF},
{0x111E1, 0x111F4}, {0x11200, 0x11211}, {0x11213, 0x1123E},
{0x11280, 0x11286}, {0x11288, 0x11288}, {0x1128A, 0x1128D},
{0x1128F, 0x1129D}, {0x1129F, 0x112A9}, {0x112B0, 0x112EA},
{0x112F0, 0x112F9}, {0x11300, 0x11303}, {0x11305, 0x1130C},
{0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330},
{0x11332, 0x11333}, {0x11335, 0x11339}, {0x1133C, 0x11344},
{0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11350, 0x11350},
{0x11357, 0x11357}, {0x1135D, 0x11363}, {0x11366, 0x1136C},
{0x11370, 0x11374}, {0x11400, 0x11459}, {0x1145B, 0x1145B},
{0x1145D, 0x1145D}, {0x11480, 0x114C7}, {0x114D0, 0x114D9},
{0x11580, 0x115B5}, {0x115B8, 0x115DD}, {0x11600, 0x11644},
{0x11650, 0x11659}, {0x11660, 0x1166C}, {0x11680, 0x116B7},
{0x116C0, 0x116C9}, {0x11700, 0x11719}, {0x1171D, 0x1172B},
{0x11730, 0x1173F}, {0x118A0, 0x118F2}, {0x118FF, 0x118FF},
{0x11AC0, 0x11AF8}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C36},
{0x11C38, 0x11C45}, {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F},
{0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x12000, 0x12399},
{0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543},
{0x13000, 0x1342E}, {0x14400, 0x14646}, {0x16800, 0x16A38},
{0x16A40, 0x16A5E}, {0x16A60, 0x16A69}, {0x16A6E, 0x16A6F},
{0x16AD0, 0x16AED}, {0x16AF0, 0x16AF5}, {0x16B00, 0x16B45},
{0x16B50, 0x16B59}, {0x16B5B, 0x16B61}, {0x16B63, 0x16B77},
{0x16B7D, 0x16B8F}, {0x16F00, 0x16F44}, {0x16F50, 0x16F7E},
{0x16F8F, 0x16F9F}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C},
{0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BCA3},
{0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D1E8},
{0x1D200, 0x1D245}, {0x1D300, 0x1D356}, {0x1D360, 0x1D371},
{0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F},
{0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC},
{0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3},
{0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514},
{0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E},
{0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550},
{0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B},
{0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006},
{0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024},
{0x1E026, 0x1E02A}, {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6},
{0x1E900, 0x1E94A}, {0x1E950, 0x1E959}, {0x1E95E, 0x1E95F},
{0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22},
{0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27}, {0x1EE29, 0x1EE32},
{0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B},
{0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49},
{0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52},
{0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59},
{0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F},
{0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A},
{0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C},
{0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B},
{0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB},
{0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F003}, {0x1F005, 0x1F02B},
{0x1F030, 0x1F093}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF},
{0x1F0C1, 0x1F0CE}, {0x1F0D1, 0x1F0F5}, {0x1F10B, 0x1F10C},
{0x1F12E, 0x1F12E}, {0x1F16A, 0x1F16B}, {0x1F1E6, 0x1F1FF},
{0x1F321, 0x1F32C}, {0x1F336, 0x1F336}, {0x1F37D, 0x1F37D},
{0x1F394, 0x1F39F}, {0x1F3CB, 0x1F3CE}, {0x1F3D4, 0x1F3DF},
{0x1F3F1, 0x1F3F3}, {0x1F3F5, 0x1F3F7}, {0x1F43F, 0x1F43F},
{0x1F441, 0x1F441}, {0x1F4FD, 0x1F4FE}, {0x1F53E, 0x1F54A},
{0x1F54F, 0x1F54F}, {0x1F568, 0x1F579}, {0x1F57B, 0x1F594},
{0x1F597, 0x1F5A3}, {0x1F5A5, 0x1F5FA}, {0x1F650, 0x1F67F},
{0x1F6C6, 0x1F6CB}, {0x1F6CD, 0x1F6CF}, {0x1F6E0, 0x1F6EA},
{0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F773}, {0x1F780, 0x1F7D4},
{0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859},
{0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0xE0001, 0xE0001},
{0xE0020, 0xE007F},
}
// Condition have flag EastAsianWidth whether the current locale is CJK or not. // Condition have flag EastAsianWidth whether the current locale is CJK or not.
type Condition struct { type Condition struct {
EastAsianWidth bool EastAsianWidth bool
@ -822,11 +102,9 @@ func NewCondition() *Condition {
// See http://www.unicode.org/reports/tr11/ // See http://www.unicode.org/reports/tr11/
func (c *Condition) RuneWidth(r rune) int { func (c *Condition) RuneWidth(r rune) int {
switch { switch {
case r < 0 || r > 0x10FFFF || case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining, notassigned):
inTables(r, nonprint, combining, notassigned):
return 0 return 0
case (c.EastAsianWidth && IsAmbiguousWidth(r)) || case (c.EastAsianWidth && IsAmbiguousWidth(r)) || inTables(r, doublewidth):
inTables(r, doublewidth, emoji):
return 2 return 2
default: default:
return 1 return 1
@ -848,9 +126,12 @@ func (c *Condition) stringWidthZeroJoiner(s string) (width int) {
} }
w := c.RuneWidth(r) w := c.RuneWidth(r)
if r2 == 0x200D && inTables(r, emoji) && inTables(r1, emoji) { if r2 == 0x200D && inTables(r, emoji) && inTables(r1, emoji) {
w = 0 if width < w {
width = w
} }
} else {
width += w width += w
}
r1, r2 = r2, r r1, r2 = r2, r
} }
return width return width

427
vendor/github.com/mattn/go-runewidth/runewidth_table.go generated vendored Normal file
View file

@ -0,0 +1,427 @@
package runewidth
var combining = table{
{0x0300, 0x036F}, {0x0483, 0x0489}, {0x07EB, 0x07F3},
{0x0C00, 0x0C00}, {0x0C04, 0x0C04}, {0x0D00, 0x0D01},
{0x135D, 0x135F}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1ABE},
{0x1B6B, 0x1B73}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF},
{0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2DE0, 0x2DFF},
{0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D},
{0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA8E0, 0xA8F1},
{0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x10376, 0x1037A},
{0x10F46, 0x10F50}, {0x11300, 0x11301}, {0x1133B, 0x1133C},
{0x11366, 0x1136C}, {0x11370, 0x11374}, {0x16AF0, 0x16AF4},
{0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182},
{0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244},
{0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021},
{0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E8D0, 0x1E8D6},
}
var doublewidth = table{
{0x1100, 0x115F}, {0x231A, 0x231B}, {0x2329, 0x232A},
{0x23E9, 0x23EC}, {0x23F0, 0x23F0}, {0x23F3, 0x23F3},
{0x25FD, 0x25FE}, {0x2614, 0x2615}, {0x2648, 0x2653},
{0x267F, 0x267F}, {0x2693, 0x2693}, {0x26A1, 0x26A1},
{0x26AA, 0x26AB}, {0x26BD, 0x26BE}, {0x26C4, 0x26C5},
{0x26CE, 0x26CE}, {0x26D4, 0x26D4}, {0x26EA, 0x26EA},
{0x26F2, 0x26F3}, {0x26F5, 0x26F5}, {0x26FA, 0x26FA},
{0x26FD, 0x26FD}, {0x2705, 0x2705}, {0x270A, 0x270B},
{0x2728, 0x2728}, {0x274C, 0x274C}, {0x274E, 0x274E},
{0x2753, 0x2755}, {0x2757, 0x2757}, {0x2795, 0x2797},
{0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2B1B, 0x2B1C},
{0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99},
{0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB},
{0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF},
{0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31BA},
{0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247},
{0x3250, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6},
{0xA960, 0xA97C}, {0xAC00, 0xD7A3}, {0xF900, 0xFAFF},
{0xFE10, 0xFE19}, {0xFE30, 0xFE52}, {0xFE54, 0xFE66},
{0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
{0x16FE0, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18AF2},
{0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167},
{0x1B170, 0x1B2FB}, {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF},
{0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A}, {0x1F200, 0x1F202},
{0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, {0x1F250, 0x1F251},
{0x1F260, 0x1F265}, {0x1F300, 0x1F320}, {0x1F32D, 0x1F335},
{0x1F337, 0x1F37C}, {0x1F37E, 0x1F393}, {0x1F3A0, 0x1F3CA},
{0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0}, {0x1F3F4, 0x1F3F4},
{0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440}, {0x1F442, 0x1F4FC},
{0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E}, {0x1F550, 0x1F567},
{0x1F57A, 0x1F57A}, {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A4},
{0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC},
{0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D5}, {0x1F6EB, 0x1F6EC},
{0x1F6F4, 0x1F6FA}, {0x1F7E0, 0x1F7EB}, {0x1F90D, 0x1F971},
{0x1F973, 0x1F976}, {0x1F97A, 0x1F9A2}, {0x1F9A5, 0x1F9AA},
{0x1F9AE, 0x1F9CA}, {0x1F9CD, 0x1F9FF}, {0x1FA70, 0x1FA73},
{0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA82}, {0x1FA90, 0x1FA95},
{0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
}
var ambiguous = table{
{0x00A1, 0x00A1}, {0x00A4, 0x00A4}, {0x00A7, 0x00A8},
{0x00AA, 0x00AA}, {0x00AD, 0x00AE}, {0x00B0, 0x00B4},
{0x00B6, 0x00BA}, {0x00BC, 0x00BF}, {0x00C6, 0x00C6},
{0x00D0, 0x00D0}, {0x00D7, 0x00D8}, {0x00DE, 0x00E1},
{0x00E6, 0x00E6}, {0x00E8, 0x00EA}, {0x00EC, 0x00ED},
{0x00F0, 0x00F0}, {0x00F2, 0x00F3}, {0x00F7, 0x00FA},
{0x00FC, 0x00FC}, {0x00FE, 0x00FE}, {0x0101, 0x0101},
{0x0111, 0x0111}, {0x0113, 0x0113}, {0x011B, 0x011B},
{0x0126, 0x0127}, {0x012B, 0x012B}, {0x0131, 0x0133},
{0x0138, 0x0138}, {0x013F, 0x0142}, {0x0144, 0x0144},
{0x0148, 0x014B}, {0x014D, 0x014D}, {0x0152, 0x0153},
{0x0166, 0x0167}, {0x016B, 0x016B}, {0x01CE, 0x01CE},
{0x01D0, 0x01D0}, {0x01D2, 0x01D2}, {0x01D4, 0x01D4},
{0x01D6, 0x01D6}, {0x01D8, 0x01D8}, {0x01DA, 0x01DA},
{0x01DC, 0x01DC}, {0x0251, 0x0251}, {0x0261, 0x0261},
{0x02C4, 0x02C4}, {0x02C7, 0x02C7}, {0x02C9, 0x02CB},
{0x02CD, 0x02CD}, {0x02D0, 0x02D0}, {0x02D8, 0x02DB},
{0x02DD, 0x02DD}, {0x02DF, 0x02DF}, {0x0300, 0x036F},
{0x0391, 0x03A1}, {0x03A3, 0x03A9}, {0x03B1, 0x03C1},
{0x03C3, 0x03C9}, {0x0401, 0x0401}, {0x0410, 0x044F},
{0x0451, 0x0451}, {0x2010, 0x2010}, {0x2013, 0x2016},
{0x2018, 0x2019}, {0x201C, 0x201D}, {0x2020, 0x2022},
{0x2024, 0x2027}, {0x2030, 0x2030}, {0x2032, 0x2033},
{0x2035, 0x2035}, {0x203B, 0x203B}, {0x203E, 0x203E},
{0x2074, 0x2074}, {0x207F, 0x207F}, {0x2081, 0x2084},
{0x20AC, 0x20AC}, {0x2103, 0x2103}, {0x2105, 0x2105},
{0x2109, 0x2109}, {0x2113, 0x2113}, {0x2116, 0x2116},
{0x2121, 0x2122}, {0x2126, 0x2126}, {0x212B, 0x212B},
{0x2153, 0x2154}, {0x215B, 0x215E}, {0x2160, 0x216B},
{0x2170, 0x2179}, {0x2189, 0x2189}, {0x2190, 0x2199},
{0x21B8, 0x21B9}, {0x21D2, 0x21D2}, {0x21D4, 0x21D4},
{0x21E7, 0x21E7}, {0x2200, 0x2200}, {0x2202, 0x2203},
{0x2207, 0x2208}, {0x220B, 0x220B}, {0x220F, 0x220F},
{0x2211, 0x2211}, {0x2215, 0x2215}, {0x221A, 0x221A},
{0x221D, 0x2220}, {0x2223, 0x2223}, {0x2225, 0x2225},
{0x2227, 0x222C}, {0x222E, 0x222E}, {0x2234, 0x2237},
{0x223C, 0x223D}, {0x2248, 0x2248}, {0x224C, 0x224C},
{0x2252, 0x2252}, {0x2260, 0x2261}, {0x2264, 0x2267},
{0x226A, 0x226B}, {0x226E, 0x226F}, {0x2282, 0x2283},
{0x2286, 0x2287}, {0x2295, 0x2295}, {0x2299, 0x2299},
{0x22A5, 0x22A5}, {0x22BF, 0x22BF}, {0x2312, 0x2312},
{0x2460, 0x24E9}, {0x24EB, 0x254B}, {0x2550, 0x2573},
{0x2580, 0x258F}, {0x2592, 0x2595}, {0x25A0, 0x25A1},
{0x25A3, 0x25A9}, {0x25B2, 0x25B3}, {0x25B6, 0x25B7},
{0x25BC, 0x25BD}, {0x25C0, 0x25C1}, {0x25C6, 0x25C8},
{0x25CB, 0x25CB}, {0x25CE, 0x25D1}, {0x25E2, 0x25E5},
{0x25EF, 0x25EF}, {0x2605, 0x2606}, {0x2609, 0x2609},
{0x260E, 0x260F}, {0x261C, 0x261C}, {0x261E, 0x261E},
{0x2640, 0x2640}, {0x2642, 0x2642}, {0x2660, 0x2661},
{0x2663, 0x2665}, {0x2667, 0x266A}, {0x266C, 0x266D},
{0x266F, 0x266F}, {0x269E, 0x269F}, {0x26BF, 0x26BF},
{0x26C6, 0x26CD}, {0x26CF, 0x26D3}, {0x26D5, 0x26E1},
{0x26E3, 0x26E3}, {0x26E8, 0x26E9}, {0x26EB, 0x26F1},
{0x26F4, 0x26F4}, {0x26F6, 0x26F9}, {0x26FB, 0x26FC},
{0x26FE, 0x26FF}, {0x273D, 0x273D}, {0x2776, 0x277F},
{0x2B56, 0x2B59}, {0x3248, 0x324F}, {0xE000, 0xF8FF},
{0xFE00, 0xFE0F}, {0xFFFD, 0xFFFD}, {0x1F100, 0x1F10A},
{0x1F110, 0x1F12D}, {0x1F130, 0x1F169}, {0x1F170, 0x1F18D},
{0x1F18F, 0x1F190}, {0x1F19B, 0x1F1AC}, {0xE0100, 0xE01EF},
{0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD},
}
var notassigned = table{
{0x27E6, 0x27ED}, {0x2985, 0x2986},
}
var neutral = table{
{0x0000, 0x001F}, {0x007F, 0x00A0}, {0x00A9, 0x00A9},
{0x00AB, 0x00AB}, {0x00B5, 0x00B5}, {0x00BB, 0x00BB},
{0x00C0, 0x00C5}, {0x00C7, 0x00CF}, {0x00D1, 0x00D6},
{0x00D9, 0x00DD}, {0x00E2, 0x00E5}, {0x00E7, 0x00E7},
{0x00EB, 0x00EB}, {0x00EE, 0x00EF}, {0x00F1, 0x00F1},
{0x00F4, 0x00F6}, {0x00FB, 0x00FB}, {0x00FD, 0x00FD},
{0x00FF, 0x0100}, {0x0102, 0x0110}, {0x0112, 0x0112},
{0x0114, 0x011A}, {0x011C, 0x0125}, {0x0128, 0x012A},
{0x012C, 0x0130}, {0x0134, 0x0137}, {0x0139, 0x013E},
{0x0143, 0x0143}, {0x0145, 0x0147}, {0x014C, 0x014C},
{0x014E, 0x0151}, {0x0154, 0x0165}, {0x0168, 0x016A},
{0x016C, 0x01CD}, {0x01CF, 0x01CF}, {0x01D1, 0x01D1},
{0x01D3, 0x01D3}, {0x01D5, 0x01D5}, {0x01D7, 0x01D7},
{0x01D9, 0x01D9}, {0x01DB, 0x01DB}, {0x01DD, 0x0250},
{0x0252, 0x0260}, {0x0262, 0x02C3}, {0x02C5, 0x02C6},
{0x02C8, 0x02C8}, {0x02CC, 0x02CC}, {0x02CE, 0x02CF},
{0x02D1, 0x02D7}, {0x02DC, 0x02DC}, {0x02DE, 0x02DE},
{0x02E0, 0x02FF}, {0x0370, 0x0377}, {0x037A, 0x037F},
{0x0384, 0x038A}, {0x038C, 0x038C}, {0x038E, 0x0390},
{0x03AA, 0x03B0}, {0x03C2, 0x03C2}, {0x03CA, 0x0400},
{0x0402, 0x040F}, {0x0450, 0x0450}, {0x0452, 0x052F},
{0x0531, 0x0556}, {0x0559, 0x058A}, {0x058D, 0x058F},
{0x0591, 0x05C7}, {0x05D0, 0x05EA}, {0x05EF, 0x05F4},
{0x0600, 0x061C}, {0x061E, 0x070D}, {0x070F, 0x074A},
{0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D},
{0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E},
{0x0860, 0x086A}, {0x08A0, 0x08B4}, {0x08B6, 0x08BD},
{0x08D3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
{0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
{0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8},
{0x09CB, 0x09CE}, {0x09D7, 0x09D7}, {0x09DC, 0x09DD},
{0x09DF, 0x09E3}, {0x09E6, 0x09FE}, {0x0A01, 0x0A03},
{0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
{0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36},
{0x0A38, 0x0A39}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42},
{0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51},
{0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76},
{0x0A81, 0x0A83}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91},
{0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
{0x0AB5, 0x0AB9}, {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9},
{0x0ACB, 0x0ACD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3},
{0x0AE6, 0x0AF1}, {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03},
{0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28},
{0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39},
{0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
{0x0B56, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63},
{0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
{0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
{0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4},
{0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2},
{0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0},
{0x0BD7, 0x0BD7}, {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C},
{0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39},
{0x0C3D, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
{0x0C55, 0x0C56}, {0x0C58, 0x0C5A}, {0x0C60, 0x0C63},
{0x0C66, 0x0C6F}, {0x0C77, 0x0C8C}, {0x0C8E, 0x0C90},
{0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9},
{0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
{0x0CD5, 0x0CD6}, {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE3},
{0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2}, {0x0D00, 0x0D03},
{0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44},
{0x0D46, 0x0D48}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63},
{0x0D66, 0x0D7F}, {0x0D82, 0x0D83}, {0x0D85, 0x0D96},
{0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD},
{0x0DC0, 0x0DC6}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4},
{0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF},
{0x0DF2, 0x0DF4}, {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B},
{0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A},
{0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD},
{0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD},
{0x0ED0, 0x0ED9}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47},
{0x0F49, 0x0F6C}, {0x0F71, 0x0F97}, {0x0F99, 0x0FBC},
{0x0FBE, 0x0FCC}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5},
{0x10C7, 0x10C7}, {0x10CD, 0x10CD}, {0x10D0, 0x10FF},
{0x1160, 0x1248}, {0x124A, 0x124D}, {0x1250, 0x1256},
{0x1258, 0x1258}, {0x125A, 0x125D}, {0x1260, 0x1288},
{0x128A, 0x128D}, {0x1290, 0x12B0}, {0x12B2, 0x12B5},
{0x12B8, 0x12BE}, {0x12C0, 0x12C0}, {0x12C2, 0x12C5},
{0x12C8, 0x12D6}, {0x12D8, 0x1310}, {0x1312, 0x1315},
{0x1318, 0x135A}, {0x135D, 0x137C}, {0x1380, 0x1399},
{0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1400, 0x169C},
{0x16A0, 0x16F8}, {0x1700, 0x170C}, {0x170E, 0x1714},
{0x1720, 0x1736}, {0x1740, 0x1753}, {0x1760, 0x176C},
{0x176E, 0x1770}, {0x1772, 0x1773}, {0x1780, 0x17DD},
{0x17E0, 0x17E9}, {0x17F0, 0x17F9}, {0x1800, 0x180E},
{0x1810, 0x1819}, {0x1820, 0x1878}, {0x1880, 0x18AA},
{0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1920, 0x192B},
{0x1930, 0x193B}, {0x1940, 0x1940}, {0x1944, 0x196D},
{0x1970, 0x1974}, {0x1980, 0x19AB}, {0x19B0, 0x19C9},
{0x19D0, 0x19DA}, {0x19DE, 0x1A1B}, {0x1A1E, 0x1A5E},
{0x1A60, 0x1A7C}, {0x1A7F, 0x1A89}, {0x1A90, 0x1A99},
{0x1AA0, 0x1AAD}, {0x1AB0, 0x1ABE}, {0x1B00, 0x1B4B},
{0x1B50, 0x1B7C}, {0x1B80, 0x1BF3}, {0x1BFC, 0x1C37},
{0x1C3B, 0x1C49}, {0x1C4D, 0x1C88}, {0x1C90, 0x1CBA},
{0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA}, {0x1D00, 0x1DF9},
{0x1DFB, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45},
{0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
{0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D},
{0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3},
{0x1FD6, 0x1FDB}, {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4},
{0x1FF6, 0x1FFE}, {0x2000, 0x200F}, {0x2011, 0x2012},
{0x2017, 0x2017}, {0x201A, 0x201B}, {0x201E, 0x201F},
{0x2023, 0x2023}, {0x2028, 0x202F}, {0x2031, 0x2031},
{0x2034, 0x2034}, {0x2036, 0x203A}, {0x203C, 0x203D},
{0x203F, 0x2064}, {0x2066, 0x2071}, {0x2075, 0x207E},
{0x2080, 0x2080}, {0x2085, 0x208E}, {0x2090, 0x209C},
{0x20A0, 0x20A8}, {0x20AA, 0x20AB}, {0x20AD, 0x20BF},
{0x20D0, 0x20F0}, {0x2100, 0x2102}, {0x2104, 0x2104},
{0x2106, 0x2108}, {0x210A, 0x2112}, {0x2114, 0x2115},
{0x2117, 0x2120}, {0x2123, 0x2125}, {0x2127, 0x212A},
{0x212C, 0x2152}, {0x2155, 0x215A}, {0x215F, 0x215F},
{0x216C, 0x216F}, {0x217A, 0x2188}, {0x218A, 0x218B},
{0x219A, 0x21B7}, {0x21BA, 0x21D1}, {0x21D3, 0x21D3},
{0x21D5, 0x21E6}, {0x21E8, 0x21FF}, {0x2201, 0x2201},
{0x2204, 0x2206}, {0x2209, 0x220A}, {0x220C, 0x220E},
{0x2210, 0x2210}, {0x2212, 0x2214}, {0x2216, 0x2219},
{0x221B, 0x221C}, {0x2221, 0x2222}, {0x2224, 0x2224},
{0x2226, 0x2226}, {0x222D, 0x222D}, {0x222F, 0x2233},
{0x2238, 0x223B}, {0x223E, 0x2247}, {0x2249, 0x224B},
{0x224D, 0x2251}, {0x2253, 0x225F}, {0x2262, 0x2263},
{0x2268, 0x2269}, {0x226C, 0x226D}, {0x2270, 0x2281},
{0x2284, 0x2285}, {0x2288, 0x2294}, {0x2296, 0x2298},
{0x229A, 0x22A4}, {0x22A6, 0x22BE}, {0x22C0, 0x2311},
{0x2313, 0x2319}, {0x231C, 0x2328}, {0x232B, 0x23E8},
{0x23ED, 0x23EF}, {0x23F1, 0x23F2}, {0x23F4, 0x2426},
{0x2440, 0x244A}, {0x24EA, 0x24EA}, {0x254C, 0x254F},
{0x2574, 0x257F}, {0x2590, 0x2591}, {0x2596, 0x259F},
{0x25A2, 0x25A2}, {0x25AA, 0x25B1}, {0x25B4, 0x25B5},
{0x25B8, 0x25BB}, {0x25BE, 0x25BF}, {0x25C2, 0x25C5},
{0x25C9, 0x25CA}, {0x25CC, 0x25CD}, {0x25D2, 0x25E1},
{0x25E6, 0x25EE}, {0x25F0, 0x25FC}, {0x25FF, 0x2604},
{0x2607, 0x2608}, {0x260A, 0x260D}, {0x2610, 0x2613},
{0x2616, 0x261B}, {0x261D, 0x261D}, {0x261F, 0x263F},
{0x2641, 0x2641}, {0x2643, 0x2647}, {0x2654, 0x265F},
{0x2662, 0x2662}, {0x2666, 0x2666}, {0x266B, 0x266B},
{0x266E, 0x266E}, {0x2670, 0x267E}, {0x2680, 0x2692},
{0x2694, 0x269D}, {0x26A0, 0x26A0}, {0x26A2, 0x26A9},
{0x26AC, 0x26BC}, {0x26C0, 0x26C3}, {0x26E2, 0x26E2},
{0x26E4, 0x26E7}, {0x2700, 0x2704}, {0x2706, 0x2709},
{0x270C, 0x2727}, {0x2729, 0x273C}, {0x273E, 0x274B},
{0x274D, 0x274D}, {0x274F, 0x2752}, {0x2756, 0x2756},
{0x2758, 0x2775}, {0x2780, 0x2794}, {0x2798, 0x27AF},
{0x27B1, 0x27BE}, {0x27C0, 0x27E5}, {0x27EE, 0x2984},
{0x2987, 0x2B1A}, {0x2B1D, 0x2B4F}, {0x2B51, 0x2B54},
{0x2B5A, 0x2B73}, {0x2B76, 0x2B95}, {0x2B98, 0x2C2E},
{0x2C30, 0x2C5E}, {0x2C60, 0x2CF3}, {0x2CF9, 0x2D25},
{0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67},
{0x2D6F, 0x2D70}, {0x2D7F, 0x2D96}, {0x2DA0, 0x2DA6},
{0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE},
{0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6},
{0x2DD8, 0x2DDE}, {0x2DE0, 0x2E4F}, {0x303F, 0x303F},
{0x4DC0, 0x4DFF}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7},
{0xA700, 0xA7BF}, {0xA7C2, 0xA7C6}, {0xA7F7, 0xA82B},
{0xA830, 0xA839}, {0xA840, 0xA877}, {0xA880, 0xA8C5},
{0xA8CE, 0xA8D9}, {0xA8E0, 0xA953}, {0xA95F, 0xA95F},
{0xA980, 0xA9CD}, {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE},
{0xAA00, 0xAA36}, {0xAA40, 0xAA4D}, {0xAA50, 0xAA59},
{0xAA5C, 0xAAC2}, {0xAADB, 0xAAF6}, {0xAB01, 0xAB06},
{0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26},
{0xAB28, 0xAB2E}, {0xAB30, 0xAB67}, {0xAB70, 0xABED},
{0xABF0, 0xABF9}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB},
{0xD800, 0xDFFF}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17},
{0xFB1D, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E},
{0xFB40, 0xFB41}, {0xFB43, 0xFB44}, {0xFB46, 0xFBC1},
{0xFBD3, 0xFD3F}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7},
{0xFDF0, 0xFDFD}, {0xFE20, 0xFE2F}, {0xFE70, 0xFE74},
{0xFE76, 0xFEFC}, {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFC},
{0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A},
{0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D},
{0x10080, 0x100FA}, {0x10100, 0x10102}, {0x10107, 0x10133},
{0x10137, 0x1018E}, {0x10190, 0x1019B}, {0x101A0, 0x101A0},
{0x101D0, 0x101FD}, {0x10280, 0x1029C}, {0x102A0, 0x102D0},
{0x102E0, 0x102FB}, {0x10300, 0x10323}, {0x1032D, 0x1034A},
{0x10350, 0x1037A}, {0x10380, 0x1039D}, {0x1039F, 0x103C3},
{0x103C8, 0x103D5}, {0x10400, 0x1049D}, {0x104A0, 0x104A9},
{0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527},
{0x10530, 0x10563}, {0x1056F, 0x1056F}, {0x10600, 0x10736},
{0x10740, 0x10755}, {0x10760, 0x10767}, {0x10800, 0x10805},
{0x10808, 0x10808}, {0x1080A, 0x10835}, {0x10837, 0x10838},
{0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10857, 0x1089E},
{0x108A7, 0x108AF}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5},
{0x108FB, 0x1091B}, {0x1091F, 0x10939}, {0x1093F, 0x1093F},
{0x10980, 0x109B7}, {0x109BC, 0x109CF}, {0x109D2, 0x10A03},
{0x10A05, 0x10A06}, {0x10A0C, 0x10A13}, {0x10A15, 0x10A17},
{0x10A19, 0x10A35}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48},
{0x10A50, 0x10A58}, {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6},
{0x10AEB, 0x10AF6}, {0x10B00, 0x10B35}, {0x10B39, 0x10B55},
{0x10B58, 0x10B72}, {0x10B78, 0x10B91}, {0x10B99, 0x10B9C},
{0x10BA9, 0x10BAF}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2},
{0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27}, {0x10D30, 0x10D39},
{0x10E60, 0x10E7E}, {0x10F00, 0x10F27}, {0x10F30, 0x10F59},
{0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x1106F},
{0x1107F, 0x110C1}, {0x110CD, 0x110CD}, {0x110D0, 0x110E8},
{0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11146},
{0x11150, 0x11176}, {0x11180, 0x111CD}, {0x111D0, 0x111DF},
{0x111E1, 0x111F4}, {0x11200, 0x11211}, {0x11213, 0x1123E},
{0x11280, 0x11286}, {0x11288, 0x11288}, {0x1128A, 0x1128D},
{0x1128F, 0x1129D}, {0x1129F, 0x112A9}, {0x112B0, 0x112EA},
{0x112F0, 0x112F9}, {0x11300, 0x11303}, {0x11305, 0x1130C},
{0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330},
{0x11332, 0x11333}, {0x11335, 0x11339}, {0x1133B, 0x11344},
{0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11350, 0x11350},
{0x11357, 0x11357}, {0x1135D, 0x11363}, {0x11366, 0x1136C},
{0x11370, 0x11374}, {0x11400, 0x11459}, {0x1145B, 0x1145B},
{0x1145D, 0x1145F}, {0x11480, 0x114C7}, {0x114D0, 0x114D9},
{0x11580, 0x115B5}, {0x115B8, 0x115DD}, {0x11600, 0x11644},
{0x11650, 0x11659}, {0x11660, 0x1166C}, {0x11680, 0x116B8},
{0x116C0, 0x116C9}, {0x11700, 0x1171A}, {0x1171D, 0x1172B},
{0x11730, 0x1173F}, {0x11800, 0x1183B}, {0x118A0, 0x118F2},
{0x118FF, 0x118FF}, {0x119A0, 0x119A7}, {0x119AA, 0x119D7},
{0x119DA, 0x119E4}, {0x11A00, 0x11A47}, {0x11A50, 0x11AA2},
{0x11AC0, 0x11AF8}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C36},
{0x11C38, 0x11C45}, {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F},
{0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06},
{0x11D08, 0x11D09}, {0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A},
{0x11D3C, 0x11D3D}, {0x11D3F, 0x11D47}, {0x11D50, 0x11D59},
{0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E},
{0x11D90, 0x11D91}, {0x11D93, 0x11D98}, {0x11DA0, 0x11DA9},
{0x11EE0, 0x11EF8}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399},
{0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543},
{0x13000, 0x1342E}, {0x13430, 0x13438}, {0x14400, 0x14646},
{0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A60, 0x16A69},
{0x16A6E, 0x16A6F}, {0x16AD0, 0x16AED}, {0x16AF0, 0x16AF5},
{0x16B00, 0x16B45}, {0x16B50, 0x16B59}, {0x16B5B, 0x16B61},
{0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, {0x16E40, 0x16E9A},
{0x16F00, 0x16F4A}, {0x16F4F, 0x16F87}, {0x16F8F, 0x16F9F},
{0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88},
{0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BCA3}, {0x1D000, 0x1D0F5},
{0x1D100, 0x1D126}, {0x1D129, 0x1D1E8}, {0x1D200, 0x1D245},
{0x1D2E0, 0x1D2F3}, {0x1D300, 0x1D356}, {0x1D360, 0x1D378},
{0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F},
{0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC},
{0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3},
{0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514},
{0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E},
{0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550},
{0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B},
{0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006},
{0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024},
{0x1E026, 0x1E02A}, {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D},
{0x1E140, 0x1E149}, {0x1E14E, 0x1E14F}, {0x1E2C0, 0x1E2F9},
{0x1E2FF, 0x1E2FF}, {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6},
{0x1E900, 0x1E94B}, {0x1E950, 0x1E959}, {0x1E95E, 0x1E95F},
{0x1EC71, 0x1ECB4}, {0x1ED01, 0x1ED3D}, {0x1EE00, 0x1EE03},
{0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24},
{0x1EE27, 0x1EE27}, {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37},
{0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42},
{0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B},
{0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52}, {0x1EE54, 0x1EE54},
{0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B},
{0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62},
{0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72},
{0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E},
{0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3},
{0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x1EEF0, 0x1EEF1},
{0x1F000, 0x1F003}, {0x1F005, 0x1F02B}, {0x1F030, 0x1F093},
{0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF}, {0x1F0C1, 0x1F0CE},
{0x1F0D1, 0x1F0F5}, {0x1F10B, 0x1F10C}, {0x1F12E, 0x1F12F},
{0x1F16A, 0x1F16C}, {0x1F1E6, 0x1F1FF}, {0x1F321, 0x1F32C},
{0x1F336, 0x1F336}, {0x1F37D, 0x1F37D}, {0x1F394, 0x1F39F},
{0x1F3CB, 0x1F3CE}, {0x1F3D4, 0x1F3DF}, {0x1F3F1, 0x1F3F3},
{0x1F3F5, 0x1F3F7}, {0x1F43F, 0x1F43F}, {0x1F441, 0x1F441},
{0x1F4FD, 0x1F4FE}, {0x1F53E, 0x1F54A}, {0x1F54F, 0x1F54F},
{0x1F568, 0x1F579}, {0x1F57B, 0x1F594}, {0x1F597, 0x1F5A3},
{0x1F5A5, 0x1F5FA}, {0x1F650, 0x1F67F}, {0x1F6C6, 0x1F6CB},
{0x1F6CD, 0x1F6CF}, {0x1F6D3, 0x1F6D4}, {0x1F6E0, 0x1F6EA},
{0x1F6F0, 0x1F6F3}, {0x1F700, 0x1F773}, {0x1F780, 0x1F7D8},
{0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859},
{0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F900, 0x1F90B},
{0x1FA00, 0x1FA53}, {0x1FA60, 0x1FA6D}, {0xE0001, 0xE0001},
{0xE0020, 0xE007F},
}
var emoji = table{
{0x203C, 0x203C}, {0x2049, 0x2049}, {0x2122, 0x2122},
{0x2139, 0x2139}, {0x2194, 0x2199}, {0x21A9, 0x21AA},
{0x231A, 0x231B}, {0x2328, 0x2328}, {0x2388, 0x2388},
{0x23CF, 0x23CF}, {0x23E9, 0x23F3}, {0x23F8, 0x23FA},
{0x24C2, 0x24C2}, {0x25AA, 0x25AB}, {0x25B6, 0x25B6},
{0x25C0, 0x25C0}, {0x25FB, 0x25FE}, {0x2600, 0x2605},
{0x2607, 0x2612}, {0x2614, 0x2685}, {0x2690, 0x2705},
{0x2708, 0x2712}, {0x2714, 0x2714}, {0x2716, 0x2716},
{0x271D, 0x271D}, {0x2721, 0x2721}, {0x2728, 0x2728},
{0x2733, 0x2734}, {0x2744, 0x2744}, {0x2747, 0x2747},
{0x274C, 0x274C}, {0x274E, 0x274E}, {0x2753, 0x2755},
{0x2757, 0x2757}, {0x2763, 0x2767}, {0x2795, 0x2797},
{0x27A1, 0x27A1}, {0x27B0, 0x27B0}, {0x27BF, 0x27BF},
{0x2934, 0x2935}, {0x2B05, 0x2B07}, {0x2B1B, 0x2B1C},
{0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x3030, 0x3030},
{0x303D, 0x303D}, {0x3297, 0x3297}, {0x3299, 0x3299},
{0x1F000, 0x1F0FF}, {0x1F10D, 0x1F10F}, {0x1F12F, 0x1F12F},
{0x1F16C, 0x1F171}, {0x1F17E, 0x1F17F}, {0x1F18E, 0x1F18E},
{0x1F191, 0x1F19A}, {0x1F1AD, 0x1F1E5}, {0x1F201, 0x1F20F},
{0x1F21A, 0x1F21A}, {0x1F22F, 0x1F22F}, {0x1F232, 0x1F23A},
{0x1F23C, 0x1F23F}, {0x1F249, 0x1F3FA}, {0x1F400, 0x1F53D},
{0x1F546, 0x1F64F}, {0x1F680, 0x1F6FF}, {0x1F774, 0x1F77F},
{0x1F7D5, 0x1F7FF}, {0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F},
{0x1F85A, 0x1F85F}, {0x1F888, 0x1F88F}, {0x1F8AE, 0x1F8FF},
{0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1FFFD},
}

5
vendor/github.com/mattn/go-xmpp/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,5 @@
language: go
go:
- tip
script:
- go test

View file

@ -199,13 +199,35 @@ type Options struct {
// NewClient establishes a new Client connection based on a set of Options. // NewClient establishes a new Client connection based on a set of Options.
func (o Options) NewClient() (*Client, error) { func (o Options) NewClient() (*Client, error) {
host := o.Host host := o.Host
if strings.TrimSpace(host) == "" {
a := strings.SplitN(o.User, "@", 2)
if len(a) == 2 {
if _, addrs, err := net.LookupSRV("xmpp-client", "tcp", a[1]); err == nil {
if len(addrs) > 0 {
// default to first record
host = fmt.Sprintf("%s:%d", addrs[0].Target, addrs[0].Port)
defP := addrs[0].Priority
for _, adr := range addrs {
if adr.Priority < defP {
host = fmt.Sprintf("%s:%d", adr.Target, adr.Port)
defP = adr.Priority
}
}
} else {
host = a[1]
}
} else {
host = a[1]
}
}
}
c, err := connect(host, o.User, o.Password) c, err := connect(host, o.User, o.Password)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if strings.LastIndex(o.Host, ":") > 0 { if strings.LastIndex(host, ":") > 0 {
host = host[:strings.LastIndex(o.Host, ":")] host = host[:strings.LastIndex(host, ":")]
} }
client := new(Client) client := new(Client)
@ -575,6 +597,8 @@ type Chat struct {
Text string Text string
Subject string Subject string
Thread string Thread string
Ooburl string
Oobdesc string
Roster Roster Roster Roster
Other []string Other []string
OtherElem []XMLElement OtherElem []XMLElement
@ -616,6 +640,11 @@ func (c *Client) Recv() (stanza interface{}, err error) {
} }
switch v := val.(type) { switch v := val.(type) {
case *clientMessage: case *clientMessage:
if v.Event.XMLNS == XMPPNS_PUBSUB_EVENT {
// Handle Pubsub notifications
return pubsubClientToReturn(v.Event), nil
}
stamp, _ := time.Parse( stamp, _ := time.Parse(
"2006-01-02T15:04:05Z", "2006-01-02T15:04:05Z",
v.Delay.Stamp, v.Delay.Stamp,
@ -640,35 +669,138 @@ func (c *Client) Recv() (stanza interface{}, err error) {
case *clientPresence: case *clientPresence:
return Presence{v.From, v.To, v.Type, v.Show, v.Status}, nil return Presence{v.From, v.To, v.Type, v.Show, v.Status}, nil
case *clientIQ: case *clientIQ:
switch {
case v.Query.XMLName.Space == "urn:xmpp:ping":
// TODO check more strictly // TODO check more strictly
if bytes.Equal(bytes.TrimSpace(v.Query), []byte(`<ping xmlns='urn:xmpp:ping'/>`)) || bytes.Equal(bytes.TrimSpace(v.Query), []byte(`<ping xmlns="urn:xmpp:ping"/>`)) {
err := c.SendResultPing(v.ID, v.From) err := c.SendResultPing(v.ID, v.From)
if err != nil { if err != nil {
return Chat{}, err return Chat{}, err
} }
fallthrough
case v.Type == "error":
switch v.ID {
case "sub1":
// Pubsub subscription failed
var errs []clientPubsubError
err := xml.Unmarshal([]byte(v.Error.InnerXML), &errs)
if err != nil {
return PubsubSubscription{}, err
}
var errsStr []string
for _, e := range errs {
errsStr = append(errsStr, e.XMLName.Local)
}
return PubsubSubscription{
Errors: errsStr,
}, nil
}
case v.Type == "result" && v.ID == "unsub1":
// Unsubscribing MAY contain a pubsub element. But it does
// not have to
return PubsubUnsubscription{
SubID: "",
JID: v.From,
Node: "",
Errors: nil,
}, nil
case v.Query.XMLName.Local == "pubsub":
switch v.ID {
case "sub1":
// Subscription or unsubscription was successful
var sub clientPubsubSubscription
err := xml.Unmarshal([]byte(v.Query.InnerXML), &sub)
if err != nil {
return PubsubSubscription{}, err
}
return PubsubSubscription{
SubID: sub.SubID,
JID: sub.JID,
Node: sub.Node,
Errors: nil,
}, nil
case "unsub1":
var sub clientPubsubSubscription
err := xml.Unmarshal([]byte(v.Query.InnerXML), &sub)
if err != nil {
return PubsubUnsubscription{}, err
}
return PubsubUnsubscription{
SubID: sub.SubID,
JID: v.From,
Node: sub.Node,
Errors: nil,
}, nil
case "items1", "items3":
var p clientPubsubItems
err := xml.Unmarshal([]byte(v.Query.InnerXML), &p)
if err != nil {
return PubsubItems{}, err
}
return PubsubItems{
p.Node,
pubsubItemsToReturn(p.Items),
}, nil
}
case v.Query.XMLName.Local == "":
return IQ{ID: v.ID, From: v.From, To: v.To, Type: v.Type}, nil
default:
res, err := xml.Marshal(v.Query)
if err != nil {
return Chat{}, err
}
return IQ{ID: v.ID, From: v.From, To: v.To, Type: v.Type,
Query: res}, nil
} }
return IQ{ID: v.ID, From: v.From, To: v.To, Type: v.Type, Query: v.Query}, nil
} }
} }
} }
// Send sends the message wrapped inside an XMPP message stanza body. // Send sends the message wrapped inside an XMPP message stanza body.
func (c *Client) Send(chat Chat) (n int, err error) { func (c *Client) Send(chat Chat) (n int, err error) {
var subtext = `` var subtext, thdtext, oobtext string
var thdtext = ``
if chat.Subject != `` { if chat.Subject != `` {
subtext = `<subject>` + xmlEscape(chat.Subject) + `</subject>` subtext = `<subject>` + xmlEscape(chat.Subject) + `</subject>`
} }
if chat.Thread != `` { if chat.Thread != `` {
thdtext = `<thread>` + xmlEscape(chat.Thread) + `</thread>` thdtext = `<thread>` + xmlEscape(chat.Thread) + `</thread>`
} }
if chat.Ooburl != `` {
oobtext = `<x xmlns="jabber:x:oob"><url>` + xmlEscape(chat.Ooburl) + `</url>`
if chat.Oobdesc != `` {
oobtext += `<desc>` + xmlEscape(chat.Oobdesc) + `</desc>`
}
oobtext += `</x>`
}
stanza := "<message to='%s' type='%s' id='%s' xml:lang='en'>" + subtext + "<body>%s</body>" + thdtext + "</message>" stanza := "<message to='%s' type='%s' id='%s' xml:lang='en'>" + subtext + "<body>%s</body>" + oobtext + thdtext + "</message>"
return fmt.Fprintf(c.conn, stanza, return fmt.Fprintf(c.conn, stanza,
xmlEscape(chat.Remote), xmlEscape(chat.Type), cnonce(), xmlEscape(chat.Text)) xmlEscape(chat.Remote), xmlEscape(chat.Type), cnonce(), xmlEscape(chat.Text))
} }
// SendOOB sends OOB data wrapped inside an XMPP message stanza, without actual body.
func (c *Client) SendOOB(chat Chat) (n int, err error) {
var thdtext, oobtext string
if chat.Thread != `` {
thdtext = `<thread>` + xmlEscape(chat.Thread) + `</thread>`
}
if chat.Ooburl != `` {
oobtext = `<x xmlns="jabber:x:oob"><url>` + xmlEscape(chat.Ooburl) + `</url>`
if chat.Oobdesc != `` {
oobtext += `<desc>` + xmlEscape(chat.Oobdesc) + `</desc>`
}
oobtext += `</x>`
}
return fmt.Fprintf(c.conn, "<message to='%s' type='%s' id='%s' xml:lang='en'>"+oobtext+thdtext+"</message>",
xmlEscape(chat.Remote), xmlEscape(chat.Type), cnonce())
}
// SendOrg sends the original text without being wrapped in an XMPP message stanza. // SendOrg sends the original text without being wrapped in an XMPP message stanza.
func (c *Client) SendOrg(org string) (n int, err error) { func (c *Client) SendOrg(org string) (n int, err error) {
return fmt.Fprint(c.conn, org) return fmt.Fprint(c.conn, org)
@ -777,6 +909,9 @@ type clientMessage struct {
Body string `xml:"body"` Body string `xml:"body"`
Thread string `xml:"thread"` Thread string `xml:"thread"`
// Pubsub
Event clientPubsubEvent `xml:"event"`
// Any hasn't matched element // Any hasn't matched element
Other []XMLElement `xml:",any"` Other []XMLElement `xml:",any"`
@ -848,7 +983,7 @@ type clientIQ struct {
ID string `xml:"id,attr"` ID string `xml:"id,attr"`
To string `xml:"to,attr"` To string `xml:"to,attr"`
Type string `xml:"type,attr"` // error, get, result, set Type string `xml:"type,attr"` // error, get, result, set
Query []byte `xml:",innerxml"` Query XMLElement `xml:",any"`
Error clientError Error clientError
Bind bindBind Bind bindBind
} }
@ -856,8 +991,9 @@ type clientIQ struct {
type clientError struct { type clientError struct {
XMLName xml.Name `xml:"jabber:client error"` XMLName xml.Name `xml:"jabber:client error"`
Code string `xml:",attr"` Code string `xml:",attr"`
Type string `xml:",attr"` Type string `xml:"type,attr"`
Any xml.Name Any xml.Name
InnerXML []byte `xml:",innerxml"`
Text string Text string
} }

133
vendor/github.com/mattn/go-xmpp/xmpp_pubsub.go generated vendored Normal file
View file

@ -0,0 +1,133 @@
package xmpp
import (
"encoding/xml"
"fmt"
)
const (
XMPPNS_PUBSUB = "http://jabber.org/protocol/pubsub"
XMPPNS_PUBSUB_EVENT = "http://jabber.org/protocol/pubsub#event"
)
type clientPubsubItem struct {
XMLName xml.Name `xml:"item"`
ID string `xml:"id,attr"`
Body []byte `xml:",innerxml"`
}
type clientPubsubItems struct {
XMLName xml.Name `xml:"items"`
Node string `xml:"node,attr"`
Items []clientPubsubItem `xml:"item"`
}
type clientPubsub struct {
XMLName xml.Name `xml:"pubsub"`
Items clientPubsubItems `xml:"items"`
}
type clientPubsubEvent struct {
XMLName xml.Name `xml:"event"`
XMLNS string `xml:"xmlns,attr"`
Items clientPubsubItems `xml:"items"`
}
type clientPubsubError struct {
XMLName xml.Name
}
type clientPubsubSubscription struct {
XMLName xml.Name `xml:"subscription"`
Node string `xml:"node,attr"`
JID string `xml:"jid,attr"`
SubID string `xml:"subid,attr"`
}
type PubsubEvent struct {
Node string
Items []PubsubItem
}
type PubsubSubscription struct {
SubID string
JID string
Node string
Errors []string
}
type PubsubUnsubscription PubsubSubscription
type PubsubItem struct {
ID string
InnerXML []byte
}
type PubsubItems struct {
Node string
Items []PubsubItem
}
// Converts []clientPubsubItem to []PubsubItem
func pubsubItemsToReturn(items []clientPubsubItem) []PubsubItem {
var tmp []PubsubItem
for _, i := range items {
tmp = append(tmp, PubsubItem{
ID: i.ID,
InnerXML: i.Body,
})
}
return tmp
}
func pubsubClientToReturn(event clientPubsubEvent) PubsubEvent {
return PubsubEvent{
Node: event.Items.Node,
Items: pubsubItemsToReturn(event.Items.Items),
}
}
func pubsubStanza(body string) string {
return fmt.Sprintf("<pubsub xmlns='%s'>%s</pubsub>",
XMPPNS_PUBSUB, body)
}
func pubsubSubscriptionStanza(node, jid string) string {
body := fmt.Sprintf("<subscribe node='%s' jid='%s'/>",
xmlEscape(node),
xmlEscape(jid))
return pubsubStanza(body)
}
func pubsubUnsubscriptionStanza(node, jid string) string {
body := fmt.Sprintf("<unsubscribe node='%s' jid='%s'/>",
xmlEscape(node),
xmlEscape(jid))
return pubsubStanza(body)
}
func (c *Client) PubsubSubscribeNode(node, jid string) {
c.RawInformation(c.jid,
jid,
"sub1",
"set",
pubsubSubscriptionStanza(node, c.jid))
}
func (c *Client) PubsubUnsubscribeNode(node, jid string) {
c.RawInformation(c.jid,
jid,
"unsub1",
"set",
pubsubUnsubscriptionStanza(node, c.jid))
}
func (c *Client) PubsubRequestLastItems(node, jid string) {
body := fmt.Sprintf("<items node='%s'/>", node)
c.RawInformation(c.jid, jid, "items1", "get", pubsubStanza(body))
}
func (c *Client) PubsubRequestItem(node, jid, id string) {
body := fmt.Sprintf("<items node='%s'><item id='%s'/></items>", node, id)
c.RawInformation(c.jid, jid, "items3", "get", pubsubStanza(body))
}

26
vendor/github.com/mmcdole/gofeed/.gitignore generated vendored Normal file
View file

@ -0,0 +1,26 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
.DS_STORE

16
vendor/github.com/mmcdole/gofeed/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,16 @@
language: go
go:
- tip
- 1.6
- 1.5
- 1.4
install:
- go get -t -v ./...
- go get github.com/go-playground/overalls
- go get github.com/mattn/goveralls
- go get golang.org/x/tools/cmd/cover
script:
- go test -v ./...
- $GOPATH/bin/overalls -project=github.com/mmcdole/gofeed -covermode=count -ignore=.git,vendor -debug
after_success:
- $GOPATH/bin/goveralls -coverprofile=overalls.coverprofile -service=travis-ci

View file

@ -2,23 +2,9 @@
[![Build Status](https://travis-ci.org/mmcdole/gofeed.svg?branch=master)](https://travis-ci.org/mmcdole/gofeed) [![Coverage Status](https://coveralls.io/repos/github/mmcdole/gofeed/badge.svg?branch=master)](https://coveralls.io/github/mmcdole/gofeed?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/mmcdole/gofeed)](https://goreportcard.com/report/github.com/mmcdole/gofeed) [![](https://godoc.org/github.com/mmcdole/gofeed?status.svg)](http://godoc.org/github.com/mmcdole/gofeed) [![License](http://img.shields.io/:license-mit-blue.svg)](http://doge.mit-license.org) [![Build Status](https://travis-ci.org/mmcdole/gofeed.svg?branch=master)](https://travis-ci.org/mmcdole/gofeed) [![Coverage Status](https://coveralls.io/repos/github/mmcdole/gofeed/badge.svg?branch=master)](https://coveralls.io/github/mmcdole/gofeed?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/mmcdole/gofeed)](https://goreportcard.com/report/github.com/mmcdole/gofeed) [![](https://godoc.org/github.com/mmcdole/gofeed?status.svg)](http://godoc.org/github.com/mmcdole/gofeed) [![License](http://img.shields.io/:license-mit-blue.svg)](http://doge.mit-license.org)
The `gofeed` library is a robust feed parser that supports parsing both [RSS](https://en.wikipedia.org/wiki/RSS) and [Atom](https://en.wikipedia.org/wiki/Atom_(standard)) feeds. The library provides a universal `gofeed.Parser` that will parse and convert all feed types into a hybrid `gofeed.Feed` model. You also have the option of utilizing the feed specific `atom.Parser` or `rss.Parser` parsers which generate `atom.Feed` and `rss.Feed` respectively. The `gofeed` library is a robust feed parser that supports parsing both [RSS](https://en.wikipedia.org/wiki/RSS) and [Atom](https://en.wikipedia.org/wiki/Atom_(standard)) feeds. The universal `gofeed.Parser` will parse and convert all feed types into a hybrid `gofeed.Feed` model. You also have the option of parsing them into their respective `atom.Feed` and `rss.Feed` models using the feed specific `atom.Parser` or `rss.Parser`.
## Table of Contents ##### Supported feed types:
- [Features](#features)
- [Overview](#overview)
- [Basic Usage](#basic-usage)
- [Advanced Usage](#advanced-usage)
- [Extensions](#extensions)
- [Invalid Feeds](#invalid-feeds)
- [Default Mappings](#default-mappings)
- [Dependencies](#dependencies)
- [License](#license)
- [Credits](#credits)
## Features
#### Supported feed types:
* RSS 0.90 * RSS 0.90
* Netscape RSS 0.91 * Netscape RSS 0.91
* Userland RSS 0.91 * Userland RSS 0.91
@ -30,26 +16,22 @@ The `gofeed` library is a robust feed parser that supports parsing both [RSS](ht
* Atom 0.3 * Atom 0.3
* Atom 1.0 * Atom 1.0
#### Extension Support It also provides support for parsing several popular predefined extension modules, including [Dublin Core](http://dublincore.org/documents/dces/) and [Apples iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390), as well as arbitrary extensions. See the [Extensions](#extensions) section for more details.
The `gofeed` library provides support for parsing several popular predefined extensions into ready-made structs, including [Dublin Core](http://dublincore.org/documents/dces/) and [Apples iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390). ## Table of Contents
- [Overview](#overview)
It parses all other feed extensions in a generic way (see the [Extensions](#extensions) section for more details). - [Basic Usage](#basic-usage)
- [Advanced Usage](#advanced-usage)
#### Invalid Feeds - [Extensions](#extensions)
- [Invalid Feeds](#invalid-feeds)
A best-effort attempt is made at parsing broken and invalid XML feeds. Currently, `gofeed` can succesfully parse feeds with the following issues: - [Default Mappings](#default-mappings)
- Unescaped/Naked Markup in feed elements - [Dependencies](#dependencies)
- Undeclared namespace prefixes - [License](#license)
- Missing closing tags on certain elements - [Donate](#donate)
- Illegal tags within feed elements without namespace prefixes - [Credits](#credits)
- Missing "required" elements as specified by the respective feed specs.
- Incorrect date formats
## Overview ## Overview
The `gofeed` library is comprised of a universal feed parser and several feed specific parsers. Which one you choose depends entirely on your usecase. If you will be handling both rss and atom feeds then it makes sense to use the `gofeed.Parser`. If you know ahead of time that you will only be parsing one feed type then it would make sense to use `rss.Parser` or `atom.Parser`.
#### Universal Feed Parser #### Universal Feed Parser
The universal `gofeed.Parser` works in 3 stages: detection, parsing and translation. It first detects the feed type that it is currently parsing. Then it uses a feed specific parser to parse the feed into its true representation which will be either a `rss.Feed` or `atom.Feed`. These models cover every field possible for their respective feed types. Finally, they are *translated* into a `gofeed.Feed` model that is a hybrid of both feed types. Performing the universal feed parsing in these 3 stages allows for more flexibility and keeps the code base more maintainable by separating RSS and Atom parsing into seperate packages. The universal `gofeed.Parser` works in 3 stages: detection, parsing and translation. It first detects the feed type that it is currently parsing. Then it uses a feed specific parser to parse the feed into its true representation which will be either a `rss.Feed` or `atom.Feed`. These models cover every field possible for their respective feed types. Finally, they are *translated* into a `gofeed.Feed` model that is a hybrid of both feed types. Performing the universal feed parsing in these 3 stages allows for more flexibility and keeps the code base more maintainable by separating RSS and Atom parsing into seperate packages.
@ -62,6 +44,8 @@ The translation step is done by anything which adheres to the `gofeed.Translator
The `gofeed` library provides two feed specific parsers: `atom.Parser` and `rss.Parser`. If the hybrid `gofeed.Feed` model that the universal `gofeed.Parser` produces does not contain a field from the `atom.Feed` or `rss.Feed` model that you require, it might be beneficial to use the feed specific parsers. When using the `atom.Parser` or `rss.Parser` directly, you can access all of fields found in the `atom.Feed` and `rss.Feed` models. It is also marginally faster because you are able to skip the translation step. The `gofeed` library provides two feed specific parsers: `atom.Parser` and `rss.Parser`. If the hybrid `gofeed.Feed` model that the universal `gofeed.Parser` produces does not contain a field from the `atom.Feed` or `rss.Feed` model that you require, it might be beneficial to use the feed specific parsers. When using the `atom.Parser` or `rss.Parser` directly, you can access all of fields found in the `atom.Feed` and `rss.Feed` models. It is also marginally faster because you are able to skip the translation step.
However, for the *vast* majority of users, the universal `gofeed.Parser` is the best way to parse feeds. This allows the user of `gofeed` library to not care about the differences between RSS or Atom feeds.
## Basic Usage ## Basic Usage
#### Universal Feed Parser #### Universal Feed Parser
@ -201,6 +185,16 @@ Every element which does not belong to the feed's default namespace is considere
In addition to the generic handling of extensions, `gofeed` also has built in support for parsing certain popular extensions into their own structs for convenience. It currently supports the [Dublin Core](http://dublincore.org/documents/dces/) and [Apple iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390) extensions which you can access at `Feed.ItunesExt`, `feed.DublinCoreExt` and `Item.ITunesExt` and `Item.DublinCoreExt` In addition to the generic handling of extensions, `gofeed` also has built in support for parsing certain popular extensions into their own structs for convenience. It currently supports the [Dublin Core](http://dublincore.org/documents/dces/) and [Apple iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390) extensions which you can access at `Feed.ItunesExt`, `feed.DublinCoreExt` and `Item.ITunesExt` and `Item.DublinCoreExt`
## Invalid Feeds
A best-effort attempt is made at parsing broken and invalid XML feeds. Currently, `gofeed` can succesfully parse feeds with the following issues:
- Unescaped/Naked Markup in feed elements
- Undeclared namespace prefixes
- Missing closing tags on certain elements
- Illegal tags within feed elements without namespace prefixes
- Missing "required" elements as specified by the respective feed specs.
- Incorrect date formats
## Default Mappings ## Default Mappings
The ```DefaultRSSTranslator``` and the ```DefaultAtomTranslator``` map the following ```rss.Feed``` and ```atom.Feed``` fields to their respective ```gofeed.Feed``` fields. They are listed in order of precedence (highest to lowest): The ```DefaultRSSTranslator``` and the ```DefaultAtomTranslator``` map the following ```rss.Feed``` and ```atom.Feed``` fields to their respective ```gofeed.Feed``` fields. They are listed in order of precedence (highest to lowest):
@ -226,12 +220,12 @@ Categories | /rss/channel/category<br>/rss/channel/itunes:category<br>/rss/chann
--- | --- | --- --- | --- | ---
Title | /rss/channel/item/title<br>/rdf:RDF/item/title<br>/rdf:RDF/item/dc:title<br>/rss/channel/item/dc:title | /feed/entry/title Title | /rss/channel/item/title<br>/rdf:RDF/item/title<br>/rdf:RDF/item/dc:title<br>/rss/channel/item/dc:title | /feed/entry/title
Description | /rss/channel/item/description<br>/rdf:RDF/item/description<br>/rss/channel/item/dc:description<br>/rdf:RDF/item/dc:description | /feed/entry/summary Description | /rss/channel/item/description<br>/rdf:RDF/item/description<br>/rss/channel/item/dc:description<br>/rdf:RDF/item/dc:description | /feed/entry/summary
Content | /rss/channel/item/content:encoded | /feed/entry/content Content | | /feed/entry/content
Link | /rss/channel/item/link<br>/rdf:RDF/item/link | /feed/entry/link[@rel=”alternate”]/@href<br>/feed/entry/link[not(@rel)]/@href Link | /rss/channel/item/link<br>/rdf:RDF/item/link | /feed/entry/link[@rel=”alternate”]/@href<br>/feed/entry/link[not(@rel)]/@href
Updated | /rss/channel/item/dc:date<br>/rdf:RDF/rdf:item/dc:date | /feed/entry/modified<br>/feed/entry/updated Updated | /rss/channel/item/dc:date<br>/rdf:RDF/rdf:item/dc:date | /feed/entry/modified<br>/feed/entry/updated
Published | /rss/channel/item/pubDate<br>/rss/channel/item/dc:date | /feed/entry/published<br>/feed/entry/issued Published | /rss/channel/item/pubDate<br>/rss/channel/item/dc:date | /feed/entry/published<br>/feed/entry/issued
Author | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/author Author | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/author
GUID | /rss/channel/item/guid | /feed/entry/id Guid | /rss/channel/item/guid | /feed/entry/id
Image | /rss/channel/item/itunes:image<br>/rss/channel/item/media:image | Image | /rss/channel/item/itunes:image<br>/rss/channel/item/media:image |
Categories | /rss/channel/item/category<br>/rss/channel/item/dc:subject<br>/rss/channel/item/itunes:keywords<br>/rdf:RDF/channel/item/dc:subject | /feed/entry/category Categories | /rss/channel/item/category<br>/rss/channel/item/dc:subject<br>/rss/channel/item/itunes:keywords<br>/rdf:RDF/channel/item/dc:subject | /feed/entry/category
Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”] Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”]
@ -246,9 +240,14 @@ Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”
This project is licensed under the [MIT License](https://raw.githubusercontent.com/mmcdole/gofeed/master/LICENSE) This project is licensed under the [MIT License](https://raw.githubusercontent.com/mmcdole/gofeed/master/LICENSE)
## Donate
I write open source software for fun. However, if you want to buy me a beer because you found something I wrote useful, feel free!
Bitcoin: 1CXrjBBkxgVNgKXRAq5MnsR7zzZbHvUHkJ
## Credits ## Credits
* [cristoper](https://github.com/cristoper) for his work on implementing xml:base relative URI handling.
* [Mark Pilgrim](https://en.wikipedia.org/wiki/Mark_Pilgrim) and [Kurt McKee](http://kurtmckee.org) for their work on the excellent [Universal Feed Parser](https://github.com/kurtmckee/feedparser) Python library. This library was the inspiration for the `gofeed` library. * [Mark Pilgrim](https://en.wikipedia.org/wiki/Mark_Pilgrim) and [Kurt McKee](http://kurtmckee.org) for their work on the excellent [Universal Feed Parser](https://github.com/kurtmckee/feedparser) Python library. This library was the inspiration for the `gofeed` library.
* [Dan MacTough](http://blog.mact.me) for his work on [node-feedparser](https://github.com/danmactough/node-feedparser). It provided inspiration for the set of fields that should be covered in the hybrid `gofeed.Feed` model. * [Dan MacTough](http://blog.mact.me) for his work on [node-feedparser](https://github.com/danmactough/node-feedparser). It provided inspiration for the set of fields that should be covered in the hybrid `gofeed.Feed` model.
* [Matt Jibson](https://mattjibson.com/) for his date parsing function in the [goread](https://github.com/mjibson/goread) project. * [Matt Jibson](https://mattjibson.com/) for his date parsing function in the [goread](https://github.com/mjibson/goread) project.

View file

@ -6,43 +6,19 @@ import (
"strings" "strings"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
ext "github.com/mmcdole/gofeed/extensions" "github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared" "github.com/mmcdole/gofeed/internal/shared"
xpp "github.com/mmcdole/goxpp" "github.com/mmcdole/goxpp"
)
var (
// Atom elements which contain URIs
// https://tools.ietf.org/html/rfc4287
uriElements = map[string]bool{
"icon": true,
"id": true,
"logo": true,
"uri": true,
"url": true, // atom 0.3
}
// Atom attributes which contain URIs
// https://tools.ietf.org/html/rfc4287
atomURIAttrs = map[string]bool{
"href": true,
"scheme": true,
"src": true,
"uri": true,
}
) )
// Parser is an Atom Parser // Parser is an Atom Parser
type Parser struct { type Parser struct{}
base *shared.XMLBase
}
// Parse parses an xml feed into an atom.Feed // Parse parses an xml feed into an atom.Feed
func (ap *Parser) Parse(feed io.Reader) (*Feed, error) { func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
ap.base = &shared.XMLBase{URIAttrs: atomURIAttrs}
_, err := ap.base.FindRoot(p) _, err := shared.FindRoot(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -67,7 +43,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
extensions := ext.Extensions{} extensions := ext.Extensions{}
for { for {
tok, err := ap.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -221,7 +197,7 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
extensions := ext.Extensions{} extensions := ext.Extensions{}
for { for {
tok, err := ap.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -376,7 +352,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
extensions := ext.Extensions{} extensions := ext.Extensions{}
for { for {
tok, err := ap.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -534,7 +510,7 @@ func (ap *Parser) parsePerson(name string, p *xpp.XMLPullParser) (*Person, error
person := &Person{} person := &Person{}
for { for {
tok, err := ap.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -678,46 +654,31 @@ func (ap *Parser) parseAtomText(p *xpp.XMLPullParser) (string, error) {
result := text.InnerXML result := text.InnerXML
result = strings.TrimSpace(result) result = strings.TrimSpace(result)
if strings.HasPrefix(result, "<![CDATA[") &&
strings.HasSuffix(result, "]]>") {
result = strings.TrimPrefix(result, "<![CDATA[")
result = strings.TrimSuffix(result, "]]>")
return result, nil
}
lowerType := strings.ToLower(text.Type) lowerType := strings.ToLower(text.Type)
lowerMode := strings.ToLower(text.Mode) lowerMode := strings.ToLower(text.Mode)
if strings.Contains(result, "<![CDATA[") {
result = shared.StripCDATA(result)
if lowerType == "html" || strings.Contains(lowerType, "xhtml") {
result, _ = ap.base.ResolveHTML(result)
}
} else {
// decode non-CDATA contents depending on type
if lowerType == "text" || if lowerType == "text" ||
strings.HasPrefix(lowerType, "text/") || strings.HasPrefix(lowerType, "text/") ||
(lowerType == "" && lowerMode == "") { (lowerType == "" && lowerMode == "") {
result, err = shared.DecodeEntities(result) result, err = shared.DecodeEntities(result)
} else if strings.Contains(lowerType, "xhtml") { } else if strings.Contains(lowerType, "xhtml") {
result = ap.stripWrappingDiv(result) result = ap.stripWrappingDiv(result)
result, _ = ap.base.ResolveHTML(result)
} else if lowerType == "html" { } else if lowerType == "html" {
result = ap.stripWrappingDiv(result) result = ap.stripWrappingDiv(result)
result, err = shared.DecodeEntities(result) result, err = shared.DecodeEntities(result)
if err == nil {
result, _ = ap.base.ResolveHTML(result)
}
} else { } else {
decodedStr, err := base64.StdEncoding.DecodeString(result) decodedStr, err := base64.StdEncoding.DecodeString(result)
if err == nil { if err == nil {
result = string(decodedStr) result = string(decodedStr)
} }
} }
}
// resolve relative URIs in URI-containing elements according to xml:base
name := strings.ToLower(p.Name)
if uriElements[name] {
resolved, err := ap.base.ResolveURL(result)
if err == nil {
result = resolved
}
}
return result, err return result, err
} }

View file

@ -5,7 +5,7 @@ import (
"strings" "strings"
"github.com/mmcdole/gofeed/internal/shared" "github.com/mmcdole/gofeed/internal/shared"
xpp "github.com/mmcdole/goxpp" "github.com/mmcdole/goxpp"
) )
// FeedType represents one of the possible feed // FeedType represents one of the possible feed
@ -28,8 +28,7 @@ const (
func DetectFeedType(feed io.Reader) FeedType { func DetectFeedType(feed io.Reader) FeedType {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
xmlBase := shared.XMLBase{} _, err := shared.FindRoot(p)
_, err := xmlBase.FindRoot(p)
if err != nil { if err != nil {
return FeedTypeUnknown return FeedTypeUnknown
} }

View file

@ -14,7 +14,6 @@ type ITunesFeedExtension struct {
Image string `json:"image,omitempty"` Image string `json:"image,omitempty"`
Complete string `json:"complete,omitempty"` Complete string `json:"complete,omitempty"`
NewFeedURL string `json:"newFeedUrl,omitempty"` NewFeedURL string `json:"newFeedUrl,omitempty"`
Type string `json:"type,omitempty"`
} }
// ITunesItemExtension is a set of extension // ITunesItemExtension is a set of extension
@ -59,7 +58,6 @@ func NewITunesFeedExtension(extensions map[string][]Extension) *ITunesFeedExtens
feed.NewFeedURL = parseTextExtension("new-feed-url", extensions) feed.NewFeedURL = parseTextExtension("new-feed-url", extensions)
feed.Categories = parseCategories(extensions) feed.Categories = parseCategories(extensions)
feed.Owner = parseOwner(extensions) feed.Owner = parseOwner(extensions)
feed.Type = parseTextExtension("type", extensions)
return feed return feed
} }
@ -73,7 +71,6 @@ func NewITunesItemExtension(extensions map[string][]Extension) *ITunesItemExtens
entry.Explicit = parseTextExtension("explicit", extensions) entry.Explicit = parseTextExtension("explicit", extensions)
entry.Subtitle = parseTextExtension("subtitle", extensions) entry.Subtitle = parseTextExtension("subtitle", extensions)
entry.Summary = parseTextExtension("summary", extensions) entry.Summary = parseTextExtension("summary", extensions)
entry.Keywords = parseTextExtension("keywords", extensions)
entry.Image = parseImage(extensions) entry.Image = parseImage(extensions)
entry.IsClosedCaptioned = parseTextExtension("isClosedCaptioned", extensions) entry.IsClosedCaptioned = parseTextExtension("isClosedCaptioned", extensions)
entry.Order = parseTextExtension("order", extensions) entry.Order = parseTextExtension("order", extensions)

View file

@ -1,12 +0,0 @@
module github.com/mmcdole/gofeed
require (
github.com/PuerkitoBio/goquery v1.5.0
github.com/codegangsta/cli v1.20.0
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/stretchr/testify v1.2.2
golang.org/x/net v0.0.0-20181220203305-927f97764cc3
golang.org/x/text v0.3.0
)

View file

@ -1,20 +0,0 @@
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/codegangsta/cli v1.20.0 h1:iX1FXEgwzd5+XN6wk5cVHOGQj6Q3Dcp20lUeS4lHNTw=
github.com/codegangsta/cli v1.20.0/go.mod h1:/qJNoX69yVSKu5o4jLyXAENLRyk1uhi7zkbQ3slBdOA=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3 h1:eH6Eip3UpmR+yM/qI9Ijluzb1bNv/cAU/n+6l8tRSis=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View file

@ -18,27 +18,47 @@ var dateFormats = []string{
time.RFC1123, time.RFC1123,
time.ANSIC, time.ANSIC,
"Mon, January 2 2006 15:04:05 -0700", "Mon, January 2 2006 15:04:05 -0700",
"Mon, January 02, 2006, 15:04:05 MST",
"Mon, January 02, 2006 15:04:05 MST",
"Mon, Jan 2, 2006 15:04 MST",
"Mon, Jan 2 2006 15:04 MST",
"Mon, Jan 2, 2006 15:04:05 MST",
"Mon, Jan 2 2006 15:04:05 -700", "Mon, Jan 2 2006 15:04:05 -700",
"Mon, Jan 2 2006 15:04:05 -0700", "Mon, Jan 2 2006 15:04:05 -0700",
"Mon Jan 2 15:04 2006", "Mon Jan 2 15:04 2006",
"Mon Jan 2 15:04:05 2006 MST",
"Mon Jan 02, 2006 3:04 pm", "Mon Jan 02, 2006 3:04 pm",
"Mon, Jan 02,2006 15:04:05 MST",
"Mon Jan 02 2006 15:04:05 -0700", "Mon Jan 02 2006 15:04:05 -0700",
"Monday, January 2, 2006 15:04:05 MST",
"Monday, January 2, 2006 03:04 PM", "Monday, January 2, 2006 03:04 PM",
"Monday, January 2, 2006", "Monday, January 2, 2006",
"Monday, January 02, 2006", "Monday, January 02, 2006",
"Monday, 2 January 2006 15:04:05 MST",
"Monday, 2 January 2006 15:04:05 -0700", "Monday, 2 January 2006 15:04:05 -0700",
"Monday, 2 Jan 2006 15:04:05 MST",
"Monday, 2 Jan 2006 15:04:05 -0700", "Monday, 2 Jan 2006 15:04:05 -0700",
"Monday, 02 January 2006 15:04:05 MST",
"Monday, 02 January 2006 15:04:05 -0700", "Monday, 02 January 2006 15:04:05 -0700",
"Monday, 02 January 2006 15:04:05", "Monday, 02 January 2006 15:04:05",
"Mon, 2 January 2006 15:04 MST",
"Mon, 2 January 2006, 15:04 -0700", "Mon, 2 January 2006, 15:04 -0700",
"Mon, 2 January 2006, 15:04:05 MST",
"Mon, 2 January 2006 15:04:05 MST",
"Mon, 2 January 2006 15:04:05 -0700", "Mon, 2 January 2006 15:04:05 -0700",
"Mon, 2 January 2006", "Mon, 2 January 2006",
"Mon, 2 Jan 2006 3:04:05 PM -0700", "Mon, 2 Jan 2006 3:04:05 PM -0700",
"Mon, 2 Jan 2006 15:4:5 MST",
"Mon, 2 Jan 2006 15:4:5 -0700 GMT", "Mon, 2 Jan 2006 15:4:5 -0700 GMT",
"Mon, 2, Jan 2006 15:4", "Mon, 2, Jan 2006 15:4",
"Mon, 2 Jan 2006 15:04 MST",
"Mon, 2 Jan 2006, 15:04 -0700", "Mon, 2 Jan 2006, 15:04 -0700",
"Mon, 2 Jan 2006 15:04 -0700", "Mon, 2 Jan 2006 15:04 -0700",
"Mon, 2 Jan 2006 15:04:05 UT", "Mon, 2 Jan 2006 15:04:05 UT",
"Mon, 2 Jan 2006 15:04:05MST",
"Mon, 2 Jan 2006 15:04:05 MST",
"Mon 2 Jan 2006 15:04:05 MST",
"mon,2 Jan 2006 15:04:05 MST",
"Mon, 2 Jan 2006 15:04:05 -0700 MST", "Mon, 2 Jan 2006 15:04:05 -0700 MST",
"Mon, 2 Jan 2006 15:04:05-0700", "Mon, 2 Jan 2006 15:04:05-0700",
"Mon, 2 Jan 2006 15:04:05 -0700", "Mon, 2 Jan 2006 15:04:05 -0700",
@ -46,15 +66,25 @@ var dateFormats = []string{
"Mon, 2 Jan 2006 15:04", "Mon, 2 Jan 2006 15:04",
"Mon,2 Jan 2006", "Mon,2 Jan 2006",
"Mon, 2 Jan 2006", "Mon, 2 Jan 2006",
"Mon, 2 Jan 15:04:05 MST",
"Mon, 2 Jan 06 15:04:05 MST",
"Mon, 2 Jan 06 15:04:05 -0700", "Mon, 2 Jan 06 15:04:05 -0700",
"Mon, 2006-01-02 15:04", "Mon, 2006-01-02 15:04",
"Mon,02 January 2006 14:04:05 MST",
"Mon, 02 January 2006", "Mon, 02 January 2006",
"Mon, 02 Jan 2006 3:04:05 PM MST",
"Mon, 02 Jan 2006 15 -0700", "Mon, 02 Jan 2006 15 -0700",
"Mon,02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006 15:04 -0700", "Mon, 02 Jan 2006 15:04 -0700",
"Mon, 02 Jan 2006 15:04:05 Z", "Mon, 02 Jan 2006 15:04:05 Z",
"Mon, 02 Jan 2006 15:04:05 UT", "Mon, 02 Jan 2006 15:04:05 UT",
"Mon, 02 Jan 2006 15:04:05 MST-07:00", "Mon, 02 Jan 2006 15:04:05 MST-07:00",
"Mon, 02 Jan 2006 15:04:05 MST -0700", "Mon, 02 Jan 2006 15:04:05 MST -0700",
"Mon, 02 Jan 2006, 15:04:05 MST",
"Mon, 02 Jan 2006 15:04:05MST",
"Mon, 02 Jan 2006 15:04:05 MST",
"Mon , 02 Jan 2006 15:04:05 MST",
"Mon, 02 Jan 2006 15:04:05 GMT-0700", "Mon, 02 Jan 2006 15:04:05 GMT-0700",
"Mon,02 Jan 2006 15:04:05 -0700", "Mon,02 Jan 2006 15:04:05 -0700",
"Mon, 02 Jan 2006 15:04:05 -0700", "Mon, 02 Jan 2006 15:04:05 -0700",
@ -65,23 +95,30 @@ var dateFormats = []string{
"Mon, 02 Jan 2006 15:04:05 00", "Mon, 02 Jan 2006 15:04:05 00",
"Mon, 02 Jan 2006 15:04:05", "Mon, 02 Jan 2006 15:04:05",
"Mon, 02 Jan 2006", "Mon, 02 Jan 2006",
"Mon, 02 Jan 06 15:04:05 MST",
"January 2, 2006 3:04 PM", "January 2, 2006 3:04 PM",
"January 2, 2006, 3:04 p.m.", "January 2, 2006, 3:04 p.m.",
"January 2, 2006 15:04:05 MST",
"January 2, 2006 15:04:05", "January 2, 2006 15:04:05",
"January 2, 2006 03:04 PM", "January 2, 2006 03:04 PM",
"January 2, 2006", "January 2, 2006",
"January 02, 2006 15:04:05 MST",
"January 02, 2006 15:04", "January 02, 2006 15:04",
"January 02, 2006 03:04 PM", "January 02, 2006 03:04 PM",
"January 02, 2006", "January 02, 2006",
"Jan 2, 2006 3:04:05 PM MST",
"Jan 2, 2006 3:04:05 PM", "Jan 2, 2006 3:04:05 PM",
"Jan 2, 2006 15:04:05 MST",
"Jan 2, 2006", "Jan 2, 2006",
"Jan 02 2006 03:04:05PM", "Jan 02 2006 03:04:05PM",
"Jan 02, 2006", "Jan 02, 2006",
"6/1/2 15:04", "6/1/2 15:04",
"6-1-2 15:04", "6-1-2 15:04",
"2 January 2006 15:04:05 MST",
"2 January 2006 15:04:05 -0700", "2 January 2006 15:04:05 -0700",
"2 January 2006", "2 January 2006",
"2 Jan 2006 15:04:05 Z", "2 Jan 2006 15:04:05 Z",
"2 Jan 2006 15:04:05 MST",
"2 Jan 2006 15:04:05 -0700", "2 Jan 2006 15:04:05 -0700",
"2 Jan 2006", "2 Jan 2006",
"2.1.2006 15:04:05", "2.1.2006 15:04:05",
@ -104,6 +141,7 @@ var dateFormats = []string{
"2006-01-02T15:04:05", "2006-01-02T15:04:05",
"2006-01-02 at 15:04:05", "2006-01-02 at 15:04:05",
"2006-01-02 15:04:05Z", "2006-01-02 15:04:05Z",
"2006-01-02 15:04:05 MST",
"2006-01-02 15:04:05-0700", "2006-01-02 15:04:05-0700",
"2006-01-02 15:04:05-07:00", "2006-01-02 15:04:05-07:00",
"2006-01-02 15:04:05 -0700", "2006-01-02 15:04:05 -0700",
@ -112,15 +150,21 @@ var dateFormats = []string{
"2006/01/02", "2006/01/02",
"2006-01-02", "2006-01-02",
"15:04 02.01.2006 -0700", "15:04 02.01.2006 -0700",
"1/2/2006 3:04:05 PM MST",
"1/2/2006 3:04:05 PM", "1/2/2006 3:04:05 PM",
"1/2/2006 15:04:05 MST",
"1/2/2006", "1/2/2006",
"06/1/2 15:04", "06/1/2 15:04",
"06-1-2 15:04", "06-1-2 15:04",
"02 Monday, Jan 2006 15:04", "02 Monday, Jan 2006 15:04",
"02 Jan 2006 15:04 MST",
"02 Jan 2006 15:04:05 UT", "02 Jan 2006 15:04:05 UT",
"02 Jan 2006 15:04:05 MST",
"02 Jan 2006 15:04:05 -0700", "02 Jan 2006 15:04:05 -0700",
"02 Jan 2006 15:04:05", "02 Jan 2006 15:04:05",
"02 Jan 2006", "02 Jan 2006",
"02/01/2006 15:04 MST",
"02-01-2006 15:04:05 MST",
"02.01.2006 15:04:05", "02.01.2006 15:04:05",
"02/01/2006 15:04:05", "02/01/2006 15:04:05",
"02.01.2006 15:04", "02.01.2006 15:04",
@ -129,60 +173,12 @@ var dateFormats = []string{
"02/01/2006", "02/01/2006",
"02-01-2006", "02-01-2006",
"01/02/2006 3:04 PM", "01/02/2006 3:04 PM",
"01/02/2006 15:04:05 MST",
"01/02/2006 - 15:04", "01/02/2006 - 15:04",
"01/02/2006", "01/02/2006",
"01-02-2006", "01-02-2006",
} }
// Named zone cannot be consistently loaded, so handle separately
var dateFormatsWithNamedZone = []string{
"Mon, January 02, 2006, 15:04:05 MST",
"Mon, January 02, 2006 15:04:05 MST",
"Mon, Jan 2, 2006 15:04 MST",
"Mon, Jan 2 2006 15:04 MST",
"Mon, Jan 2, 2006 15:04:05 MST",
"Mon Jan 2 15:04:05 2006 MST",
"Mon, Jan 02,2006 15:04:05 MST",
"Monday, January 2, 2006 15:04:05 MST",
"Monday, 2 January 2006 15:04:05 MST",
"Monday, 2 Jan 2006 15:04:05 MST",
"Monday, 02 January 2006 15:04:05 MST",
"Mon, 2 January 2006 15:04 MST",
"Mon, 2 January 2006, 15:04:05 MST",
"Mon, 2 January 2006 15:04:05 MST",
"Mon, 2 Jan 2006 15:4:5 MST",
"Mon, 2 Jan 2006 15:04 MST",
"Mon, 2 Jan 2006 15:04:05MST",
"Mon, 2 Jan 2006 15:04:05 MST",
"Mon 2 Jan 2006 15:04:05 MST",
"mon,2 Jan 2006 15:04:05 MST",
"Mon, 2 Jan 15:04:05 MST",
"Mon, 2 Jan 06 15:04:05 MST",
"Mon,02 January 2006 14:04:05 MST",
"Mon, 02 Jan 2006 3:04:05 PM MST",
"Mon,02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006 15:04 MST",
"Mon, 02 Jan 2006, 15:04:05 MST",
"Mon, 02 Jan 2006 15:04:05MST",
"Mon, 02 Jan 2006 15:04:05 MST",
"Mon , 02 Jan 2006 15:04:05 MST",
"Mon, 02 Jan 06 15:04:05 MST",
"January 2, 2006 15:04:05 MST",
"January 02, 2006 15:04:05 MST",
"Jan 2, 2006 3:04:05 PM MST",
"Jan 2, 2006 15:04:05 MST",
"2 January 2006 15:04:05 MST",
"2 Jan 2006 15:04:05 MST",
"2006-01-02 15:04:05 MST",
"1/2/2006 3:04:05 PM MST",
"1/2/2006 15:04:05 MST",
"02 Jan 2006 15:04 MST",
"02 Jan 2006 15:04:05 MST",
"02/01/2006 15:04 MST",
"02-01-2006 15:04:05 MST",
"01/02/2006 15:04:05 MST",
}
// ParseDate parses a given date string using a large // ParseDate parses a given date string using a large
// list of commonly found feed date formats. // list of commonly found feed date formats.
func ParseDate(ds string) (t time.Time, err error) { func ParseDate(ds string) (t time.Time, err error) {
@ -195,25 +191,6 @@ func ParseDate(ds string) (t time.Time, err error) {
return return
} }
} }
for _, f := range dateFormatsWithNamedZone {
t, err = time.Parse(f, d)
if err != nil {
continue
}
// This is a format match! Now try to load the timezone name
loc, err := time.LoadLocation(t.Location().String())
if err != nil {
// We couldn't load the TZ name. Just use UTC instead...
return t, nil
}
if t, err = time.ParseInLocation(f, ds, loc); err == nil {
return t, nil
}
// This should not be reachable
}
err = fmt.Errorf("Failed to parse date: %s", ds) err = fmt.Errorf("Failed to parse date: %s", ds)
return return
} }

View file

@ -8,7 +8,7 @@ import (
"strconv" "strconv"
"strings" "strings"
xpp "github.com/mmcdole/goxpp" "github.com/mmcdole/goxpp"
) )
var ( var (
@ -21,8 +21,48 @@ var (
InvalidNumericReference = errors.New("invalid numeric reference") InvalidNumericReference = errors.New("invalid numeric reference")
) )
const CDATA_START = "<![CDATA[" // FindRoot iterates through the tokens of an xml document until
const CDATA_END = "]]>" // it encounters its first StartTag event. It returns an error
// if it reaches EndDocument before finding a tag.
func FindRoot(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
event, err = p.Next()
if err != nil {
return event, err
}
if event == xpp.StartTag {
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find root node before document end.")
}
}
return
}
// NextTag iterates through the tokens until it reaches a StartTag or EndTag
// It is similar to goxpp's NextTag method except it wont throw an error if
// the next immediate token isnt a Start/EndTag. Instead, it will continue to
// consume tokens until it hits a Start/EndTag or EndDocument.
func NextTag(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
event, err = p.Next()
if err != nil {
return event, err
}
if event == xpp.StartTag || event == xpp.EndTag {
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find NextTag before reaching the end of the document.")
}
}
return
}
// ParseText is a helper function for parsing the text // ParseText is a helper function for parsing the text
// from the current element of the XMLPullParser. // from the current element of the XMLPullParser.
@ -42,46 +82,16 @@ func ParseText(p *xpp.XMLPullParser) (string, error) {
result := text.InnerXML result := text.InnerXML
result = strings.TrimSpace(result) result = strings.TrimSpace(result)
if strings.Contains(result, CDATA_START) { if strings.HasPrefix(result, "<![CDATA[") &&
return StripCDATA(result), nil strings.HasSuffix(result, "]]>") {
result = strings.TrimPrefix(result, "<![CDATA[")
result = strings.TrimSuffix(result, "]]>")
return result, nil
} }
return DecodeEntities(result) return DecodeEntities(result)
} }
// StripCDATA removes CDATA tags from the string
// content outside of CDATA tags is passed via DecodeEntities
func StripCDATA(str string) string {
buf := bytes.NewBuffer([]byte{})
curr := 0
for curr < len(str) {
start := indexAt(str, CDATA_START, curr)
if start == -1 {
dec, _ := DecodeEntities(str[curr:])
buf.Write([]byte(dec))
return buf.String()
}
end := indexAt(str, CDATA_END, start)
if end == -1 {
dec, _ := DecodeEntities(str[curr:])
buf.Write([]byte(dec))
return buf.String()
}
buf.Write([]byte(str[start+len(CDATA_START) : end]))
curr = curr + end + len(CDATA_END)
}
return buf.String()
}
// DecodeEntities decodes escaped XML entities // DecodeEntities decodes escaped XML entities
// in a string and returns the unescaped string // in a string and returns the unescaped string
func DecodeEntities(str string) (string, error) { func DecodeEntities(str string) (string, error) {
@ -184,11 +194,3 @@ func ParseNameAddress(nameAddressText string) (name string, address string) {
} }
return return
} }
func indexAt(str, substr string, start int) int {
idx := strings.Index(str[start:], substr)
if idx > -1 {
idx += start
}
return idx
}

View file

@ -1,258 +0,0 @@
package shared
import (
"bytes"
"fmt"
"golang.org/x/net/html"
"net/url"
"strings"
"github.com/mmcdole/goxpp"
)
var (
// HTML attributes which contain URIs
// https://pythonhosted.org/feedparser/resolving-relative-links.html
// To catch every possible URI attribute is non-trivial:
// https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
htmlURIAttrs = map[string]bool{
"action": true,
"background": true,
"cite": true,
"codebase": true,
"data": true,
"href": true,
"poster": true,
"profile": true,
"scheme": true,
"src": true,
"uri": true,
"usemap": true,
}
)
type urlStack []*url.URL
func (s *urlStack) push(u *url.URL) {
*s = append([]*url.URL{u}, *s...)
}
func (s *urlStack) pop() *url.URL {
if s == nil || len(*s) == 0 {
return nil
}
var top *url.URL
top, *s = (*s)[0], (*s)[1:]
return top
}
func (s *urlStack) top() *url.URL {
if s == nil || len(*s) == 0 {
return nil
}
return (*s)[0]
}
type XMLBase struct {
stack urlStack
URIAttrs map[string]bool
}
// FindRoot iterates through the tokens of an xml document until
// it encounters its first StartTag event. It returns an error
// if it reaches EndDocument before finding a tag.
func (b *XMLBase) FindRoot(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
event, err = b.NextTag(p)
if err != nil {
return event, err
}
if event == xpp.StartTag {
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find root node before document end.")
}
}
return
}
// XMLBase.NextTag iterates through the tokens until it reaches a StartTag or
// EndTag It maintains the urlStack upon encountering StartTag and EndTags, so
// that the top of the stack (accessible through the CurrentBase() and
// CurrentBaseURL() methods) is the absolute base URI by which relative URIs
// should be resolved.
//
// NextTag is similar to goxpp's NextTag method except it wont throw an error
// if the next immediate token isnt a Start/EndTag. Instead, it will continue
// to consume tokens until it hits a Start/EndTag or EndDocument.
func (b *XMLBase) NextTag(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
for {
if p.Event == xpp.EndTag {
// Pop xml:base after each end tag
b.pop()
}
event, err = p.Next()
if err != nil {
return event, err
}
if event == xpp.EndTag {
break
}
if event == xpp.StartTag {
base := parseBase(p)
err = b.push(base)
if err != nil {
return
}
err = b.resolveAttrs(p)
if err != nil {
return
}
break
}
if event == xpp.EndDocument {
return event, fmt.Errorf("Failed to find NextTag before reaching the end of the document.")
}
}
return
}
func parseBase(p *xpp.XMLPullParser) string {
xmlURI := "http://www.w3.org/XML/1998/namespace"
for _, attr := range p.Attrs {
if attr.Name.Local == "base" && attr.Name.Space == xmlURI {
return attr.Value
}
}
return ""
}
func (b *XMLBase) push(base string) error {
newURL, err := url.Parse(base)
if err != nil {
return err
}
topURL := b.CurrentBaseURL()
if topURL != nil {
newURL = topURL.ResolveReference(newURL)
}
b.stack.push(newURL)
return nil
}
// returns the popped base URL
func (b *XMLBase) pop() string {
url := b.stack.pop()
if url != nil {
return url.String()
}
return ""
}
func (b *XMLBase) CurrentBaseURL() *url.URL {
return b.stack.top()
}
func (b *XMLBase) CurrentBase() string {
if url := b.CurrentBaseURL(); url != nil {
return url.String()
}
return ""
}
// resolve the given string as a URL relative to current base
func (b *XMLBase) ResolveURL(u string) (string, error) {
if b.CurrentBase() == "" {
return u, nil
}
relURL, err := url.Parse(u)
if err != nil {
return u, err
}
curr := b.CurrentBaseURL()
if curr.Path != "" && u != "" && curr.Path[len(curr.Path)-1] != '/' {
// There's no reason someone would use a path in xml:base if they
// didn't mean for it to be a directory
curr.Path = curr.Path + "/"
}
absURL := b.CurrentBaseURL().ResolveReference(relURL)
return absURL.String(), nil
}
// resolve relative URI attributes according to xml:base
func (b *XMLBase) resolveAttrs(p *xpp.XMLPullParser) error {
for i, attr := range p.Attrs {
lowerName := strings.ToLower(attr.Name.Local)
if b.URIAttrs[lowerName] {
absURL, err := b.ResolveURL(attr.Value)
if err != nil {
return err
}
p.Attrs[i].Value = absURL
}
}
return nil
}
// Transforms html by resolving any relative URIs in attributes
// if an error occurs during parsing or serialization, then the original string
// is returned along with the error.
func (b *XMLBase) ResolveHTML(relHTML string) (string, error) {
if b.CurrentBase() == "" {
return relHTML, nil
}
htmlReader := strings.NewReader(relHTML)
doc, err := html.Parse(htmlReader)
if err != nil {
return relHTML, err
}
var visit func(*html.Node)
// recursively traverse HTML resolving any relative URIs in attributes
visit = func(n *html.Node) {
if n.Type == html.ElementNode {
for i, a := range n.Attr {
if htmlURIAttrs[a.Key] {
absVal, err := b.ResolveURL(a.Val)
if err == nil {
n.Attr[i].Val = absVal
}
break
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
visit(c)
}
}
visit(doc)
var w bytes.Buffer
err = html.Render(&w, doc)
if err != nil {
return relHTML, err
}
// html.Render() always writes a complete html5 document, so strip the html
// and body tags
absHTML := w.String()
absHTML = strings.TrimPrefix(absHTML, "<html><head></head><body>")
absHTML = strings.TrimSuffix(absHTML, "</body></html>")
return absHTML, err
}

View file

@ -12,10 +12,6 @@ import (
"github.com/mmcdole/gofeed/rss" "github.com/mmcdole/gofeed/rss"
) )
// ErrFeedTypeNotDetected is returned when the detection system can not figure
// out the Feed format
var ErrFeedTypeNotDetected = errors.New("Failed to detect feed type")
// HTTPError represents an HTTP error returned by a server. // HTTPError represents an HTTP error returned by a server.
type HTTPError struct { type HTTPError struct {
StatusCode int StatusCode int
@ -69,21 +65,14 @@ func (f *Parser) Parse(feed io.Reader) (*Feed, error) {
case FeedTypeRSS: case FeedTypeRSS:
return f.parseRSSFeed(r) return f.parseRSSFeed(r)
} }
return nil, errors.New("Failed to detect feed type")
return nil, ErrFeedTypeNotDetected
} }
// ParseURL fetches the contents of a given url and // ParseURL fetches the contents of a given url and
// attempts to parse the response into the universal feed type. // attempts to parse the response into the universal feed type.
func (f *Parser) ParseURL(feedURL string) (feed *Feed, err error) { func (f *Parser) ParseURL(feedURL string) (feed *Feed, err error) {
client := f.httpClient() client := f.httpClient()
resp, err := client.Get(feedURL)
req, err := http.NewRequest("GET", feedURL, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "Gofeed/1.0")
resp, err := client.Do(req)
if err != nil { if err != nil {
return nil, err return nil, err

View file

@ -5,22 +5,19 @@ import (
"io" "io"
"strings" "strings"
ext "github.com/mmcdole/gofeed/extensions" "github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared" "github.com/mmcdole/gofeed/internal/shared"
xpp "github.com/mmcdole/goxpp" "github.com/mmcdole/goxpp"
) )
// Parser is a RSS Parser // Parser is a RSS Parser
type Parser struct { type Parser struct{}
base *shared.XMLBase
}
// Parse parses an xml feed into an rss.Feed // Parse parses an xml feed into an rss.Feed
func (rp *Parser) Parse(feed io.Reader) (*Feed, error) { func (rp *Parser) Parse(feed io.Reader) (*Feed, error) {
p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel) p := xpp.NewXMLPullParser(feed, false, shared.NewReaderLabel)
rp.base = &shared.XMLBase{}
_, err := rp.base.FindRoot(p) _, err := shared.FindRoot(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -44,7 +41,7 @@ func (rp *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
ver := rp.parseVersion(p) ver := rp.parseVersion(p)
for { for {
tok, err := rp.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -130,7 +127,7 @@ func (rp *Parser) parseChannel(p *xpp.XMLPullParser) (rss *Feed, err error) {
categories := []*Category{} categories := []*Category{}
for { for {
tok, err := rp.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -321,7 +318,7 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
categories := []*Category{} categories := []*Category{}
for { for {
tok, err := rp.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -495,7 +492,7 @@ func (rp *Parser) parseImage(p *xpp.XMLPullParser) (image *Image, err error) {
image = &Image{} image = &Image{}
for { for {
tok, err := rp.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return image, err return image, err
} }
@ -607,7 +604,7 @@ func (rp *Parser) parseTextInput(p *xpp.XMLPullParser) (*TextInput, error) {
ti := &TextInput{} ti := &TextInput{}
for { for {
tok, err := rp.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -664,7 +661,7 @@ func (rp *Parser) parseSkipHours(p *xpp.XMLPullParser) ([]string, error) {
hours := []string{} hours := []string{}
for { for {
tok, err := rp.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -702,7 +699,7 @@ func (rp *Parser) parseSkipDays(p *xpp.XMLPullParser) ([]string, error) {
days := []string{} days := []string{}
for { for {
tok, err := rp.base.NextTag(p) tok, err := shared.NextTag(p)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -744,7 +741,7 @@ func (rp *Parser) parseCloud(p *xpp.XMLPullParser) (*Cloud, error) {
cloud.RegisterProcedure = p.Attribute("registerProcedure") cloud.RegisterProcedure = p.Attribute("registerProcedure")
cloud.Protocol = p.Attribute("protocol") cloud.Protocol = p.Attribute("protocol")
rp.base.NextTag(p) shared.NextTag(p)
if err := p.Expect(xpp.EndTag, "cloud"); err != nil { if err := p.Expect(xpp.EndTag, "cloud"); err != nil {
return nil, err return nil, err

View file

@ -6,7 +6,7 @@ import (
"time" "time"
"github.com/mmcdole/gofeed/atom" "github.com/mmcdole/gofeed/atom"
ext "github.com/mmcdole/gofeed/extensions" "github.com/mmcdole/gofeed/extensions"
"github.com/mmcdole/gofeed/internal/shared" "github.com/mmcdole/gofeed/internal/shared"
"github.com/mmcdole/gofeed/rss" "github.com/mmcdole/gofeed/rss"
) )

24
vendor/github.com/mmcdole/goxpp/.gitignore generated vendored Normal file
View file

@ -0,0 +1,24 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof

17
vendor/github.com/mmcdole/goxpp/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,17 @@
language: go
go:
- tip
- 1.6
- 1.5
- 1.4
install:
- go get github.com/stretchr/testify/assert
- go get golang.org/x/tools/cmd/cover
- go get github.com/mattn/goveralls
script:
- go test -v -covermode=count -coverprofile=coverage.out
- $HOME/gopath/bin/goveralls -coverprofile=coverage.out -service=travis-ci -repotoken=$COVERALLS_TOKEN
env:
global:
secure: IZqRp8DmY6LLP+9PTjttLFuCeA/IqW4qQWGnNqyc8nwKoqbHTVY/MZav9WrocGBZZGdI/zLghnE3wp2bywPyBoBPJv9oq7NUZ47DGpdkaLazgP9O68dtshOZ/zDZU7kSbGDF6mePVYjPjkBkWFyt13np89n/bB0L0zdsQE3bKukJ0lSnN32aOjphL+nFEfxM5ghnOuZ7ZDyXTjehQNkNZG73T2ttic8pxi1M+xxl1FXODNgTz0D6qs2ZdKSjJeE9n5iOJqNxIRfW1iIXPn8L2UQBV1+8aohxGy22flwz1ZCO2MZJLqdR1apGZdqVrYhKjxOnyyRWfEX3mpl6/EiW1gLqPgpzjKPuu/wiwfUJOBdFbrn5WGoR6f16XJ6bmxo4NGUEtBXeZz932HWl8XXD+CAGfLNw1NAabH1HpNAYBd4CFpiIi5RtU5sRtumzXwjvgHxlRhwIRb6jWqWezBbRL10MrnhnUyyXu1AWV7LrSvPDbXZ/5NL4/fbW/Piop1vhuNhHauLvZxWa1yv9q1CN8Uad8KEiJcjRj6lac7CCTspoEaPEVlL01tJZDllga1XQCJcRJVPSzt0qEzG2XqhwjWaRkbMLqLXJ9/0YsY/QU9BA0mtPwo+2e2J8ZRmqqXNatysNk9l5sH5TBk3lP+n4jFtI0nGndCxwqPHFGsjhPsE=

15
vendor/github.com/olekukonko/tablewriter/.gitignore generated vendored Normal file
View file

@ -0,0 +1,15 @@
# Created by .ignore support plugin (hsz.mobi)
### Go template
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, build with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out

14
vendor/github.com/olekukonko/tablewriter/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,14 @@
language: go
go:
- 1.1
- 1.2
- 1.3
- 1.4
- 1.5
- 1.6
- 1.7
- 1.8
- 1.9
- "1.10"
- tip

View file

@ -233,7 +233,63 @@ table.Render()
#### Table with color Output #### Table with color Output
![Table with Color](https://cloud.githubusercontent.com/assets/6460392/21101956/bbc7b356-c0a1-11e6-9f36-dba694746efc.png) ![Table with Color](https://cloud.githubusercontent.com/assets/6460392/21101956/bbc7b356-c0a1-11e6-9f36-dba694746efc.png)
#### Example 6 - Set table caption #### Example - 7 Table Cells with Color
Individual Cell Colors from `func Rich` take precedence over Column Colors
```go
data := [][]string{
[]string{"Test1Merge", "HelloCol2 - 1", "HelloCol3 - 1", "HelloCol4 - 1"},
[]string{"Test1Merge", "HelloCol2 - 2", "HelloCol3 - 2", "HelloCol4 - 2"},
[]string{"Test1Merge", "HelloCol2 - 3", "HelloCol3 - 3", "HelloCol4 - 3"},
[]string{"Test2Merge", "HelloCol2 - 4", "HelloCol3 - 4", "HelloCol4 - 4"},
[]string{"Test2Merge", "HelloCol2 - 5", "HelloCol3 - 5", "HelloCol4 - 5"},
[]string{"Test2Merge", "HelloCol2 - 6", "HelloCol3 - 6", "HelloCol4 - 6"},
[]string{"Test2Merge", "HelloCol2 - 7", "HelloCol3 - 7", "HelloCol4 - 7"},
[]string{"Test3Merge", "HelloCol2 - 8", "HelloCol3 - 8", "HelloCol4 - 8"},
[]string{"Test3Merge", "HelloCol2 - 9", "HelloCol3 - 9", "HelloCol4 - 9"},
[]string{"Test3Merge", "HelloCol2 - 10", "HelloCol3 -10", "HelloCol4 - 10"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Col1", "Col2", "Col3", "Col4"})
table.SetFooter([]string{"", "", "Footer3", "Footer4"})
table.SetBorder(false)
table.SetHeaderColor(tablewriter.Colors{tablewriter.Bold, tablewriter.BgGreenColor},
tablewriter.Colors{tablewriter.FgHiRedColor, tablewriter.Bold, tablewriter.BgBlackColor},
tablewriter.Colors{tablewriter.BgRedColor, tablewriter.FgWhiteColor},
tablewriter.Colors{tablewriter.BgCyanColor, tablewriter.FgWhiteColor})
table.SetColumnColor(tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiBlackColor},
tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiRedColor},
tablewriter.Colors{tablewriter.Bold, tablewriter.FgHiBlackColor},
tablewriter.Colors{tablewriter.Bold, tablewriter.FgBlackColor})
table.SetFooterColor(tablewriter.Colors{}, tablewriter.Colors{},
tablewriter.Colors{tablewriter.Bold},
tablewriter.Colors{tablewriter.FgHiRedColor})
colorData1 := []string{"TestCOLOR1Merge", "HelloCol2 - COLOR1", "HelloCol3 - COLOR1", "HelloCol4 - COLOR1"}
colorData2 := []string{"TestCOLOR2Merge", "HelloCol2 - COLOR2", "HelloCol3 - COLOR2", "HelloCol4 - COLOR2"}
for i, row := range data {
if i == 4 {
table.Rich(colorData1, []tablewriter.Colors{tablewriter.Colors{}, tablewriter.Colors{tablewriter.Normal, tablewriter.FgCyanColor}, tablewriter.Colors{tablewriter.Bold, tablewriter.FgWhiteColor}, tablewriter.Colors{}})
table.Rich(colorData2, []tablewriter.Colors{tablewriter.Colors{tablewriter.Normal, tablewriter.FgMagentaColor}, tablewriter.Colors{}, tablewriter.Colors{tablewriter.Bold, tablewriter.BgRedColor}, tablewriter.Colors{tablewriter.FgHiGreenColor, tablewriter.Italic, tablewriter.BgHiCyanColor}})
}
table.Append(row)
}
table.SetAutoMergeCells(true)
table.Render()
```
##### Table cells with color Output
![Table cells with Color](https://user-images.githubusercontent.com/9064687/63969376-bcd88d80-ca6f-11e9-9466-c3d954700b25.png)
#### Example 8 - Set table caption
```go ```go
data := [][]string{ data := [][]string{
[]string{"A", "The Good", "500"}, []string{"A", "The Good", "500"},
@ -254,7 +310,7 @@ table.Render() // Send output
Note: Caption text will wrap with total width of rendered table. Note: Caption text will wrap with total width of rendered table.
##### Output 6 ##### Output 7
``` ```
+------+-----------------------+--------+ +------+-----------------------+--------+
| NAME | SIGN | RATING | | NAME | SIGN | RATING |
@ -267,6 +323,41 @@ Note: Caption text will wrap with total width of rendered table.
Movie ratings. Movie ratings.
``` ```
#### Example 8 - Set NoWhiteSpace and TablePadding option
```go
data := [][]string{
{"node1.example.com", "Ready", "compute", "1.11"},
{"node2.example.com", "Ready", "compute", "1.11"},
{"node3.example.com", "Ready", "compute", "1.11"},
{"node4.example.com", "NotReady", "compute", "1.11"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Name", "Status", "Role", "Version"})
table.SetAutoWrapText(false)
table.SetAutoFormatHeaders(true)
table.SetHeaderAlignment(ALIGN_LEFT)
table.SetAlignment(ALIGN_LEFT)
table.SetCenterSeparator("")
table.SetColumnSeparator("")
table.SetRowSeparator("")
table.SetHeaderLine(false)
table.SetBorder(false)
table.SetTablePadding("\t") // pad with tabs
table.SetNoWhiteSpace(true)
table.AppendBulk(data) // Add Bulk Data
table.Render()
```
##### Output 8
```
NAME STATUS ROLE VERSION
node1.example.com Ready compute 1.11
node2.example.com Ready compute 1.11
node3.example.com Ready compute 1.11
node4.example.com NotReady compute 1.11
```
#### Render table into a string #### Render table into a string
Instead of rendering the table to `io.Stdout` you can also render it into a string. Go 1.10 introduced the `strings.Builder` type which implements the `io.Writer` interface and can therefore be used for this task. Example: Instead of rendering the table to `io.Stdout` you can also render it into a string. Go 1.10 introduced the `strings.Builder` type which implements the `io.Writer` interface and can therefore be used for this task. Example:

View file

@ -2,7 +2,4 @@ module github.com/olekukonko/tablewriter
go 1.12 go 1.12
require ( require github.com/mattn/go-runewidth v0.0.7
github.com/mattn/go-runewidth v0.0.4
github.com/olekukonko/tablewriter v0.0.1
)

View file

@ -1,4 +1,2 @@
github.com/mattn/go-runewidth v0.0.4 h1:2BvfKmzob6Bmd4YsL0zygOqfdFnK7GR4QL06Do4/p7Y= github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54=
github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/olekukonko/tablewriter v0.0.1 h1:b3iUnf1v+ppJiOfNX4yxxqfWKMQPZR5yoh8urCTFX88=
github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo=

View file

@ -72,6 +72,8 @@ type Table struct {
newLine string newLine string
rowLine bool rowLine bool
autoMergeCells bool autoMergeCells bool
noWhiteSpace bool
tablePadding string
hdrLine bool hdrLine bool
borders Border borders Border
colSize int colSize int
@ -225,6 +227,16 @@ func (t *Table) SetAlignment(align int) {
t.align = align t.align = align
} }
// Set No White Space
func (t *Table) SetNoWhiteSpace(allow bool) {
t.noWhiteSpace = allow
}
// Set Table Padding
func (t *Table) SetTablePadding(padding string) {
t.tablePadding = padding
}
func (t *Table) SetColumnAlignment(keys []int) { func (t *Table) SetColumnAlignment(keys []int) {
for _, v := range keys { for _, v := range keys {
switch v { switch v {
@ -296,6 +308,33 @@ func (t *Table) Append(row []string) {
t.lines = append(t.lines, line) t.lines = append(t.lines, line)
} }
// Append row to table with color attributes
func (t *Table) Rich(row []string, colors []Colors) {
rowSize := len(t.headers)
if rowSize > t.colSize {
t.colSize = rowSize
}
n := len(t.lines)
line := [][]string{}
for i, v := range row {
// Detect string width
// Detect String height
// Break strings into words
out := t.parseDimension(v, i, n)
if len(colors) > i {
color := colors[i]
out[0] = format(out[0], color)
}
// Append broken words
line = append(line, out)
}
t.lines = append(t.lines, line)
}
// Allow Support for Bulk Append // Allow Support for Bulk Append
// Eliminates repeated for loops // Eliminates repeated for loops
func (t *Table) AppendBulk(rows [][]string) { func (t *Table) AppendBulk(rows [][]string) {
@ -411,11 +450,14 @@ func (t *Table) printHeading() {
for x := 0; x < max; x++ { for x := 0; x < max; x++ {
// Check if border is set // Check if border is set
// Replace with space if not set // Replace with space if not set
if !t.noWhiteSpace {
fmt.Fprint(t.out, ConditionString(t.borders.Left, t.pColumn, SPACE)) fmt.Fprint(t.out, ConditionString(t.borders.Left, t.pColumn, SPACE))
}
for y := 0; y <= end; y++ { for y := 0; y <= end; y++ {
v := t.cs[y] v := t.cs[y]
h := "" h := ""
if y < len(t.headers) && x < len(t.headers[y]) { if y < len(t.headers) && x < len(t.headers[y]) {
h = t.headers[y][x] h = t.headers[y][x]
} }
@ -423,15 +465,30 @@ func (t *Table) printHeading() {
h = Title(h) h = Title(h)
} }
pad := ConditionString((y == end && !t.borders.Left), SPACE, t.pColumn) pad := ConditionString((y == end && !t.borders.Left), SPACE, t.pColumn)
if t.noWhiteSpace {
pad = ConditionString((y == end && !t.borders.Left), SPACE, t.tablePadding)
}
if is_esc_seq { if is_esc_seq {
if !t.noWhiteSpace {
fmt.Fprintf(t.out, " %s %s", fmt.Fprintf(t.out, " %s %s",
format(padFunc(h, SPACE, v), format(padFunc(h, SPACE, v),
t.headerParams[y]), pad) t.headerParams[y]), pad)
} else { } else {
fmt.Fprintf(t.out, "%s %s",
format(padFunc(h, SPACE, v),
t.headerParams[y]), pad)
}
} else {
if !t.noWhiteSpace {
fmt.Fprintf(t.out, " %s %s", fmt.Fprintf(t.out, " %s %s",
padFunc(h, SPACE, v), padFunc(h, SPACE, v),
pad) pad)
} else {
// the spaces between breaks the kube formatting
fmt.Fprintf(t.out, "%s%s",
padFunc(h, SPACE, v),
pad)
}
} }
} }
// Next line // Next line
@ -654,9 +711,11 @@ func (t *Table) printRow(columns [][]string, rowIdx int) {
for y := 0; y < total; y++ { for y := 0; y < total; y++ {
// Check if border is set // Check if border is set
if !t.noWhiteSpace {
fmt.Fprint(t.out, ConditionString((!t.borders.Left && y == 0), SPACE, t.pColumn)) fmt.Fprint(t.out, ConditionString((!t.borders.Left && y == 0), SPACE, t.pColumn))
fmt.Fprintf(t.out, SPACE) fmt.Fprintf(t.out, SPACE)
}
str := columns[y][x] str := columns[y][x]
// Embedding escape sequence with column value // Embedding escape sequence with column value
@ -688,11 +747,17 @@ func (t *Table) printRow(columns [][]string, rowIdx int) {
} }
} }
if !t.noWhiteSpace {
fmt.Fprintf(t.out, SPACE) fmt.Fprintf(t.out, SPACE)
} else {
fmt.Fprintf(t.out, t.tablePadding)
}
} }
// Check if border is set // Check if border is set
// Replace with space if not set // Replace with space if not set
if !t.noWhiteSpace {
fmt.Fprint(t.out, ConditionString(t.borders.Left, t.pColumn, SPACE)) fmt.Fprint(t.out, ConditionString(t.borders.Left, t.pColumn, SPACE))
}
fmt.Fprint(t.out, t.newLine) fmt.Fprint(t.out, t.newLine)
} }
@ -804,7 +869,7 @@ func (t *Table) printRowMergeCells(writer io.Writer, columns [][]string, rowIdx
//The new previous line is the current one //The new previous line is the current one
previousLine = make([]string, total) previousLine = make([]string, total)
for y := 0; y < total; y++ { for y := 0; y < total; y++ {
previousLine[y] = strings.TrimRight(strings.Join(columns[y], " ")," ") //Store the full line for multi-lines cells previousLine[y] = strings.TrimRight(strings.Join(columns[y], " "), " ") //Store the full line for multi-lines cells
} }
//Returns the newly added line and wether or not a border should be displayed above. //Returns the newly added line and wether or not a border should be displayed above.
return previousLine, displayCellBorder return previousLine, displayCellBorder

View file

@ -89,6 +89,8 @@ func format(s string, codes interface{}) string {
seq = v seq = v
case []int: case []int:
seq = makeSequence(v) seq = makeSequence(v)
case Colors:
seq = makeSequence(v)
default: default:
return s return s
} }

14
vendor/github.com/ssor/bom/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,14 @@
language: go
go:
- tip
- 1.8
- 1.7
- 1.6
- 1.5
- 1.4
- 1.3
- 1.2
notifications:
email:
on_success: change
on_failure: always

3
vendor/golang.org/x/net/AUTHORS generated vendored Normal file
View file

@ -0,0 +1,3 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

3
vendor/golang.org/x/net/CONTRIBUTORS generated vendored Normal file
View file

@ -0,0 +1,3 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

View file

@ -1,712 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
//go:generate go run gen.go
//go:generate go run gen.go -test
package main
import (
"bytes"
"flag"
"fmt"
"go/format"
"io/ioutil"
"math/rand"
"os"
"sort"
"strings"
)
// identifier converts s to a Go exported identifier.
// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
func identifier(s string) string {
b := make([]byte, 0, len(s))
cap := true
for _, c := range s {
if c == '-' {
cap = true
continue
}
if cap && 'a' <= c && c <= 'z' {
c -= 'a' - 'A'
}
cap = false
b = append(b, byte(c))
}
return string(b)
}
var test = flag.Bool("test", false, "generate table_test.go")
func genFile(name string, buf *bytes.Buffer) {
b, err := format.Source(buf.Bytes())
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
if err := ioutil.WriteFile(name, b, 0644); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}
func main() {
flag.Parse()
var all []string
all = append(all, elements...)
all = append(all, attributes...)
all = append(all, eventHandlers...)
all = append(all, extra...)
sort.Strings(all)
// uniq - lists have dups
w := 0
for _, s := range all {
if w == 0 || all[w-1] != s {
all[w] = s
w++
}
}
all = all[:w]
if *test {
var buf bytes.Buffer
fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
fmt.Fprintln(&buf, "package atom\n")
fmt.Fprintln(&buf, "var testAtomList = []string{")
for _, s := range all {
fmt.Fprintf(&buf, "\t%q,\n", s)
}
fmt.Fprintln(&buf, "}")
genFile("table_test.go", &buf)
return
}
// Find hash that minimizes table size.
var best *table
for i := 0; i < 1000000; i++ {
if best != nil && 1<<(best.k-1) < len(all) {
break
}
h := rand.Uint32()
for k := uint(0); k <= 16; k++ {
if best != nil && k >= best.k {
break
}
var t table
if t.init(h, k, all) {
best = &t
break
}
}
}
if best == nil {
fmt.Fprintf(os.Stderr, "failed to construct string table\n")
os.Exit(1)
}
// Lay out strings, using overlaps when possible.
layout := append([]string{}, all...)
// Remove strings that are substrings of other strings
for changed := true; changed; {
changed = false
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i != j && t != "" && strings.Contains(s, t) {
changed = true
layout[j] = ""
}
}
}
}
// Join strings where one suffix matches another prefix.
for {
// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
// maximizing overlap length k.
besti := -1
bestj := -1
bestk := 0
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i == j {
continue
}
for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
if s[len(s)-k:] == t[:k] {
besti = i
bestj = j
bestk = k
}
}
}
}
if bestk > 0 {
layout[besti] += layout[bestj][bestk:]
layout[bestj] = ""
continue
}
break
}
text := strings.Join(layout, "")
atom := map[string]uint32{}
for _, s := range all {
off := strings.Index(text, s)
if off < 0 {
panic("lost string " + s)
}
atom[s] = uint32(off<<8 | len(s))
}
var buf bytes.Buffer
// Generate the Go code.
fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
fmt.Fprintln(&buf, "package atom\n\nconst (")
// compute max len
maxLen := 0
for _, s := range all {
if maxLen < len(s) {
maxLen = len(s)
}
fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
}
fmt.Fprintln(&buf, ")\n")
fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
for i, s := range best.tab {
if s == "" {
continue
}
fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
}
fmt.Fprintf(&buf, "}\n")
datasize := (1 << best.k) * 4
fmt.Fprintln(&buf, "const atomText =")
textsize := len(text)
for len(text) > 60 {
fmt.Fprintf(&buf, "\t%q +\n", text[:60])
text = text[60:]
}
fmt.Fprintf(&buf, "\t%q\n\n", text)
genFile("table.go", &buf)
fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
}
type byLen []string
func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x byLen) Len() int { return len(x) }
// fnv computes the FNV hash with an arbitrary starting value h.
func fnv(h uint32, s string) uint32 {
for i := 0; i < len(s); i++ {
h ^= uint32(s[i])
h *= 16777619
}
return h
}
// A table represents an attempt at constructing the lookup table.
// The lookup table uses cuckoo hashing, meaning that each string
// can be found in one of two positions.
type table struct {
h0 uint32
k uint
mask uint32
tab []string
}
// hash returns the two hashes for s.
func (t *table) hash(s string) (h1, h2 uint32) {
h := fnv(t.h0, s)
h1 = h & t.mask
h2 = (h >> 16) & t.mask
return
}
// init initializes the table with the given parameters.
// h0 is the initial hash value,
// k is the number of bits of hash value to use, and
// x is the list of strings to store in the table.
// init returns false if the table cannot be constructed.
func (t *table) init(h0 uint32, k uint, x []string) bool {
t.h0 = h0
t.k = k
t.tab = make([]string, 1<<k)
t.mask = 1<<k - 1
for _, s := range x {
if !t.insert(s) {
return false
}
}
return true
}
// insert inserts s in the table.
func (t *table) insert(s string) bool {
h1, h2 := t.hash(s)
if t.tab[h1] == "" {
t.tab[h1] = s
return true
}
if t.tab[h2] == "" {
t.tab[h2] = s
return true
}
if t.push(h1, 0) {
t.tab[h1] = s
return true
}
if t.push(h2, 0) {
t.tab[h2] = s
return true
}
return false
}
// push attempts to push aside the entry in slot i.
func (t *table) push(i uint32, depth int) bool {
if depth > len(t.tab) {
return false
}
s := t.tab[i]
h1, h2 := t.hash(s)
j := h1 + h2 - i
if t.tab[j] != "" && !t.push(j, depth+1) {
return false
}
t.tab[j] = s
return true
}
// The lists of element names and attribute keys were taken from
// https://html.spec.whatwg.org/multipage/indices.html#index
// as of the "HTML Living Standard - Last Updated 16 April 2018" version.
// "command", "keygen" and "menuitem" have been removed from the spec,
// but are kept here for backwards compatibility.
var elements = []string{
"a",
"abbr",
"address",
"area",
"article",
"aside",
"audio",
"b",
"base",
"bdi",
"bdo",
"blockquote",
"body",
"br",
"button",
"canvas",
"caption",
"cite",
"code",
"col",
"colgroup",
"command",
"data",
"datalist",
"dd",
"del",
"details",
"dfn",
"dialog",
"div",
"dl",
"dt",
"em",
"embed",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"hgroup",
"hr",
"html",
"i",
"iframe",
"img",
"input",
"ins",
"kbd",
"keygen",
"label",
"legend",
"li",
"link",
"main",
"map",
"mark",
"menu",
"menuitem",
"meta",
"meter",
"nav",
"noscript",
"object",
"ol",
"optgroup",
"option",
"output",
"p",
"param",
"picture",
"pre",
"progress",
"q",
"rp",
"rt",
"ruby",
"s",
"samp",
"script",
"section",
"select",
"slot",
"small",
"source",
"span",
"strong",
"style",
"sub",
"summary",
"sup",
"table",
"tbody",
"td",
"template",
"textarea",
"tfoot",
"th",
"thead",
"time",
"title",
"tr",
"track",
"u",
"ul",
"var",
"video",
"wbr",
}
// https://html.spec.whatwg.org/multipage/indices.html#attributes-3
//
// "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup",
// "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec,
// but are kept here for backwards compatibility.
var attributes = []string{
"abbr",
"accept",
"accept-charset",
"accesskey",
"action",
"allowfullscreen",
"allowpaymentrequest",
"allowusermedia",
"alt",
"as",
"async",
"autocomplete",
"autofocus",
"autoplay",
"challenge",
"charset",
"checked",
"cite",
"class",
"color",
"cols",
"colspan",
"command",
"content",
"contenteditable",
"contextmenu",
"controls",
"coords",
"crossorigin",
"data",
"datetime",
"default",
"defer",
"dir",
"dirname",
"disabled",
"download",
"draggable",
"dropzone",
"enctype",
"for",
"form",
"formaction",
"formenctype",
"formmethod",
"formnovalidate",
"formtarget",
"headers",
"height",
"hidden",
"high",
"href",
"hreflang",
"http-equiv",
"icon",
"id",
"inputmode",
"integrity",
"is",
"ismap",
"itemid",
"itemprop",
"itemref",
"itemscope",
"itemtype",
"keytype",
"kind",
"label",
"lang",
"list",
"loop",
"low",
"manifest",
"max",
"maxlength",
"media",
"mediagroup",
"method",
"min",
"minlength",
"multiple",
"muted",
"name",
"nomodule",
"nonce",
"novalidate",
"open",
"optimum",
"pattern",
"ping",
"placeholder",
"playsinline",
"poster",
"preload",
"radiogroup",
"readonly",
"referrerpolicy",
"rel",
"required",
"reversed",
"rows",
"rowspan",
"sandbox",
"spellcheck",
"scope",
"scoped",
"seamless",
"selected",
"shape",
"size",
"sizes",
"sortable",
"sorted",
"slot",
"span",
"spellcheck",
"src",
"srcdoc",
"srclang",
"srcset",
"start",
"step",
"style",
"tabindex",
"target",
"title",
"translate",
"type",
"typemustmatch",
"updateviacache",
"usemap",
"value",
"width",
"workertype",
"wrap",
}
// "onautocomplete", "onautocompleteerror", "onmousewheel",
// "onshow" and "onsort" have been removed from the spec,
// but are kept here for backwards compatibility.
var eventHandlers = []string{
"onabort",
"onautocomplete",
"onautocompleteerror",
"onauxclick",
"onafterprint",
"onbeforeprint",
"onbeforeunload",
"onblur",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onclick",
"onclose",
"oncontextmenu",
"oncopy",
"oncuechange",
"oncut",
"ondblclick",
"ondrag",
"ondragend",
"ondragenter",
"ondragexit",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"onhashchange",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
"onlanguagechange",
"onload",
"onloadeddata",
"onloadedmetadata",
"onloadend",
"onloadstart",
"onmessage",
"onmessageerror",
"onmousedown",
"onmouseenter",
"onmouseleave",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onmousewheel",
"onwheel",
"onoffline",
"ononline",
"onpagehide",
"onpageshow",
"onpaste",
"onpause",
"onplay",
"onplaying",
"onpopstate",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onrejectionhandled",
"onscroll",
"onsecuritypolicyviolation",
"onseeked",
"onseeking",
"onselect",
"onshow",
"onsort",
"onstalled",
"onstorage",
"onsubmit",
"onsuspend",
"ontimeupdate",
"ontoggle",
"onunhandledrejection",
"onunload",
"onvolumechange",
"onwaiting",
}
// extra are ad-hoc values not covered by any of the lists above.
var extra = []string{
"acronym",
"align",
"annotation",
"annotation-xml",
"applet",
"basefont",
"bgsound",
"big",
"blink",
"center",
"color",
"desc",
"face",
"font",
"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
"foreignobject",
"frame",
"frameset",
"image",
"isindex",
"listing",
"malignmark",
"marquee",
"math",
"mglyph",
"mi",
"mn",
"mo",
"ms",
"mtext",
"nobr",
"noembed",
"noframes",
"plaintext",
"prompt",
"public",
"rb",
"rtc",
"spacer",
"strike",
"svg",
"system",
"tt",
"xmp",
}

View file

@ -52,7 +52,6 @@ var isSpecialElementMap = map[string]bool{
"iframe": true, "iframe": true,
"img": true, "img": true,
"input": true, "input": true,
"isindex": true, // The 'isindex' element has been removed, but keep it for backwards compatibility.
"keygen": true, "keygen": true,
"li": true, "li": true,
"link": true, "link": true,

View file

@ -172,7 +172,6 @@ var svgAttributeAdjustments = map[string]string{
"diffuseconstant": "diffuseConstant", "diffuseconstant": "diffuseConstant",
"edgemode": "edgeMode", "edgemode": "edgeMode",
"externalresourcesrequired": "externalResourcesRequired", "externalresourcesrequired": "externalResourcesRequired",
"filterres": "filterRes",
"filterunits": "filterUnits", "filterunits": "filterUnits",
"glyphref": "glyphRef", "glyphref": "glyphRef",
"gradienttransform": "gradientTransform", "gradienttransform": "gradientTransform",

View file

@ -18,6 +18,11 @@ const (
ElementNode ElementNode
CommentNode CommentNode
DoctypeNode DoctypeNode
// RawNode nodes are not returned by the parser, but can be part of the
// Node tree passed to func Render to insert raw HTML (without escaping).
// If so, this package makes no guarantee that the rendered HTML is secure
// (from e.g. Cross Site Scripting attacks) or well-formed.
RawNode
scopeMarkerNode scopeMarkerNode
) )

244
vendor/golang.org/x/net/html/parse.go generated vendored
View file

@ -184,6 +184,17 @@ func (p *parser) clearStackToContext(s scope) {
} }
} }
// parseGenericRawTextElements implements the generic raw text element parsing
// algorithm defined in 12.2.6.2.
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
// officially, need to make tokenizer consider both states.
func (p *parser) parseGenericRawTextElement() {
p.addElement()
p.originalIM = p.im
p.im = textIM
}
// generateImpliedEndTags pops nodes off the stack of open elements as long as // generateImpliedEndTags pops nodes off the stack of open elements as long as
// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
// If exceptions are specified, nodes with that name will not be popped off. // If exceptions are specified, nodes with that name will not be popped off.
@ -192,7 +203,9 @@ func (p *parser) generateImpliedEndTags(exceptions ...string) {
loop: loop:
for i = len(p.oe) - 1; i >= 0; i-- { for i = len(p.oe) - 1; i >= 0; i-- {
n := p.oe[i] n := p.oe[i]
if n.Type == ElementNode { if n.Type != ElementNode {
break
}
switch n.DataAtom { switch n.DataAtom {
case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
for _, except := range exceptions { for _, except := range exceptions {
@ -202,7 +215,6 @@ loop:
} }
continue continue
} }
}
break break
} }
@ -369,8 +381,7 @@ findIdenticalElements:
// Section 12.2.4.3. // Section 12.2.4.3.
func (p *parser) clearActiveFormattingElements() { func (p *parser) clearActiveFormattingElements() {
for { for {
n := p.afe.pop() if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
if len(p.afe) == 0 || n.Type == scopeMarkerNode {
return return
} }
} }
@ -625,25 +636,29 @@ func inHeadIM(p *parser) bool {
switch p.tok.DataAtom { switch p.tok.DataAtom {
case a.Html: case a.Html:
return inBodyIM(p) return inBodyIM(p)
case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta: case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
p.addElement() p.addElement()
p.oe.pop() p.oe.pop()
p.acknowledgeSelfClosingTag() p.acknowledgeSelfClosingTag()
return true return true
case a.Noscript: case a.Noscript:
p.addElement()
if p.scripting { if p.scripting {
p.setOriginalIM() p.parseGenericRawTextElement()
p.im = textIM
} else {
p.im = inHeadNoscriptIM
}
return true return true
case a.Script, a.Title, a.Noframes, a.Style: }
p.addElement()
p.im = inHeadNoscriptIM
// Don't let the tokenizer go into raw text mode when scripting is disabled.
p.tokenizer.NextIsNotRawText()
return true
case a.Script, a.Title:
p.addElement() p.addElement()
p.setOriginalIM() p.setOriginalIM()
p.im = textIM p.im = textIM
return true return true
case a.Noframes, a.Style:
p.parseGenericRawTextElement()
return true
case a.Head: case a.Head:
// Ignore the token. // Ignore the token.
return true return true
@ -855,7 +870,7 @@ func inBodyIM(p *parser) bool {
return true return true
} }
copyAttributes(p.oe[0], p.tok) copyAttributes(p.oe[0], p.tok)
case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
return inHeadIM(p) return inHeadIM(p)
case a.Body: case a.Body:
if p.oe.contains(a.Template) { if p.oe.contains(a.Template) {
@ -881,7 +896,7 @@ func inBodyIM(p *parser) bool {
p.addElement() p.addElement()
p.im = inFramesetIM p.im = inFramesetIM
return true return true
case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul: case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
p.popUntil(buttonScope, a.P) p.popUntil(buttonScope, a.P)
p.addElement() p.addElement()
case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
@ -1014,53 +1029,6 @@ func inBodyIM(p *parser) bool {
p.tok.DataAtom = a.Img p.tok.DataAtom = a.Img
p.tok.Data = a.Img.String() p.tok.Data = a.Img.String()
return false return false
case a.Isindex:
if p.form != nil {
// Ignore the token.
return true
}
action := ""
prompt := "This is a searchable index. Enter search keywords: "
attr := []Attribute{{Key: "name", Val: "isindex"}}
for _, t := range p.tok.Attr {
switch t.Key {
case "action":
action = t.Val
case "name":
// Ignore the attribute.
case "prompt":
prompt = t.Val
default:
attr = append(attr, t)
}
}
p.acknowledgeSelfClosingTag()
p.popUntil(buttonScope, a.P)
p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
if p.form == nil {
// NOTE: The 'isindex' element has been removed,
// and the 'template' element has not been designed to be
// collaborative with the index element.
//
// Ignore the token.
return true
}
if action != "" {
p.form.Attr = []Attribute{{Key: "action", Val: action}}
}
p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
p.addText(prompt)
p.addChild(&Node{
Type: ElementNode,
DataAtom: a.Input,
Data: a.Input.String(),
Attr: attr,
})
p.oe.pop()
p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
case a.Textarea: case a.Textarea:
p.addElement() p.addElement()
p.setOriginalIM() p.setOriginalIM()
@ -1070,18 +1038,21 @@ func inBodyIM(p *parser) bool {
p.popUntil(buttonScope, a.P) p.popUntil(buttonScope, a.P)
p.reconstructActiveFormattingElements() p.reconstructActiveFormattingElements()
p.framesetOK = false p.framesetOK = false
p.addElement() p.parseGenericRawTextElement()
p.setOriginalIM()
p.im = textIM
case a.Iframe: case a.Iframe:
p.framesetOK = false p.framesetOK = false
p.parseGenericRawTextElement()
case a.Noembed:
p.parseGenericRawTextElement()
case a.Noscript:
if p.scripting {
p.parseGenericRawTextElement()
return true
}
p.reconstructActiveFormattingElements()
p.addElement() p.addElement()
p.setOriginalIM() // Don't let the tokenizer go into raw text mode when scripting is disabled.
p.im = textIM p.tokenizer.NextIsNotRawText()
case a.Noembed, a.Noscript:
p.addElement()
p.setOriginalIM()
p.im = textIM
case a.Select: case a.Select:
p.reconstructActiveFormattingElements() p.reconstructActiveFormattingElements()
p.addElement() p.addElement()
@ -1137,7 +1108,7 @@ func inBodyIM(p *parser) bool {
return false return false
} }
return true return true
case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul: case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
p.popUntil(defaultScope, p.tok.DataAtom) p.popUntil(defaultScope, p.tok.DataAtom)
case a.Form: case a.Form:
if p.oe.contains(a.Template) { if p.oe.contains(a.Template) {
@ -1198,7 +1169,7 @@ func inBodyIM(p *parser) bool {
if len(p.templateStack) > 0 { if len(p.templateStack) > 0 {
p.im = inTemplateIM p.im = inTemplateIM
return false return false
} else { }
for _, e := range p.oe { for _, e := range p.oe {
switch e.DataAtom { switch e.DataAtom {
case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th, case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
@ -1208,7 +1179,6 @@ func inBodyIM(p *parser) bool {
} }
} }
} }
}
return true return true
} }
@ -1221,9 +1191,15 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
// Once the code successfully parses the comprehensive test suite, we should // Once the code successfully parses the comprehensive test suite, we should
// refactor this code to be more idiomatic. // refactor this code to be more idiomatic.
// Steps 1-4. The outer loop. // Steps 1-2
if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
p.oe.pop()
return
}
// Steps 3-5. The outer loop.
for i := 0; i < 8; i++ { for i := 0; i < 8; i++ {
// Step 5. Find the formatting element. // Step 6. Find the formatting element.
var formattingElement *Node var formattingElement *Node
for j := len(p.afe) - 1; j >= 0; j-- { for j := len(p.afe) - 1; j >= 0; j-- {
if p.afe[j].Type == scopeMarkerNode { if p.afe[j].Type == scopeMarkerNode {
@ -1238,17 +1214,22 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
p.inBodyEndTagOther(tagAtom, tagName) p.inBodyEndTagOther(tagAtom, tagName)
return return
} }
// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
feIndex := p.oe.index(formattingElement) feIndex := p.oe.index(formattingElement)
if feIndex == -1 { if feIndex == -1 {
p.afe.remove(formattingElement) p.afe.remove(formattingElement)
return return
} }
// Step 8. Ignore the tag if formatting element is not in the scope.
if !p.elementInScope(defaultScope, tagAtom) { if !p.elementInScope(defaultScope, tagAtom) {
// Ignore the tag. // Ignore the tag.
return return
} }
// Steps 9-10. Find the furthest block. // Step 9. This step is omitted because it's just a parse error but no need to return.
// Steps 10-11. Find the furthest block.
var furthestBlock *Node var furthestBlock *Node
for _, e := range p.oe[feIndex:] { for _, e := range p.oe[feIndex:] {
if isSpecialElement(e) { if isSpecialElement(e) {
@ -1265,47 +1246,65 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
return return
} }
// Steps 11-12. Find the common ancestor and bookmark node. // Steps 12-13. Find the common ancestor and bookmark node.
commonAncestor := p.oe[feIndex-1] commonAncestor := p.oe[feIndex-1]
bookmark := p.afe.index(formattingElement) bookmark := p.afe.index(formattingElement)
// Step 13. The inner loop. Find the lastNode to reparent. // Step 14. The inner loop. Find the lastNode to reparent.
lastNode := furthestBlock lastNode := furthestBlock
node := furthestBlock node := furthestBlock
x := p.oe.index(node) x := p.oe.index(node)
// Steps 13.1-13.2 // Step 14.1.
for j := 0; j < 3; j++ { j := 0
// Step 13.3. for {
// Step 14.2.
j++
// Step. 14.3.
x-- x--
node = p.oe[x] node = p.oe[x]
// Step 13.4 - 13.5. // Step 14.4. Go to the next step if node is formatting element.
if node == formattingElement {
break
}
// Step 14.5. Remove node from the list of active formatting elements if
// inner loop counter is greater than three and node is in the list of
// active formatting elements.
if ni := p.afe.index(node); j > 3 && ni > -1 {
p.afe.remove(node)
// If any element of the list of active formatting elements is removed,
// we need to take care whether bookmark should be decremented or not.
// This is because the value of bookmark may exceed the size of the
// list by removing elements from the list.
if ni <= bookmark {
bookmark--
}
continue
}
// Step 14.6. Continue the next inner loop if node is not in the list of
// active formatting elements.
if p.afe.index(node) == -1 { if p.afe.index(node) == -1 {
p.oe.remove(node) p.oe.remove(node)
continue continue
} }
// Step 13.6. // Step 14.7.
if node == formattingElement {
break
}
// Step 13.7.
clone := node.clone() clone := node.clone()
p.afe[p.afe.index(node)] = clone p.afe[p.afe.index(node)] = clone
p.oe[p.oe.index(node)] = clone p.oe[p.oe.index(node)] = clone
node = clone node = clone
// Step 13.8. // Step 14.8.
if lastNode == furthestBlock { if lastNode == furthestBlock {
bookmark = p.afe.index(node) + 1 bookmark = p.afe.index(node) + 1
} }
// Step 13.9. // Step 14.9.
if lastNode.Parent != nil { if lastNode.Parent != nil {
lastNode.Parent.RemoveChild(lastNode) lastNode.Parent.RemoveChild(lastNode)
} }
node.AppendChild(lastNode) node.AppendChild(lastNode)
// Step 13.10. // Step 14.10.
lastNode = node lastNode = node
} }
// Step 14. Reparent lastNode to the common ancestor, // Step 15. Reparent lastNode to the common ancestor,
// or for misnested table nodes, to the foster parent. // or for misnested table nodes, to the foster parent.
if lastNode.Parent != nil { if lastNode.Parent != nil {
lastNode.Parent.RemoveChild(lastNode) lastNode.Parent.RemoveChild(lastNode)
@ -1317,13 +1316,13 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
commonAncestor.AppendChild(lastNode) commonAncestor.AppendChild(lastNode)
} }
// Steps 15-17. Reparent nodes from the furthest block's children // Steps 16-18. Reparent nodes from the furthest block's children
// to a clone of the formatting element. // to a clone of the formatting element.
clone := formattingElement.clone() clone := formattingElement.clone()
reparentChildren(clone, furthestBlock) reparentChildren(clone, furthestBlock)
furthestBlock.AppendChild(clone) furthestBlock.AppendChild(clone)
// Step 18. Fix up the list of active formatting elements. // Step 19. Fix up the list of active formatting elements.
if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark { if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
// Move the bookmark with the rest of the list. // Move the bookmark with the rest of the list.
bookmark-- bookmark--
@ -1331,7 +1330,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
p.afe.remove(formattingElement) p.afe.remove(formattingElement)
p.afe.insert(bookmark, clone) p.afe.insert(bookmark, clone)
// Step 19. Fix up the stack of open elements. // Step 20. Fix up the stack of open elements.
p.oe.remove(formattingElement) p.oe.remove(formattingElement)
p.oe.insert(p.oe.index(furthestBlock)+1, clone) p.oe.insert(p.oe.index(furthestBlock)+1, clone)
} }
@ -1502,14 +1501,13 @@ func inCaptionIM(p *parser) bool {
case StartTagToken: case StartTagToken:
switch p.tok.DataAtom { switch p.tok.DataAtom {
case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr: case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
if p.popUntil(tableScope, a.Caption) { if !p.popUntil(tableScope, a.Caption) {
p.clearActiveFormattingElements()
p.im = inTableIM
return false
} else {
// Ignore the token. // Ignore the token.
return true return true
} }
p.clearActiveFormattingElements()
p.im = inTableIM
return false
case a.Select: case a.Select:
p.reconstructActiveFormattingElements() p.reconstructActiveFormattingElements()
p.addElement() p.addElement()
@ -1526,14 +1524,13 @@ func inCaptionIM(p *parser) bool {
} }
return true return true
case a.Table: case a.Table:
if p.popUntil(tableScope, a.Caption) { if !p.popUntil(tableScope, a.Caption) {
p.clearActiveFormattingElements()
p.im = inTableIM
return false
} else {
// Ignore the token. // Ignore the token.
return true return true
} }
p.clearActiveFormattingElements()
p.im = inTableIM
return false
case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
// Ignore the token. // Ignore the token.
return true return true
@ -1777,12 +1774,11 @@ func inSelectIM(p *parser) bool {
} }
p.addElement() p.addElement()
case a.Select: case a.Select:
if p.popUntil(selectScope, a.Select) { if !p.popUntil(selectScope, a.Select) {
p.resetInsertionMode()
} else {
// Ignore the token. // Ignore the token.
return true return true
} }
p.resetInsertionMode()
case a.Input, a.Keygen, a.Textarea: case a.Input, a.Keygen, a.Textarea:
if p.elementInScope(selectScope, a.Select) { if p.elementInScope(selectScope, a.Select) {
p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
@ -1810,12 +1806,11 @@ func inSelectIM(p *parser) bool {
p.oe = p.oe[:i] p.oe = p.oe[:i]
} }
case a.Select: case a.Select:
if p.popUntil(selectScope, a.Select) { if !p.popUntil(selectScope, a.Select) {
p.resetInsertionMode()
} else {
// Ignore the token. // Ignore the token.
return true return true
} }
p.resetInsertionMode()
case a.Template: case a.Template:
return inHeadIM(p) return inHeadIM(p)
} }
@ -2136,6 +2131,7 @@ func parseForeignContent(p *parser) bool {
Data: p.tok.Data, Data: p.tok.Data,
}) })
case StartTagToken: case StartTagToken:
if !p.fragment {
b := breakout[p.tok.Data] b := breakout[p.tok.Data]
if p.tok.DataAtom == a.Font { if p.tok.DataAtom == a.Font {
loop: loop:
@ -2157,7 +2153,9 @@ func parseForeignContent(p *parser) bool {
} }
return false return false
} }
switch p.top().Namespace { }
current := p.adjustedCurrentNode()
switch current.Namespace {
case "math": case "math":
adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments) adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
case "svg": case "svg":
@ -2172,7 +2170,7 @@ func parseForeignContent(p *parser) bool {
panic("html: bad parser state: unexpected namespace") panic("html: bad parser state: unexpected namespace")
} }
adjustForeignAttributes(p.tok.Attr) adjustForeignAttributes(p.tok.Attr)
namespace := p.top().Namespace namespace := current.Namespace
p.addElement() p.addElement()
p.top().Namespace = namespace p.top().Namespace = namespace
if namespace != "" { if namespace != "" {
@ -2201,12 +2199,20 @@ func parseForeignContent(p *parser) bool {
return true return true
} }
// Section 12.2.4.2.
func (p *parser) adjustedCurrentNode() *Node {
if len(p.oe) == 1 && p.fragment && p.context != nil {
return p.context
}
return p.oe.top()
}
// Section 12.2.6. // Section 12.2.6.
func (p *parser) inForeignContent() bool { func (p *parser) inForeignContent() bool {
if len(p.oe) == 0 { if len(p.oe) == 0 {
return false return false
} }
n := p.oe[len(p.oe)-1] n := p.adjustedCurrentNode()
if n.Namespace == "" { if n.Namespace == "" {
return false return false
} }
@ -2341,8 +2347,7 @@ func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
f(p) f(p)
} }
err := p.parse() if err := p.parse(); err != nil {
if err != nil {
return nil, err return nil, err
} }
return p.doc, nil return p.doc, nil
@ -2364,7 +2369,6 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
contextTag = context.DataAtom.String() contextTag = context.DataAtom.String()
} }
p := &parser{ p := &parser{
tokenizer: NewTokenizerFragment(r, contextTag),
doc: &Node{ doc: &Node{
Type: DocumentNode, Type: DocumentNode,
}, },
@ -2372,6 +2376,11 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
fragment: true, fragment: true,
context: context, context: context,
} }
if context != nil && context.Namespace != "" {
p.tokenizer = NewTokenizer(r)
} else {
p.tokenizer = NewTokenizerFragment(r, contextTag)
}
for _, f := range opts { for _, f := range opts {
f(p) f(p)
@ -2396,8 +2405,7 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
} }
} }
err := p.parse() if err := p.parse(); err != nil {
if err != nil {
return nil, err return nil, err
} }

View file

@ -134,6 +134,9 @@ func render1(w writer, n *Node) error {
} }
} }
return w.WriteByte('>') return w.WriteByte('>')
case RawNode:
_, err := w.WriteString(n.Data)
return err
default: default:
return errors.New("html: unknown node type") return errors.New("html: unknown node type")
} }
@ -256,7 +259,6 @@ var voidElements = map[string]bool{
"base": true, "base": true,
"br": true, "br": true,
"col": true, "col": true,
"command": true,
"embed": true, "embed": true,
"hr": true, "hr": true,
"img": true, "img": true,

View file

@ -296,8 +296,7 @@ func (z *Tokenizer) Buffered() []byte {
// too many times in succession. // too many times in succession.
func readAtLeastOneByte(r io.Reader, b []byte) (int, error) { func readAtLeastOneByte(r io.Reader, b []byte) (int, error) {
for i := 0; i < 100; i++ { for i := 0; i < 100; i++ {
n, err := r.Read(b) if n, err := r.Read(b); n != 0 || err != nil {
if n != 0 || err != nil {
return n, err return n, err
} }
} }
@ -347,6 +346,7 @@ loop:
break loop break loop
} }
if c != '/' { if c != '/' {
z.raw.end--
continue loop continue loop
} }
if z.readRawEndTag() || z.err != nil { if z.readRawEndTag() || z.err != nil {
@ -1067,6 +1067,11 @@ loop:
// Raw returns the unmodified text of the current token. Calling Next, Token, // Raw returns the unmodified text of the current token. Calling Next, Token,
// Text, TagName or TagAttr may change the contents of the returned slice. // Text, TagName or TagAttr may change the contents of the returned slice.
//
// The token stream's raw bytes partition the byte stream (up until an
// ErrorToken). There are no overlaps or gaps between two consecutive token's
// raw bytes. One implication is that the byte offset of the current token is
// the sum of the lengths of all previous tokens' raw bytes.
func (z *Tokenizer) Raw() []byte { func (z *Tokenizer) Raw() []byte {
return z.buf[z.raw.start:z.raw.end] return z.buf[z.raw.start:z.raw.end]
} }

3
vendor/golang.org/x/text/AUTHORS generated vendored Normal file
View file

@ -0,0 +1,3 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

3
vendor/golang.org/x/text/CONTRIBUTORS generated vendored Normal file
View file

@ -0,0 +1,3 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

View file

@ -1,556 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/internal/gen"
)
const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
` !"#$%&'()*+,-./0123456789:;<=>?` +
`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
"`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
var encodings = []struct {
name string
mib string
comment string
varName string
replacement byte
mapping string
}{
{
"IBM Code Page 037",
"IBM037",
"",
"CodePage037",
0x3f,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm",
},
{
"IBM Code Page 437",
"PC8CodePage437",
"",
"CodePage437",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
},
{
"IBM Code Page 850",
"PC850Multilingual",
"",
"CodePage850",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
},
{
"IBM Code Page 852",
"PCp852",
"",
"CodePage852",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
},
{
"IBM Code Page 855",
"IBM855",
"",
"CodePage855",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
},
{
"Windows Code Page 858", // PC latin1 with Euro
"IBM00858",
"",
"CodePage858",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
},
{
"IBM Code Page 860",
"IBM860",
"",
"CodePage860",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
},
{
"IBM Code Page 862",
"PC862LatinHebrew",
"",
"CodePage862",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
},
{
"IBM Code Page 863",
"IBM863",
"",
"CodePage863",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
},
{
"IBM Code Page 865",
"IBM865",
"",
"CodePage865",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
},
{
"IBM Code Page 866",
"IBM866",
"",
"CodePage866",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-ibm866.txt",
},
{
"IBM Code Page 1047",
"IBM1047",
"",
"CodePage1047",
0x3f,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm",
},
{
"IBM Code Page 1140",
"IBM01140",
"",
"CodePage1140",
0x3f,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm",
},
{
"ISO 8859-1",
"ISOLatin1",
"",
"ISO8859_1",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
},
{
"ISO 8859-2",
"ISOLatin2",
"",
"ISO8859_2",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
},
{
"ISO 8859-3",
"ISOLatin3",
"",
"ISO8859_3",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
},
{
"ISO 8859-4",
"ISOLatin4",
"",
"ISO8859_4",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
},
{
"ISO 8859-5",
"ISOLatinCyrillic",
"",
"ISO8859_5",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
},
{
"ISO 8859-6",
"ISOLatinArabic",
"",
"ISO8859_6,ISO8859_6E,ISO8859_6I",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
},
{
"ISO 8859-7",
"ISOLatinGreek",
"",
"ISO8859_7",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
},
{
"ISO 8859-8",
"ISOLatinHebrew",
"",
"ISO8859_8,ISO8859_8E,ISO8859_8I",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
},
{
"ISO 8859-9",
"ISOLatin5",
"",
"ISO8859_9",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm",
},
{
"ISO 8859-10",
"ISOLatin6",
"",
"ISO8859_10",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
},
{
"ISO 8859-13",
"ISO885913",
"",
"ISO8859_13",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
},
{
"ISO 8859-14",
"ISO885914",
"",
"ISO8859_14",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
},
{
"ISO 8859-15",
"ISO885915",
"",
"ISO8859_15",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
},
{
"ISO 8859-16",
"ISO885916",
"",
"ISO8859_16",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
},
{
"KOI8-R",
"KOI8R",
"",
"KOI8R",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-koi8-r.txt",
},
{
"KOI8-U",
"KOI8U",
"",
"KOI8U",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-koi8-u.txt",
},
{
"Macintosh",
"Macintosh",
"",
"Macintosh",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-macintosh.txt",
},
{
"Macintosh Cyrillic",
"MacintoshCyrillic",
"",
"MacintoshCyrillic",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
},
{
"Windows 874",
"Windows874",
"",
"Windows874",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-874.txt",
},
{
"Windows 1250",
"Windows1250",
"",
"Windows1250",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1250.txt",
},
{
"Windows 1251",
"Windows1251",
"",
"Windows1251",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1251.txt",
},
{
"Windows 1252",
"Windows1252",
"",
"Windows1252",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1252.txt",
},
{
"Windows 1253",
"Windows1253",
"",
"Windows1253",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1253.txt",
},
{
"Windows 1254",
"Windows1254",
"",
"Windows1254",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1254.txt",
},
{
"Windows 1255",
"Windows1255",
"",
"Windows1255",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1255.txt",
},
{
"Windows 1256",
"Windows1256",
"",
"Windows1256",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1256.txt",
},
{
"Windows 1257",
"Windows1257",
"",
"Windows1257",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1257.txt",
},
{
"Windows 1258",
"Windows1258",
"",
"Windows1258",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1258.txt",
},
{
"X-User-Defined",
"XUserDefined",
"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
"XUserDefined",
encoding.ASCIISub,
ascii +
"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
},
}
func getWHATWG(url string) string {
res, err := http.Get(url)
if err != nil {
log.Fatalf("%q: Get: %v", url, err)
}
defer res.Body.Close()
mapping := make([]rune, 128)
for i := range mapping {
mapping[i] = '\ufffd'
}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := 0, 0
if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0 || 128 <= x {
log.Fatalf("code %d is out of range", x)
}
if 0x80 <= y && y < 0xa0 {
// We diverge from the WHATWG spec by mapping control characters
// in the range [0x80, 0xa0) to U+FFFD.
continue
}
mapping[x] = rune(y)
}
return ascii + string(mapping)
}
func getUCM(url string) string {
res, err := http.Get(url)
if err != nil {
log.Fatalf("%q: Get: %v", url, err)
}
defer res.Body.Close()
mapping := make([]rune, 256)
for i := range mapping {
mapping[i] = '\ufffd'
}
charsFound := 0
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
var c byte
var r rune
if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
continue
}
mapping[c] = r
charsFound++
}
if charsFound < 200 {
log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
}
return string(mapping)
}
func main() {
mibs := map[string]bool{}
all := []string{}
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "charmap")
printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
printf("import (\n")
printf("\t\"golang.org/x/text/encoding\"\n")
printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
printf(")\n\n")
for _, e := range encodings {
varNames := strings.Split(e.varName, ",")
all = append(all, varNames...)
varName := varNames[0]
switch {
case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
e.mapping = getWHATWG(e.mapping)
case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
e.mapping = getUCM(e.mapping)
}
asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
if asciiSuperset {
low = 0x80
}
lvn := 1
if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
lvn = 3
}
lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
printf("// %s is the %s encoding.\n", varName, e.name)
if e.comment != "" {
printf("//\n// %s\n", e.comment)
}
printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n",
varName, lowerVarName, lowerVarName, e.name)
if mibs[e.mib] {
log.Fatalf("MIB type %q declared multiple times.", e.mib)
}
printf("mib: identifier.%s,\n", e.mib)
printf("asciiSuperset: %t,\n", asciiSuperset)
printf("low: 0x%02x,\n", low)
printf("replacement: 0x%02x,\n", e.replacement)
printf("decode: [256]utf8Enc{\n")
i, backMapping := 0, map[rune]byte{}
for _, c := range e.mapping {
if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
backMapping[c] = byte(i)
}
var buf [8]byte
n := utf8.EncodeRune(buf[:], c)
if n > 3 {
panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
}
printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
if i%2 == 1 {
printf("\n")
}
i++
}
printf("},\n")
printf("encode: [256]uint32{\n")
encode := make([]uint32, 0, 256)
for c, i := range backMapping {
encode = append(encode, uint32(i)<<24|uint32(c))
}
sort.Sort(byRune(encode))
for len(encode) < cap(encode) {
encode = append(encode, encode[len(encode)-1])
}
for i, enc := range encode {
printf("0x%08x,", enc)
if i%8 == 7 {
printf("\n")
}
}
printf("},\n}\n")
// Add an estimate of the size of a single Charmap{} struct value, which
// includes two 256 elem arrays of 4 bytes and some extra fields, which
// align to 3 uint64s on 64-bit architectures.
w.Size += 2*4*256 + 3*8
}
// TODO: add proper line breaking.
printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
}
type byRune []uint32
func (b byRune) Len() int { return len(b) }
func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff }
func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

View file

@ -1,173 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"encoding/json"
"fmt"
"log"
"strings"
"golang.org/x/text/internal/gen"
)
type group struct {
Encodings []struct {
Labels []string
Name string
}
}
func main() {
gen.Init()
r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
var groups []group
if err := json.NewDecoder(r).Decode(&groups); err != nil {
log.Fatalf("Error reading encodings.json: %v", err)
}
w := &bytes.Buffer{}
fmt.Fprintln(w, "type htmlEncoding byte")
fmt.Fprintln(w, "const (")
for i, g := range groups {
for _, e := range g.Encodings {
key := strings.ToLower(e.Name)
name := consts[key]
if name == "" {
log.Fatalf("No const defined for %s.", key)
}
if i == 0 {
fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
} else {
fmt.Fprintf(w, "%s\n", name)
}
}
}
fmt.Fprintln(w, "numEncodings")
fmt.Fprint(w, ")\n\n")
fmt.Fprintln(w, "var canonical = [numEncodings]string{")
for _, g := range groups {
for _, e := range g.Encodings {
fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
}
}
fmt.Fprint(w, "}\n\n")
fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
for _, g := range groups {
for _, e := range g.Encodings {
for _, l := range e.Labels {
key := strings.ToLower(e.Name)
name := consts[key]
fmt.Fprintf(w, "%q: %s,\n", l, name)
}
}
}
fmt.Fprint(w, "}\n\n")
var tags []string
fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
for _, loc := range locales {
tags = append(tags, loc.tag)
fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
}
fmt.Fprint(w, "}\n\n")
fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
}
// consts maps canonical encoding name to internal constant.
var consts = map[string]string{
"utf-8": "utf8",
"ibm866": "ibm866",
"iso-8859-2": "iso8859_2",
"iso-8859-3": "iso8859_3",
"iso-8859-4": "iso8859_4",
"iso-8859-5": "iso8859_5",
"iso-8859-6": "iso8859_6",
"iso-8859-7": "iso8859_7",
"iso-8859-8": "iso8859_8",
"iso-8859-8-i": "iso8859_8I",
"iso-8859-10": "iso8859_10",
"iso-8859-13": "iso8859_13",
"iso-8859-14": "iso8859_14",
"iso-8859-15": "iso8859_15",
"iso-8859-16": "iso8859_16",
"koi8-r": "koi8r",
"koi8-u": "koi8u",
"macintosh": "macintosh",
"windows-874": "windows874",
"windows-1250": "windows1250",
"windows-1251": "windows1251",
"windows-1252": "windows1252",
"windows-1253": "windows1253",
"windows-1254": "windows1254",
"windows-1255": "windows1255",
"windows-1256": "windows1256",
"windows-1257": "windows1257",
"windows-1258": "windows1258",
"x-mac-cyrillic": "macintoshCyrillic",
"gbk": "gbk",
"gb18030": "gb18030",
// "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
"big5": "big5",
"euc-jp": "eucjp",
"iso-2022-jp": "iso2022jp",
"shift_jis": "shiftJIS",
"euc-kr": "euckr",
"replacement": "replacement",
"utf-16be": "utf16be",
"utf-16le": "utf16le",
"x-user-defined": "xUserDefined",
}
// locales is taken from
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
var locales = []struct{ tag, name string }{
// The default value. Explicitly state latin to benefit from the exact
// script option, while still making 1252 the default encoding for languages
// written in Latin script.
{"und_Latn", "windows-1252"},
{"ar", "windows-1256"},
{"ba", "windows-1251"},
{"be", "windows-1251"},
{"bg", "windows-1251"},
{"cs", "windows-1250"},
{"el", "iso-8859-7"},
{"et", "windows-1257"},
{"fa", "windows-1256"},
{"he", "windows-1255"},
{"hr", "windows-1250"},
{"hu", "iso-8859-2"},
{"ja", "shift_jis"},
{"kk", "windows-1251"},
{"ko", "euc-kr"},
{"ku", "windows-1254"},
{"ky", "windows-1251"},
{"lt", "windows-1257"},
{"lv", "windows-1257"},
{"mk", "windows-1251"},
{"pl", "iso-8859-2"},
{"ru", "windows-1251"},
{"sah", "windows-1251"},
{"sk", "windows-1250"},
{"sl", "iso-8859-2"},
{"sr", "windows-1251"},
{"tg", "windows-1251"},
{"th", "windows-874"},
{"tr", "windows-1254"},
{"tt", "windows-1251"},
{"uk", "windows-1251"},
{"vi", "windows-1258"},
{"zh-hans", "gb18030"},
{"zh-hant", "big5"},
}

View file

@ -1,142 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"encoding/xml"
"fmt"
"io"
"log"
"strings"
"golang.org/x/text/internal/gen"
)
type registry struct {
XMLName xml.Name `xml:"registry"`
Updated string `xml:"updated"`
Registry []struct {
ID string `xml:"id,attr"`
Record []struct {
Name string `xml:"name"`
Xref []struct {
Type string `xml:"type,attr"`
Data string `xml:"data,attr"`
} `xml:"xref"`
Desc struct {
Data string `xml:",innerxml"`
// Any []struct {
// Data string `xml:",chardata"`
// } `xml:",any"`
// Data string `xml:",chardata"`
} `xml:"description,"`
MIB string `xml:"value"`
Alias []string `xml:"alias"`
MIME string `xml:"preferred_alias"`
} `xml:"record"`
} `xml:"registry"`
}
func main() {
r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
reg := &registry{}
if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
log.Fatalf("Error decoding charset registry: %v", err)
}
if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
}
w := &bytes.Buffer{}
fmt.Fprintf(w, "const (\n")
for _, rec := range reg.Registry[0].Record {
constName := ""
for _, a := range rec.Alias {
if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
// Some of the constant definitions have comments in them. Strip those.
constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
}
}
if constName == "" {
switch rec.MIB {
case "2085":
constName = "HZGB2312" // Not listed as alias for some reason.
default:
log.Fatalf("No cs alias defined for %s.", rec.MIB)
}
}
if rec.MIME != "" {
rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
}
fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
if len(rec.Desc.Data) > 0 {
fmt.Fprint(w, "// ")
d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
inElem := true
attr := ""
for {
t, err := d.Token()
if err != nil {
if err != io.EOF {
log.Fatal(err)
}
break
}
switch x := t.(type) {
case xml.CharData:
attr = "" // Don't need attribute info.
a := bytes.Split([]byte(x), []byte("\n"))
for i, b := range a {
if b = bytes.TrimSpace(b); len(b) != 0 {
if !inElem && i > 0 {
fmt.Fprint(w, "\n// ")
}
inElem = false
fmt.Fprintf(w, "%s ", string(b))
}
}
case xml.StartElement:
if x.Name.Local == "xref" {
inElem = true
use := false
for _, a := range x.Attr {
if a.Name.Local == "type" {
use = use || a.Value != "person"
}
if a.Name.Local == "data" && use {
// Patch up URLs to use https. From some links, the
// https version is different from the http one.
s := a.Value
s = strings.Replace(s, "http://", "https://", -1)
s = strings.Replace(s, "/unicode/", "/", -1)
attr = s + " "
}
}
}
case xml.EndElement:
inElem = false
fmt.Fprint(w, attr)
}
}
fmt.Fprint(w, "\n")
}
for _, x := range rec.Xref {
switch x.Type {
case "rfc":
fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
case "uri":
fmt.Fprintf(w, "// Reference: %s\n", x.Data)
}
}
fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
fmt.Fprintln(w)
}
fmt.Fprintln(w, ")")
gen.WriteGoFile("mib.go", "identifier", w.Bytes())
}

View file

@ -1,161 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
// TODO: Emoji extensions?
// https://www.unicode.org/faq/emoji_dingbats.html
// https://www.unicode.org/Public/UNIDATA/EmojiSources.txt
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
type entry struct {
jisCode, table int
}
func main() {
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n")
fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n")
reverse := [65536]entry{}
for i := range reverse {
reverse[i].table = -1
}
tables := []struct {
url string
name string
}{
{"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"},
{"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"},
}
for i, table := range tables {
res, err := http.Get(table.url)
if err != nil {
log.Fatalf("%q: Get: %v", table.url, err)
}
defer res.Body.Close()
mapping := [65536]uint16{}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := 0, uint16(0)
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
log.Fatalf("%q: could not parse %q", table.url, s)
}
if x < 0 || 120*94 <= x {
log.Fatalf("%q: JIS code %d is out of range", table.url, x)
}
mapping[x] = y
if reverse[y].table == -1 {
reverse[y] = entry{jisCode: x, table: i}
}
}
if err := scanner.Err(); err != nil {
log.Fatalf("%q: scanner error: %v", table.url, err)
}
fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n",
table.name, table.name, table.url)
fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name)
for i, m := range mapping {
if m != 0 {
fmt.Printf("\t%d: 0x%04X,\n", i, m)
}
}
fmt.Printf("}\n\n")
}
// Any run of at least separation continuous zero entries in the reverse map will
// be a separate encode table.
const separation = 1024
intervals := []interval(nil)
low, high := -1, -1
for i, v := range reverse {
if v.table == -1 {
continue
}
if low < 0 {
low = i
} else if i-high >= separation {
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
low = i
}
high = i + 1
}
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
sort.Sort(byDecreasingLength(intervals))
fmt.Printf("const (\n")
fmt.Printf("\tjis0208 = 1\n")
fmt.Printf("\tjis0212 = 2\n")
fmt.Printf("\tcodeMask = 0x7f\n")
fmt.Printf("\tcodeShift = 7\n")
fmt.Printf("\ttableShift = 14\n")
fmt.Printf(")\n\n")
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n")
fmt.Printf("// sorted by decreasing length.\n")
for i, v := range intervals {
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
}
fmt.Printf("//\n")
fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n")
fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n")
fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n")
fmt.Printf("// JIS code (94*j1 + j2) within that table.\n")
fmt.Printf("\n")
for i, v := range intervals {
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
fmt.Printf("var encode%d = [...]uint16{\n", i)
for j := v.low; j < v.high; j++ {
x := reverse[j]
if x.table == -1 {
continue
}
fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n",
j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94)
}
fmt.Printf("}\n\n")
}
}
// interval is a half-open interval [low, high).
type interval struct {
low, high int
}
func (i interval) len() int { return i.high - i.low }
// byDecreasingLength sorts intervals by decreasing length.
type byDecreasingLength []interval
func (b byDecreasingLength) Len() int { return len(b) }
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

View file

@ -1,143 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
func main() {
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n")
fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n")
res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt")
if err != nil {
log.Fatalf("Get: %v", err)
}
defer res.Body.Close()
mapping := [65536]uint16{}
reverse := [65536]uint16{}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := uint16(0), uint16(0)
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0 || 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x {
log.Fatalf("EUC-KR code %d is out of range", x)
}
mapping[x] = y
if reverse[y] == 0 {
c0, c1 := uint16(0), uint16(0)
if x < 178*(0xc7-0x81) {
c0 = uint16(x/178) + 0x81
c1 = uint16(x % 178)
switch {
case c1 < 1*26:
c1 += 0x41
case c1 < 2*26:
c1 += 0x47
default:
c1 += 0x4d
}
} else {
x -= 178 * (0xc7 - 0x81)
c0 = uint16(x/94) + 0xc7
c1 = uint16(x%94) + 0xa1
}
reverse[y] = c0<<8 | c1
}
}
if err := scanner.Err(); err != nil {
log.Fatalf("scanner error: %v", err)
}
fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n")
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n")
fmt.Printf("var decode = [...]uint16{\n")
for i, v := range mapping {
if v != 0 {
fmt.Printf("\t%d: 0x%04X,\n", i, v)
}
}
fmt.Printf("}\n\n")
// Any run of at least separation continuous zero entries in the reverse map will
// be a separate encode table.
const separation = 1024
intervals := []interval(nil)
low, high := -1, -1
for i, v := range reverse {
if v == 0 {
continue
}
if low < 0 {
low = i
} else if i-high >= separation {
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
low = i
}
high = i + 1
}
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
sort.Sort(byDecreasingLength(intervals))
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n")
fmt.Printf("// sorted by decreasing length.\n")
for i, v := range intervals {
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
}
fmt.Printf("\n")
for i, v := range intervals {
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
fmt.Printf("var encode%d = [...]uint16{\n", i)
for j := v.low; j < v.high; j++ {
x := reverse[j]
if x == 0 {
continue
}
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
}
fmt.Printf("}\n\n")
}
}
// interval is a half-open interval [low, high).
type interval struct {
low, high int
}
func (i interval) len() int { return i.high - i.low }
// byDecreasingLength sorts intervals by decreasing length.
type byDecreasingLength []interval
func (b byDecreasingLength) Len() int { return len(b) }
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

View file

@ -1,161 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
func main() {
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n")
fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n")
printGB18030()
printGBK()
}
func printGB18030() {
res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt")
if err != nil {
log.Fatalf("Get: %v", err)
}
defer res.Body.Close()
fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n")
fmt.Printf("var gb18030 = [...][2]uint16{\n")
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := uint32(0), uint32(0)
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0x10000 && y < 0x10000 {
fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y)
}
}
fmt.Printf("}\n\n")
}
func printGBK() {
res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt")
if err != nil {
log.Fatalf("Get: %v", err)
}
defer res.Body.Close()
mapping := [65536]uint16{}
reverse := [65536]uint16{}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := uint16(0), uint16(0)
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0 || 126*190 <= x {
log.Fatalf("GBK code %d is out of range", x)
}
mapping[x] = y
if reverse[y] == 0 {
c0, c1 := x/190, x%190
if c1 >= 0x3f {
c1++
}
reverse[y] = (0x81+c0)<<8 | (0x40 + c1)
}
}
if err := scanner.Err(); err != nil {
log.Fatalf("scanner error: %v", err)
}
fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n")
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n")
fmt.Printf("var decode = [...]uint16{\n")
for i, v := range mapping {
if v != 0 {
fmt.Printf("\t%d: 0x%04X,\n", i, v)
}
}
fmt.Printf("}\n\n")
// Any run of at least separation continuous zero entries in the reverse map will
// be a separate encode table.
const separation = 1024
intervals := []interval(nil)
low, high := -1, -1
for i, v := range reverse {
if v == 0 {
continue
}
if low < 0 {
low = i
} else if i-high >= separation {
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
low = i
}
high = i + 1
}
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
sort.Sort(byDecreasingLength(intervals))
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n")
fmt.Printf("// sorted by decreasing length.\n")
for i, v := range intervals {
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
}
fmt.Printf("\n")
for i, v := range intervals {
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
fmt.Printf("var encode%d = [...]uint16{\n", i)
for j := v.low; j < v.high; j++ {
x := reverse[j]
if x == 0 {
continue
}
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
}
fmt.Printf("}\n\n")
}
}
// interval is a half-open interval [low, high).
type interval struct {
low, high int
}
func (i interval) len() int { return i.high - i.low }
// byDecreasingLength sorts intervals by decreasing length.
type byDecreasingLength []interval
func (b byDecreasingLength) Len() int { return len(b) }
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

View file

@ -1,140 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
func main() {
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n")
fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n")
res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt")
if err != nil {
log.Fatalf("Get: %v", err)
}
defer res.Body.Close()
mapping := [65536]uint32{}
reverse := [65536 * 4]uint16{}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := uint16(0), uint32(0)
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0 || 126*157 <= x {
log.Fatalf("Big5 code %d is out of range", x)
}
mapping[x] = y
// The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that
// "The index pointer for code point in index is the first pointer
// corresponding to code point in index", which would normally mean
// that the code below should be guarded by "if reverse[y] == 0", but
// last instead of first seems to match the behavior of
// "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in
// http://encoding.spec.whatwg.org/index-big5.txt, as index 2148
// (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc")
// and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc".
c0, c1 := x/157, x%157
if c1 < 0x3f {
c1 += 0x40
} else {
c1 += 0x62
}
reverse[y] = (0x81+c0)<<8 | c1
}
if err := scanner.Err(); err != nil {
log.Fatalf("scanner error: %v", err)
}
fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n")
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n")
fmt.Printf("var decode = [...]uint32{\n")
for i, v := range mapping {
if v != 0 {
fmt.Printf("\t%d: 0x%08X,\n", i, v)
}
}
fmt.Printf("}\n\n")
// Any run of at least separation continuous zero entries in the reverse map will
// be a separate encode table.
const separation = 1024
intervals := []interval(nil)
low, high := -1, -1
for i, v := range reverse {
if v == 0 {
continue
}
if low < 0 {
low = i
} else if i-high >= separation {
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
low = i
}
high = i + 1
}
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
sort.Sort(byDecreasingLength(intervals))
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n")
fmt.Printf("// sorted by decreasing length.\n")
for i, v := range intervals {
fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high)
}
fmt.Printf("\n")
for i, v := range intervals {
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
fmt.Printf("var encode%d = [...]uint16{\n", i)
for j := v.low; j < v.high; j++ {
x := reverse[j]
if x == 0 {
continue
}
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
}
fmt.Printf("}\n\n")
}
}
// interval is a half-open interval [low, high).
type interval struct {
low, high int
}
func (i interval) len() int { return i.high - i.low }
// byDecreasingLength sorts intervals by decreasing length.
type byDecreasingLength []interval
func (b byDecreasingLength) Len() int { return len(b) }
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

View file

@ -1,64 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Language tag table generator.
// Data read from the web.
package main
import (
"flag"
"fmt"
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/unicode/cldr"
)
var (
test = flag.Bool("test",
false,
"test existing tables; can be used to compare web data with package data.")
outputFile = flag.String("output",
"tables.go",
"output file for generated tables")
)
func main() {
gen.Init()
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "compact")
fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
b := newBuilder(w)
gen.WriteCLDRVersion(w)
b.writeCompactIndex()
}
type builder struct {
w *gen.CodeWriter
data *cldr.CLDR
supp *cldr.SupplementalData
}
func newBuilder(w *gen.CodeWriter) *builder {
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
data, err := d.DecodeZip(r)
if err != nil {
log.Fatal(err)
}
b := builder{
w: w,
data: data,
supp: data.Supplemental(),
}
return &b
}

View file

@ -1,113 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file generates derivative tables based on the language package itself.
import (
"fmt"
"log"
"sort"
"strings"
"golang.org/x/text/internal/language"
)
// Compact indices:
// Note -va-X variants only apply to localization variants.
// BCP variants only ever apply to language.
// The only ambiguity between tags is with regions.
func (b *builder) writeCompactIndex() {
// Collect all language tags for which we have any data in CLDR.
m := map[language.Tag]bool{}
for _, lang := range b.data.Locales() {
// We include all locales unconditionally to be consistent with en_US.
// We want en_US, even though it has no data associated with it.
// TODO: put any of the languages for which no data exists at the end
// of the index. This allows all components based on ICU to use that
// as the cutoff point.
// if x := data.RawLDML(lang); false ||
// x.LocaleDisplayNames != nil ||
// x.Characters != nil ||
// x.Delimiters != nil ||
// x.Measurement != nil ||
// x.Dates != nil ||
// x.Numbers != nil ||
// x.Units != nil ||
// x.ListPatterns != nil ||
// x.Collations != nil ||
// x.Segmentations != nil ||
// x.Rbnf != nil ||
// x.Annotations != nil ||
// x.Metadata != nil {
// TODO: support POSIX natively, albeit non-standard.
tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
m[tag] = true
// }
}
// TODO: plural rules are also defined for the deprecated tags:
// iw mo sh tl
// Consider removing these as compact tags.
// Include locales for plural rules, which uses a different structure.
for _, plurals := range b.supp.Plurals {
for _, rules := range plurals.PluralRules {
for _, lang := range strings.Split(rules.Locales, " ") {
m[language.Make(lang)] = true
}
}
}
var coreTags []language.CompactCoreInfo
var special []string
for t := range m {
if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
log.Fatalf("Unexpected extension %v in %v", x, t)
}
if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
cci, ok := language.GetCompactCore(t)
if !ok {
log.Fatalf("Locale for non-basic language %q", t)
}
coreTags = append(coreTags, cci)
} else {
special = append(special, t.String())
}
}
w := b.w
sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
sort.Strings(special)
w.WriteComment(`
NumCompactTags is the number of common tags. The maximum tag is
NumCompactTags-1.`)
w.WriteConst("NumCompactTags", len(m))
fmt.Fprintln(w, "const (")
for i, t := range coreTags {
fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
}
for i, t := range special {
fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
}
fmt.Fprintln(w, ")")
w.WriteVar("coreTags", coreTags)
w.WriteConst("specialTagsStr", strings.Join(special, " "))
}
func ident(s string) string {
return strings.Replace(s, "-", "", -1) + "Index"
}

View file

@ -1,54 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/language"
"golang.org/x/text/internal/language/compact"
"golang.org/x/text/unicode/cldr"
)
func main() {
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
data, err := d.DecodeZip(r)
if err != nil {
log.Fatalf("DecodeZip: %v", err)
}
w := gen.NewCodeWriter()
defer w.WriteGoFile("parents.go", "compact")
// Create parents table.
type ID uint16
parents := make([]ID, compact.NumCompactTags)
for _, loc := range data.Locales() {
tag := language.MustParse(loc)
index, ok := compact.FromTag(tag)
if !ok {
continue
}
parentIndex := compact.ID(0) // und
for p := tag.Parent(); p != language.Und; p = p.Parent() {
if x, ok := compact.FromTag(p); ok {
parentIndex = x
break
}
}
parents[index] = ID(parentIndex)
}
w.WriteComment(`
parents maps a compact index of a tag to the compact index of the parent of
this tag.`)
w.WriteVar("parents", parents)
}

File diff suppressed because it is too large Load diff

View file

@ -1,20 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file contains code common to the maketables.go and the package code.
// AliasType is the type of an alias in AliasMap.
type AliasType int8
const (
Deprecated AliasType = iota
Macro
Legacy
AliasTypeUnknown AliasType = -1
)

View file

@ -1,305 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Language tag table generator.
// Data read from the web.
package main
import (
"flag"
"fmt"
"io"
"log"
"sort"
"strconv"
"strings"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/language"
"golang.org/x/text/unicode/cldr"
)
var (
test = flag.Bool("test",
false,
"test existing tables; can be used to compare web data with package data.")
outputFile = flag.String("output",
"tables.go",
"output file for generated tables")
)
func main() {
gen.Init()
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "language")
b := newBuilder(w)
gen.WriteCLDRVersion(w)
b.writeConstants()
b.writeMatchData()
}
type builder struct {
w *gen.CodeWriter
hw io.Writer // MultiWriter for w and w.Hash
data *cldr.CLDR
supp *cldr.SupplementalData
}
func (b *builder) langIndex(s string) uint16 {
return uint16(language.MustParseBase(s))
}
func (b *builder) regionIndex(s string) int {
return int(language.MustParseRegion(s))
}
func (b *builder) scriptIndex(s string) int {
return int(language.MustParseScript(s))
}
func newBuilder(w *gen.CodeWriter) *builder {
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
data, err := d.DecodeZip(r)
if err != nil {
log.Fatal(err)
}
b := builder{
w: w,
hw: io.MultiWriter(w, w.Hash),
data: data,
supp: data.Supplemental(),
}
return &b
}
// writeConsts computes f(v) for all v in values and writes the results
// as constants named _v to a single constant block.
func (b *builder) writeConsts(f func(string) int, values ...string) {
fmt.Fprintln(b.w, "const (")
for _, v := range values {
fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
}
fmt.Fprintln(b.w, ")")
}
// TODO: region inclusion data will probably not be use used in future matchers.
var langConsts = []string{
"de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
}
var scriptConsts = []string{
"Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
"Zzzz",
}
var regionConsts = []string{
"001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
"ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
}
func (b *builder) writeConstants() {
b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
b.writeConsts(b.regionIndex, regionConsts...)
b.writeConsts(b.scriptIndex, scriptConsts...)
}
type mutualIntelligibility struct {
want, have uint16
distance uint8
oneway bool
}
type scriptIntelligibility struct {
wantLang, haveLang uint16
wantScript, haveScript uint8
distance uint8
// Always oneway
}
type regionIntelligibility struct {
lang uint16 // compact language id
script uint8 // 0 means any
group uint8 // 0 means any; if bit 7 is set it means inverse
distance uint8
// Always twoway.
}
// writeMatchData writes tables with languages and scripts for which there is
// mutual intelligibility. The data is based on CLDR's languageMatching data.
// Note that we use a different algorithm than the one defined by CLDR and that
// we slightly modify the data. For example, we convert scores to confidence levels.
// We also drop all region-related data as we use a different algorithm to
// determine region equivalence.
func (b *builder) writeMatchData() {
lm := b.supp.LanguageMatching.LanguageMatches
cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
regionHierarchy := map[string][]string{}
for _, g := range b.supp.TerritoryContainment.Group {
regions := strings.Split(g.Contains, " ")
regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
}
regionToGroups := make([]uint8, language.NumRegions)
idToIndex := map[string]uint8{}
for i, mv := range lm[0].MatchVariable {
if i > 6 {
log.Fatalf("Too many groups: %d", i)
}
idToIndex[mv.Id] = uint8(i + 1)
// TODO: also handle '-'
for _, r := range strings.Split(mv.Value, "+") {
todo := []string{r}
for k := 0; k < len(todo); k++ {
r := todo[k]
regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
todo = append(todo, regionHierarchy[r]...)
}
}
}
b.w.WriteVar("regionToGroups", regionToGroups)
// maps language id to in- and out-of-group region.
paradigmLocales := [][3]uint16{}
locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
for i := 0; i < len(locales); i += 2 {
x := [3]uint16{}
for j := 0; j < 2; j++ {
pc := strings.SplitN(locales[i+j], "-", 2)
x[0] = b.langIndex(pc[0])
if len(pc) == 2 {
x[1+j] = uint16(b.regionIndex(pc[1]))
}
}
paradigmLocales = append(paradigmLocales, x)
}
b.w.WriteVar("paradigmLocales", paradigmLocales)
b.w.WriteType(mutualIntelligibility{})
b.w.WriteType(scriptIntelligibility{})
b.w.WriteType(regionIntelligibility{})
matchLang := []mutualIntelligibility{}
matchScript := []scriptIntelligibility{}
matchRegion := []regionIntelligibility{}
// Convert the languageMatch entries in lists keyed by desired language.
for _, m := range lm[0].LanguageMatch {
// Different versions of CLDR use different separators.
desired := strings.Replace(m.Desired, "-", "_", -1)
supported := strings.Replace(m.Supported, "-", "_", -1)
d := strings.Split(desired, "_")
s := strings.Split(supported, "_")
if len(d) != len(s) {
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
continue
}
distance, _ := strconv.ParseInt(m.Distance, 10, 8)
switch len(d) {
case 2:
if desired == supported && desired == "*_*" {
continue
}
// language-script pair.
matchScript = append(matchScript, scriptIntelligibility{
wantLang: uint16(b.langIndex(d[0])),
haveLang: uint16(b.langIndex(s[0])),
wantScript: uint8(b.scriptIndex(d[1])),
haveScript: uint8(b.scriptIndex(s[1])),
distance: uint8(distance),
})
if m.Oneway != "true" {
matchScript = append(matchScript, scriptIntelligibility{
wantLang: uint16(b.langIndex(s[0])),
haveLang: uint16(b.langIndex(d[0])),
wantScript: uint8(b.scriptIndex(s[1])),
haveScript: uint8(b.scriptIndex(d[1])),
distance: uint8(distance),
})
}
case 1:
if desired == supported && desired == "*" {
continue
}
if distance == 1 {
// nb == no is already handled by macro mapping. Check there
// really is only this case.
if d[0] != "no" || s[0] != "nb" {
log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
}
continue
}
// TODO: consider dropping oneway field and just doubling the entry.
matchLang = append(matchLang, mutualIntelligibility{
want: uint16(b.langIndex(d[0])),
have: uint16(b.langIndex(s[0])),
distance: uint8(distance),
oneway: m.Oneway == "true",
})
case 3:
if desired == supported && desired == "*_*_*" {
continue
}
if desired != supported {
// This is now supported by CLDR, but only one case, which
// should already be covered by paradigm locales. For instance,
// test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
// testdata/CLDRLocaleMatcherTest.txt tests this.
if supported != "en_*_GB" {
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
}
continue
}
ri := regionIntelligibility{
lang: b.langIndex(d[0]),
distance: uint8(distance),
}
if d[1] != "*" {
ri.script = uint8(b.scriptIndex(d[1]))
}
switch {
case d[2] == "*":
ri.group = 0x80 // not contained in anything
case strings.HasPrefix(d[2], "$!"):
ri.group = 0x80
d[2] = "$" + d[2][len("$!"):]
fallthrough
case strings.HasPrefix(d[2], "$"):
ri.group |= idToIndex[d[2]]
}
matchRegion = append(matchRegion, ri)
default:
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
}
}
sort.SliceStable(matchLang, func(i, j int) bool {
return matchLang[i].distance < matchLang[j].distance
})
b.w.WriteComment(`
matchLang holds pairs of langIDs of base languages that are typically
mutually intelligible. Each pair is associated with a confidence and
whether the intelligibility goes one or both ways.`)
b.w.WriteVar("matchLang", matchLang)
b.w.WriteComment(`
matchScript holds pairs of scriptIDs where readers of one script
can typically also read the other. Each is associated with a confidence.`)
sort.SliceStable(matchScript, func(i, j int) bool {
return matchScript[i].distance < matchScript[j].distance
})
b.w.WriteVar("matchScript", matchScript)
sort.SliceStable(matchRegion, func(i, j int) bool {
return matchRegion[i].distance < matchRegion[j].distance
})
b.w.WriteVar("matchRegion", matchRegion)
}

56
vendor/modules.txt vendored Normal file
View file

@ -0,0 +1,56 @@
# github.com/PuerkitoBio/goquery v1.5.1
## explicit
github.com/PuerkitoBio/goquery
# github.com/andybalholm/cascadia v1.1.0
github.com/andybalholm/cascadia
# github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9
## explicit
github.com/chilts/sid
# github.com/jaytaylor/html2text v0.0.0-20200220170450-61d9dc4d7195
## explicit
github.com/jaytaylor/html2text
# github.com/mattn/go-runewidth v0.0.7
github.com/mattn/go-runewidth
# github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2
## explicit
github.com/mattn/go-xmpp
# github.com/mmcdole/gofeed v1.0.0-beta2
## explicit
github.com/mmcdole/gofeed
github.com/mmcdole/gofeed/atom
github.com/mmcdole/gofeed/extensions
github.com/mmcdole/gofeed/internal/shared
github.com/mmcdole/gofeed/rss
# github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf
## explicit
github.com/mmcdole/goxpp
# github.com/olekukonko/tablewriter v0.0.4
## explicit
github.com/olekukonko/tablewriter
# github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf
## explicit
github.com/ssor/bom
# golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
## explicit
golang.org/x/net/html
golang.org/x/net/html/atom
golang.org/x/net/html/charset
# golang.org/x/text v0.3.2
## explicit
golang.org/x/text/encoding
golang.org/x/text/encoding/charmap
golang.org/x/text/encoding/htmlindex
golang.org/x/text/encoding/internal
golang.org/x/text/encoding/internal/identifier
golang.org/x/text/encoding/japanese
golang.org/x/text/encoding/korean
golang.org/x/text/encoding/simplifiedchinese
golang.org/x/text/encoding/traditionalchinese
golang.org/x/text/encoding/unicode
golang.org/x/text/internal/language
golang.org/x/text/internal/language/compact
golang.org/x/text/internal/tag
golang.org/x/text/internal/utf8internal
golang.org/x/text/language
golang.org/x/text/runes
golang.org/x/text/transform

211
vendor/vendor.json vendored
View file

@ -1,211 +0,0 @@
{
"comment": "",
"ignore": "test",
"package": [
{
"checksumSHA1": "6C3LYFDsp1HNrgzVxCcBlKCifC0=",
"path": "github.com/PuerkitoBio/goquery",
"revision": "3dcf72e6c17f694381a21592651ca1464ded0e10",
"revisionTime": "2019-01-09T23:07:04Z"
},
{
"checksumSHA1": "7DQlhUmi8waYHuk3kvrWiTLVygM=",
"path": "github.com/andybalholm/cascadia",
"revision": "680b6a57bda4f657485ad44bdea42342ead737bc",
"revisionTime": "2018-10-12T15:44:24Z"
},
{
"checksumSHA1": "WoatyvsqqryIUsNP+2fXlbVtAVM=",
"path": "github.com/chilts/sid",
"revision": "660e94789ec9b45634f588d40881e81b56de92a0",
"revisionTime": "2019-06-07T04:24:30Z"
},
{
"checksumSHA1": "6fVGZNc+CHkdCoJCwJwTOSMYHSo=",
"path": "github.com/jaytaylor/html2text",
"revision": "01ec452cbe43774f989516272881441cae40c16b",
"revisionTime": "2019-03-26T19:55:09Z"
},
{
"checksumSHA1": "qR7D38Zmn9TPUvGn64k+r0+Kq1c=",
"path": "github.com/mattn/go-runewidth",
"revision": "703b5e6b11ae25aeb2af9ebb5d5fdf8fa2575211",
"revisionTime": "2018-12-18T00:06:49Z"
},
{
"checksumSHA1": "F0EUIs4TuxSnrzbd/SfZdseDtmc=",
"path": "github.com/mattn/go-xmpp",
"revision": "6093f50721ed2204a87a81109ca5a466a5bec6c1",
"revisionTime": "2019-01-24T09:32:44Z"
},
{
"checksumSHA1": "0etQIvNU8zcr74rQKJLeH5O+MzU=",
"path": "github.com/mmcdole/gofeed",
"revision": "0e68beaf6fdf215bd1fe42a09f6de292c7032359",
"revisionTime": "2019-04-20T15:49:28Z"
},
{
"checksumSHA1": "+plUZFo1eUTODCmc1ptE2VUuWM4=",
"path": "github.com/mmcdole/gofeed/atom",
"revision": "0e68beaf6fdf215bd1fe42a09f6de292c7032359",
"revisionTime": "2019-04-20T15:49:28Z"
},
{
"checksumSHA1": "30XHSW5SZy4yL+ezmw0xyhGdM+Y=",
"path": "github.com/mmcdole/gofeed/extensions",
"revision": "0e68beaf6fdf215bd1fe42a09f6de292c7032359",
"revisionTime": "2019-04-20T15:49:28Z"
},
{
"checksumSHA1": "Aw0Zm44X0crwxj4KZTYqK2C3zT0=",
"path": "github.com/mmcdole/gofeed/internal/shared",
"revision": "0e68beaf6fdf215bd1fe42a09f6de292c7032359",
"revisionTime": "2019-04-20T15:49:28Z"
},
{
"checksumSHA1": "4Em5hO79HIPXmUYb9s/zh6CuQ0c=",
"path": "github.com/mmcdole/gofeed/rss",
"revision": "0e68beaf6fdf215bd1fe42a09f6de292c7032359",
"revisionTime": "2019-04-20T15:49:28Z"
},
{
"checksumSHA1": "dMr0U8HXo4PGD9qDPxvh6Iizzh0=",
"path": "github.com/mmcdole/goxpp",
"revision": "0068e33feabfc0086c7aeb58a9603f91c061c89f",
"revisionTime": "2018-10-12T15:49:47Z"
},
{
"checksumSHA1": "C2jnBks3LctebwDmLMoq/zYZex0=",
"path": "github.com/olekukonko/tablewriter",
"revision": "cc27d85e17cec9768d2ac401ea5d619a9628f16d",
"revisionTime": "2019-06-18T03:32:46Z"
},
{
"checksumSHA1": "qErubHtC7DAFBnEQkMTuKDtfFTU=",
"path": "github.com/ssor/bom",
"revision": "6386211fdfcf24c0bfbdaceafd02849ed9a8a509",
"revisionTime": "2017-07-18T12:35:48Z"
},
{
"checksumSHA1": "bONEZcbkYKiPyABrecOLzHomjPU=",
"path": "golang.org/x/net/html",
"revision": "ba9fcec4b297b415637633c5a6e8fa592e4a16c3",
"revisionTime": "2019-08-26T16:14:39Z"
},
{
"checksumSHA1": "XtSbs1gpyaEsIqf6VRhJsgOQe5U=",
"path": "golang.org/x/net/html/atom",
"revision": "ba9fcec4b297b415637633c5a6e8fa592e4a16c3",
"revisionTime": "2019-08-26T16:14:39Z"
},
{
"checksumSHA1": "barUU39reQ7LdgYLA323hQ/UGy4=",
"path": "golang.org/x/net/html/charset",
"revision": "ba9fcec4b297b415637633c5a6e8fa592e4a16c3",
"revisionTime": "2019-08-26T16:14:39Z"
},
{
"checksumSHA1": "tqqo7DEeFCclb58XbN44WwdpWww=",
"path": "golang.org/x/text/encoding",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "HgcUFTOQF5jOYtTIj5obR3GVN9A=",
"path": "golang.org/x/text/encoding/charmap",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "UYlVRSWAA5srH3iWvrJz++Zhpr0=",
"path": "golang.org/x/text/encoding/htmlindex",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "zeHyHebIZl1tGuwGllIhjfci+wI=",
"path": "golang.org/x/text/encoding/internal",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "46UIK1h/DTupMdRnLkijrEIwzv4=",
"path": "golang.org/x/text/encoding/internal/identifier",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "DhdZROnJq+cEcQ/sHY7GEq5wQ8U=",
"path": "golang.org/x/text/encoding/japanese",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "qHQ79q9peY8ZkCMC8kJAb52BAWg=",
"path": "golang.org/x/text/encoding/korean",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "55UdScb+EMOCPr7OW0hCwDsVxpg=",
"path": "golang.org/x/text/encoding/simplifiedchinese",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "9EZF1SHTpjVmaT9sARitvGKUXOY=",
"path": "golang.org/x/text/encoding/traditionalchinese",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "bAJTZJ3IGJdNmN/PSlRMRxWtxec=",
"path": "golang.org/x/text/encoding/unicode",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "8ea1h1pimPfXc6cE5l3SQTe7SVo=",
"path": "golang.org/x/text/internal/language",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "GxBlFOqWoIsWCMswUHh6dUqM5no=",
"path": "golang.org/x/text/internal/language/compact",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "hyNCcTwMQnV6/MK8uUW9E5H0J0M=",
"path": "golang.org/x/text/internal/tag",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "Qk7dljcrEK1BJkAEZguxAbG9dSo=",
"path": "golang.org/x/text/internal/utf8internal",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "kgODOZdRLWKSppiHzrqOKdtrGHA=",
"path": "golang.org/x/text/language",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "IV4MN7KGBSocu/5NR3le3sxup4Y=",
"path": "golang.org/x/text/runes",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
},
{
"checksumSHA1": "R9iBDY+aPnT+8pyRcqGjXq5QixA=",
"path": "golang.org/x/text/transform",
"revision": "3d0f7978add91030e5e8976ff65ccdd828286cba",
"revisionTime": "2019-08-29T15:11:34Z"
}
],
"rootPath": "salsa.debian.org/mdosch-guest/feed-to-muc"
}