From 03c8373549be6cf1989c65cfb2b308af48e3d9b2 Mon Sep 17 00:00:00 2001 From: Martin Dosch Date: Fri, 30 Aug 2019 09:55:06 +0200 Subject: [PATCH] Updated vendor packages. --- vendor/github.com/chilts/sid/ReadMe.md | 14 +- vendor/github.com/chilts/sid/go.mod | 3 + vendor/github.com/chilts/sid/sid.go | 93 +- .../jaytaylor}/html2text/LICENSE | 0 .../jaytaylor}/html2text/README.md | 0 .../jaytaylor}/html2text/html2text.go | 0 .../olekukonko/tablewriter/README.md | 2 +- .../github.com/olekukonko/tablewriter/go.mod | 8 + .../github.com/olekukonko/tablewriter/go.sum | 4 + .../olekukonko/tablewriter/table.go | 4 +- vendor/golang.org/x/net/html/atom/gen.go | 712 ++++++++ .../x/text/encoding/charmap/maketables.go | 556 ++++++ .../x/text/encoding/htmlindex/gen.go | 173 ++ .../text/encoding/internal/identifier/gen.go | 142 ++ .../x/text/encoding/japanese/maketables.go | 161 ++ .../x/text/encoding/korean/maketables.go | 143 ++ .../encoding/simplifiedchinese/maketables.go | 161 ++ .../encoding/traditionalchinese/maketables.go | 140 ++ .../x/text/internal/language/compact/gen.go | 64 + .../internal/language/compact/gen_index.go | 113 ++ .../internal/language/compact/gen_parents.go | 54 + .../x/text/internal/language/gen.go | 1520 +++++++++++++++++ .../x/text/internal/language/gen_common.go | 20 + vendor/golang.org/x/text/language/gen.go | 305 ++++ vendor/vendor.json | 126 +- 25 files changed, 4441 insertions(+), 77 deletions(-) create mode 100644 vendor/github.com/chilts/sid/go.mod rename vendor/{jaytaylor.com => github.com/jaytaylor}/html2text/LICENSE (100%) rename vendor/{jaytaylor.com => github.com/jaytaylor}/html2text/README.md (100%) rename vendor/{jaytaylor.com => github.com/jaytaylor}/html2text/html2text.go (100%) create mode 100644 vendor/github.com/olekukonko/tablewriter/go.mod create mode 100644 vendor/github.com/olekukonko/tablewriter/go.sum create mode 100644 vendor/golang.org/x/net/html/atom/gen.go create mode 100644 vendor/golang.org/x/text/encoding/charmap/maketables.go create mode 100644 vendor/golang.org/x/text/encoding/htmlindex/gen.go create mode 100644 vendor/golang.org/x/text/encoding/internal/identifier/gen.go create mode 100644 vendor/golang.org/x/text/encoding/japanese/maketables.go create mode 100644 vendor/golang.org/x/text/encoding/korean/maketables.go create mode 100644 vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go create mode 100644 vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go create mode 100644 vendor/golang.org/x/text/internal/language/compact/gen.go create mode 100644 vendor/golang.org/x/text/internal/language/compact/gen_index.go create mode 100644 vendor/golang.org/x/text/internal/language/compact/gen_parents.go create mode 100644 vendor/golang.org/x/text/internal/language/gen.go create mode 100644 vendor/golang.org/x/text/internal/language/gen_common.go create mode 100644 vendor/golang.org/x/text/language/gen.go diff --git a/vendor/github.com/chilts/sid/ReadMe.md b/vendor/github.com/chilts/sid/ReadMe.md index b4653cb..7761928 100644 --- a/vendor/github.com/chilts/sid/ReadMe.md +++ b/vendor/github.com/chilts/sid/ReadMe.md @@ -20,13 +20,19 @@ go get github.com/chilts/sid ``` id1 := sid.Id() -id2 := sid.Id() +id2 := sid.IdHex() +id3 := sid.IdBase32() +id4 := sid.IdBase64() fmt.Printf("id1 = %s\n", id1) fmt.Printf("id2 = %s\n", id2) +fmt.Printf("id3 = %s\n", id3) +fmt.Printf("id4 = %s\n", id4) -// -> "id1 = 1IeSBAWW83j-2wgJ4PUtlAr" -// -> "id2 = 1IeSBAWW9kK-0cDG64GQgGJ" +// -> "id1 = 1559872035903071353-1186579057231285506" +// -> "id2 = 15a5cf57e7d2a837-6eaafe687e7b3ec3" +// -> "id3 = 1b9efqnl51jj7-4u66ikpfq9ugm" +// -> "id4 = 1IeSBAWW9kK-0cDG64GQgGJ" ``` ## Author @@ -37,6 +43,6 @@ For [AppsAttic](https://appsattic.com/), [@AppsAttic](https://twitter.com/AppsAt ## License -[MIT](https://publish.li/mit-qLQqmVTO). +[MIT](https://chilts.mit-license.org/2017/) (Ends) diff --git a/vendor/github.com/chilts/sid/go.mod b/vendor/github.com/chilts/sid/go.mod new file mode 100644 index 0000000..fbe38d8 --- /dev/null +++ b/vendor/github.com/chilts/sid/go.mod @@ -0,0 +1,3 @@ +module github.com/chilts/sid + +go 1.12 diff --git a/vendor/github.com/chilts/sid/sid.go b/vendor/github.com/chilts/sid/sid.go index 7c1f2c8..d3b55e3 100644 --- a/vendor/github.com/chilts/sid/sid.go +++ b/vendor/github.com/chilts/sid/sid.go @@ -12,6 +12,7 @@ package sid import ( + "fmt" "math/rand" "strconv" "strings" @@ -32,17 +33,39 @@ var chars = make([]string, 11, 11) // 64 chars but ordered by ASCII const base64 string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~" -func toStr(now int64) string { +func toStr(n int64) string { // now do the generation (backwards, so we just %64 then /64 along the way) for i := 10; i >= 0; i-- { - index := now % 64 + index := n % 64 chars[i] = string(base64[index]) - now = now / 64 + n = n / 64 } return strings.Join(chars, "") } +func toBase32(n int64) string { + b32 := strconv.FormatInt(n, 32) + + for len(b32) < 13 { + b32 = "0" + b32 + } + + // log.Printf("b32=%s\n", b32) + + return b32 +} + +func toHex(n int64) string { + hex := fmt.Sprintf("%x", n) + + for len(hex) < 16 { + hex = "0" + hex + } + + return hex +} + // IdBase64 returns a 23 char string based on timestamp and a random number. The format is "XXXXXXXXXXX-YYYYYYYYYYY" // where X is the timestamp and Y is the random number. If (by any chance) this is called in the same nanosecond, the // random number is incremented instead of a new one being generated. This makes sure that two consecutive Ids @@ -71,10 +94,66 @@ func IdBase64() string { return toStr(now) + "-" + toStr(r) } -// Id returns a 23 char string based on timestamp and a random number. The format is "XXXXXXXXXXX-YYYYYYYYYYY" where X -// is the timestamp and Y is the random number. If (by any chance) this is called in the same nanosecond, the random -// number is incremented instead of a new one being generated. This makes sure that two consecutive Ids generated in -// the same goroutine also ensure those Ids are also sortable. +// IdBase32 returns a 27 char string based on timestamp and a random number. The format is +// "XXXXXXXXXXXXX-YYYYYYYYYYYYY" where X is the timestamp and Y is the random number. If (by any chance) this is called +// in the same nanosecond, the random number is incremented instead of a new one being generated. This makes sure that +// two consecutive Ids generated in the same goroutine also ensure those Ids are also sortable. +// +// It is safe to call from different goroutines since it has it's own locking. +func IdBase32() string { + // lock for lastTime, lastRand, and chars + mu.Lock() + defer mu.Unlock() + + now := time.Now().UTC().UnixNano() + var r int64 + + // if we have the same time, just inc lastRand, else create a new one + if now == lastTime { + lastRand++ + } else { + lastRand = rand.Int63() + } + r = lastRand + + // remember this for next time + lastTime = now + + return toBase32(now) + "-" + toBase32(r) +} + +// IdHex returns a char string based on timestamp and a random number. The format is +// "XXXXXXXXXXXXXXXX-YYYYYYYYYYYYYYYY" where X is the timestamp and Y is the random number. If (by any chance) this is +// called in the same nanosecond, the random number is incremented instead of a new one being generated. This makes +// sure that two consecutive Ids generated in the same goroutine also ensure those Ids are also sortable. +// +// It is safe to call from different goroutines since it has it's own locking. +func IdHex() string { + // lock for lastTime, lastRand, and chars + mu.Lock() + defer mu.Unlock() + + now := time.Now().UTC().UnixNano() + var r int64 + + // if we have the same time, just inc lastRand, else create a new one + if now == lastTime { + lastRand++ + } else { + lastRand = rand.Int63() + } + r = lastRand + + // remember this for next time + lastTime = now + + return toHex(now) + "-" + toHex(r) +} + +// Id returns a 39 char string based on timestamp and a random number. The format is +// "XXXXXXXXXXXXXXXXXXX-YYYYYYYYYYYYYYYYYYY" where X is the timestamp and Y is the random number. If (by any chance) +// this is called in the same nanosecond, the random number is incremented instead of a new one being generated. This +// makes sure that two consecutive Ids generated in the same goroutine also ensure those Ids are also sortable. // // It is safe to call from different goroutines since it has it's own locking. func Id() string { diff --git a/vendor/jaytaylor.com/html2text/LICENSE b/vendor/github.com/jaytaylor/html2text/LICENSE similarity index 100% rename from vendor/jaytaylor.com/html2text/LICENSE rename to vendor/github.com/jaytaylor/html2text/LICENSE diff --git a/vendor/jaytaylor.com/html2text/README.md b/vendor/github.com/jaytaylor/html2text/README.md similarity index 100% rename from vendor/jaytaylor.com/html2text/README.md rename to vendor/github.com/jaytaylor/html2text/README.md diff --git a/vendor/jaytaylor.com/html2text/html2text.go b/vendor/github.com/jaytaylor/html2text/html2text.go similarity index 100% rename from vendor/jaytaylor.com/html2text/html2text.go rename to vendor/github.com/jaytaylor/html2text/html2text.go diff --git a/vendor/github.com/olekukonko/tablewriter/README.md b/vendor/github.com/olekukonko/tablewriter/README.md index 92d71ed..cb5e137 100644 --- a/vendor/github.com/olekukonko/tablewriter/README.md +++ b/vendor/github.com/olekukonko/tablewriter/README.md @@ -283,7 +283,7 @@ import ( func main() { tableString := &strings.Builder{} - table := tablewriter.NewWriter(tableString) + table := tablewriter.NewWriter(tableString) /* * Code to fill the table diff --git a/vendor/github.com/olekukonko/tablewriter/go.mod b/vendor/github.com/olekukonko/tablewriter/go.mod new file mode 100644 index 0000000..84b405d --- /dev/null +++ b/vendor/github.com/olekukonko/tablewriter/go.mod @@ -0,0 +1,8 @@ +module github.com/olekukonko/tablewriter + +go 1.12 + +require ( + github.com/mattn/go-runewidth v0.0.4 + github.com/olekukonko/tablewriter v0.0.1 +) diff --git a/vendor/github.com/olekukonko/tablewriter/go.sum b/vendor/github.com/olekukonko/tablewriter/go.sum new file mode 100644 index 0000000..9d5e335 --- /dev/null +++ b/vendor/github.com/olekukonko/tablewriter/go.sum @@ -0,0 +1,4 @@ +github.com/mattn/go-runewidth v0.0.4 h1:2BvfKmzob6Bmd4YsL0zygOqfdFnK7GR4QL06Do4/p7Y= +github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= +github.com/olekukonko/tablewriter v0.0.1 h1:b3iUnf1v+ppJiOfNX4yxxqfWKMQPZR5yoh8urCTFX88= +github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= diff --git a/vendor/github.com/olekukonko/tablewriter/table.go b/vendor/github.com/olekukonko/tablewriter/table.go index 3cf0996..06341bc 100644 --- a/vendor/github.com/olekukonko/tablewriter/table.go +++ b/vendor/github.com/olekukonko/tablewriter/table.go @@ -766,7 +766,7 @@ func (t *Table) printRowMergeCells(writer io.Writer, columns [][]string, rowIdx if t.autoMergeCells { //Store the full line to merge mutli-lines cells - fullLine := strings.Join(columns[y], " ") + fullLine := strings.TrimRight(strings.Join(columns[y], " "), " ") if len(previousLine) > y && fullLine == previousLine[y] && fullLine != "" { // If this cell is identical to the one above but not empty, we don't display the border and keep the cell empty. displayCellBorder = append(displayCellBorder, false) @@ -804,7 +804,7 @@ func (t *Table) printRowMergeCells(writer io.Writer, columns [][]string, rowIdx //The new previous line is the current one previousLine = make([]string, total) for y := 0; y < total; y++ { - previousLine[y] = strings.Join(columns[y], " ") //Store the full line for multi-lines cells + previousLine[y] = strings.TrimRight(strings.Join(columns[y], " ")," ") //Store the full line for multi-lines cells } //Returns the newly added line and wether or not a border should be displayed above. return previousLine, displayCellBorder diff --git a/vendor/golang.org/x/net/html/atom/gen.go b/vendor/golang.org/x/net/html/atom/gen.go new file mode 100644 index 0000000..5d05278 --- /dev/null +++ b/vendor/golang.org/x/net/html/atom/gen.go @@ -0,0 +1,712 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +//go:generate go run gen.go +//go:generate go run gen.go -test + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "io/ioutil" + "math/rand" + "os" + "sort" + "strings" +) + +// identifier converts s to a Go exported identifier. +// It converts "div" to "Div" and "accept-charset" to "AcceptCharset". +func identifier(s string) string { + b := make([]byte, 0, len(s)) + cap := true + for _, c := range s { + if c == '-' { + cap = true + continue + } + if cap && 'a' <= c && c <= 'z' { + c -= 'a' - 'A' + } + cap = false + b = append(b, byte(c)) + } + return string(b) +} + +var test = flag.Bool("test", false, "generate table_test.go") + +func genFile(name string, buf *bytes.Buffer) { + b, err := format.Source(buf.Bytes()) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + if err := ioutil.WriteFile(name, b, 0644); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} + +func main() { + flag.Parse() + + var all []string + all = append(all, elements...) + all = append(all, attributes...) + all = append(all, eventHandlers...) + all = append(all, extra...) + sort.Strings(all) + + // uniq - lists have dups + w := 0 + for _, s := range all { + if w == 0 || all[w-1] != s { + all[w] = s + w++ + } + } + all = all[:w] + + if *test { + var buf bytes.Buffer + fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") + fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n") + fmt.Fprintln(&buf, "package atom\n") + fmt.Fprintln(&buf, "var testAtomList = []string{") + for _, s := range all { + fmt.Fprintf(&buf, "\t%q,\n", s) + } + fmt.Fprintln(&buf, "}") + + genFile("table_test.go", &buf) + return + } + + // Find hash that minimizes table size. + var best *table + for i := 0; i < 1000000; i++ { + if best != nil && 1<<(best.k-1) < len(all) { + break + } + h := rand.Uint32() + for k := uint(0); k <= 16; k++ { + if best != nil && k >= best.k { + break + } + var t table + if t.init(h, k, all) { + best = &t + break + } + } + } + if best == nil { + fmt.Fprintf(os.Stderr, "failed to construct string table\n") + os.Exit(1) + } + + // Lay out strings, using overlaps when possible. + layout := append([]string{}, all...) + + // Remove strings that are substrings of other strings + for changed := true; changed; { + changed = false + for i, s := range layout { + if s == "" { + continue + } + for j, t := range layout { + if i != j && t != "" && strings.Contains(s, t) { + changed = true + layout[j] = "" + } + } + } + } + + // Join strings where one suffix matches another prefix. + for { + // Find best i, j, k such that layout[i][len-k:] == layout[j][:k], + // maximizing overlap length k. + besti := -1 + bestj := -1 + bestk := 0 + for i, s := range layout { + if s == "" { + continue + } + for j, t := range layout { + if i == j { + continue + } + for k := bestk + 1; k <= len(s) && k <= len(t); k++ { + if s[len(s)-k:] == t[:k] { + besti = i + bestj = j + bestk = k + } + } + } + } + if bestk > 0 { + layout[besti] += layout[bestj][bestk:] + layout[bestj] = "" + continue + } + break + } + + text := strings.Join(layout, "") + + atom := map[string]uint32{} + for _, s := range all { + off := strings.Index(text, s) + if off < 0 { + panic("lost string " + s) + } + atom[s] = uint32(off<<8 | len(s)) + } + + var buf bytes.Buffer + // Generate the Go code. + fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") + fmt.Fprintln(&buf, "//go:generate go run gen.go\n") + fmt.Fprintln(&buf, "package atom\n\nconst (") + + // compute max len + maxLen := 0 + for _, s := range all { + if maxLen < len(s) { + maxLen = len(s) + } + fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s]) + } + fmt.Fprintln(&buf, ")\n") + + fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0) + fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen) + + fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k) + for i, s := range best.tab { + if s == "" { + continue + } + fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s) + } + fmt.Fprintf(&buf, "}\n") + datasize := (1 << best.k) * 4 + + fmt.Fprintln(&buf, "const atomText =") + textsize := len(text) + for len(text) > 60 { + fmt.Fprintf(&buf, "\t%q +\n", text[:60]) + text = text[60:] + } + fmt.Fprintf(&buf, "\t%q\n\n", text) + + genFile("table.go", &buf) + + fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize) +} + +type byLen []string + +func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } +func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] } +func (x byLen) Len() int { return len(x) } + +// fnv computes the FNV hash with an arbitrary starting value h. +func fnv(h uint32, s string) uint32 { + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + return h +} + +// A table represents an attempt at constructing the lookup table. +// The lookup table uses cuckoo hashing, meaning that each string +// can be found in one of two positions. +type table struct { + h0 uint32 + k uint + mask uint32 + tab []string +} + +// hash returns the two hashes for s. +func (t *table) hash(s string) (h1, h2 uint32) { + h := fnv(t.h0, s) + h1 = h & t.mask + h2 = (h >> 16) & t.mask + return +} + +// init initializes the table with the given parameters. +// h0 is the initial hash value, +// k is the number of bits of hash value to use, and +// x is the list of strings to store in the table. +// init returns false if the table cannot be constructed. +func (t *table) init(h0 uint32, k uint, x []string) bool { + t.h0 = h0 + t.k = k + t.tab = make([]string, 1< len(t.tab) { + return false + } + s := t.tab[i] + h1, h2 := t.hash(s) + j := h1 + h2 - i + if t.tab[j] != "" && !t.push(j, depth+1) { + return false + } + t.tab[j] = s + return true +} + +// The lists of element names and attribute keys were taken from +// https://html.spec.whatwg.org/multipage/indices.html#index +// as of the "HTML Living Standard - Last Updated 16 April 2018" version. + +// "command", "keygen" and "menuitem" have been removed from the spec, +// but are kept here for backwards compatibility. +var elements = []string{ + "a", + "abbr", + "address", + "area", + "article", + "aside", + "audio", + "b", + "base", + "bdi", + "bdo", + "blockquote", + "body", + "br", + "button", + "canvas", + "caption", + "cite", + "code", + "col", + "colgroup", + "command", + "data", + "datalist", + "dd", + "del", + "details", + "dfn", + "dialog", + "div", + "dl", + "dt", + "em", + "embed", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "header", + "hgroup", + "hr", + "html", + "i", + "iframe", + "img", + "input", + "ins", + "kbd", + "keygen", + "label", + "legend", + "li", + "link", + "main", + "map", + "mark", + "menu", + "menuitem", + "meta", + "meter", + "nav", + "noscript", + "object", + "ol", + "optgroup", + "option", + "output", + "p", + "param", + "picture", + "pre", + "progress", + "q", + "rp", + "rt", + "ruby", + "s", + "samp", + "script", + "section", + "select", + "slot", + "small", + "source", + "span", + "strong", + "style", + "sub", + "summary", + "sup", + "table", + "tbody", + "td", + "template", + "textarea", + "tfoot", + "th", + "thead", + "time", + "title", + "tr", + "track", + "u", + "ul", + "var", + "video", + "wbr", +} + +// https://html.spec.whatwg.org/multipage/indices.html#attributes-3 +// +// "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup", +// "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec, +// but are kept here for backwards compatibility. +var attributes = []string{ + "abbr", + "accept", + "accept-charset", + "accesskey", + "action", + "allowfullscreen", + "allowpaymentrequest", + "allowusermedia", + "alt", + "as", + "async", + "autocomplete", + "autofocus", + "autoplay", + "challenge", + "charset", + "checked", + "cite", + "class", + "color", + "cols", + "colspan", + "command", + "content", + "contenteditable", + "contextmenu", + "controls", + "coords", + "crossorigin", + "data", + "datetime", + "default", + "defer", + "dir", + "dirname", + "disabled", + "download", + "draggable", + "dropzone", + "enctype", + "for", + "form", + "formaction", + "formenctype", + "formmethod", + "formnovalidate", + "formtarget", + "headers", + "height", + "hidden", + "high", + "href", + "hreflang", + "http-equiv", + "icon", + "id", + "inputmode", + "integrity", + "is", + "ismap", + "itemid", + "itemprop", + "itemref", + "itemscope", + "itemtype", + "keytype", + "kind", + "label", + "lang", + "list", + "loop", + "low", + "manifest", + "max", + "maxlength", + "media", + "mediagroup", + "method", + "min", + "minlength", + "multiple", + "muted", + "name", + "nomodule", + "nonce", + "novalidate", + "open", + "optimum", + "pattern", + "ping", + "placeholder", + "playsinline", + "poster", + "preload", + "radiogroup", + "readonly", + "referrerpolicy", + "rel", + "required", + "reversed", + "rows", + "rowspan", + "sandbox", + "spellcheck", + "scope", + "scoped", + "seamless", + "selected", + "shape", + "size", + "sizes", + "sortable", + "sorted", + "slot", + "span", + "spellcheck", + "src", + "srcdoc", + "srclang", + "srcset", + "start", + "step", + "style", + "tabindex", + "target", + "title", + "translate", + "type", + "typemustmatch", + "updateviacache", + "usemap", + "value", + "width", + "workertype", + "wrap", +} + +// "onautocomplete", "onautocompleteerror", "onmousewheel", +// "onshow" and "onsort" have been removed from the spec, +// but are kept here for backwards compatibility. +var eventHandlers = []string{ + "onabort", + "onautocomplete", + "onautocompleteerror", + "onauxclick", + "onafterprint", + "onbeforeprint", + "onbeforeunload", + "onblur", + "oncancel", + "oncanplay", + "oncanplaythrough", + "onchange", + "onclick", + "onclose", + "oncontextmenu", + "oncopy", + "oncuechange", + "oncut", + "ondblclick", + "ondrag", + "ondragend", + "ondragenter", + "ondragexit", + "ondragleave", + "ondragover", + "ondragstart", + "ondrop", + "ondurationchange", + "onemptied", + "onended", + "onerror", + "onfocus", + "onhashchange", + "oninput", + "oninvalid", + "onkeydown", + "onkeypress", + "onkeyup", + "onlanguagechange", + "onload", + "onloadeddata", + "onloadedmetadata", + "onloadend", + "onloadstart", + "onmessage", + "onmessageerror", + "onmousedown", + "onmouseenter", + "onmouseleave", + "onmousemove", + "onmouseout", + "onmouseover", + "onmouseup", + "onmousewheel", + "onwheel", + "onoffline", + "ononline", + "onpagehide", + "onpageshow", + "onpaste", + "onpause", + "onplay", + "onplaying", + "onpopstate", + "onprogress", + "onratechange", + "onreset", + "onresize", + "onrejectionhandled", + "onscroll", + "onsecuritypolicyviolation", + "onseeked", + "onseeking", + "onselect", + "onshow", + "onsort", + "onstalled", + "onstorage", + "onsubmit", + "onsuspend", + "ontimeupdate", + "ontoggle", + "onunhandledrejection", + "onunload", + "onvolumechange", + "onwaiting", +} + +// extra are ad-hoc values not covered by any of the lists above. +var extra = []string{ + "acronym", + "align", + "annotation", + "annotation-xml", + "applet", + "basefont", + "bgsound", + "big", + "blink", + "center", + "color", + "desc", + "face", + "font", + "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive. + "foreignobject", + "frame", + "frameset", + "image", + "isindex", + "listing", + "malignmark", + "marquee", + "math", + "mglyph", + "mi", + "mn", + "mo", + "ms", + "mtext", + "nobr", + "noembed", + "noframes", + "plaintext", + "prompt", + "public", + "rb", + "rtc", + "spacer", + "strike", + "svg", + "system", + "tt", + "xmp", +} diff --git a/vendor/golang.org/x/text/encoding/charmap/maketables.go b/vendor/golang.org/x/text/encoding/charmap/maketables.go new file mode 100644 index 0000000..f794170 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/charmap/maketables.go @@ -0,0 +1,556 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +import ( + "bufio" + "fmt" + "log" + "net/http" + "sort" + "strings" + "unicode/utf8" + + "golang.org/x/text/encoding" + "golang.org/x/text/internal/gen" +) + +const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + + ` !"#$%&'()*+,-./0123456789:;<=>?` + + `@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` + + "`abcdefghijklmnopqrstuvwxyz{|}~\u007f" + +var encodings = []struct { + name string + mib string + comment string + varName string + replacement byte + mapping string +}{ + { + "IBM Code Page 037", + "IBM037", + "", + "CodePage037", + 0x3f, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm", + }, + { + "IBM Code Page 437", + "PC8CodePage437", + "", + "CodePage437", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm", + }, + { + "IBM Code Page 850", + "PC850Multilingual", + "", + "CodePage850", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm", + }, + { + "IBM Code Page 852", + "PCp852", + "", + "CodePage852", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm", + }, + { + "IBM Code Page 855", + "IBM855", + "", + "CodePage855", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm", + }, + { + "Windows Code Page 858", // PC latin1 with Euro + "IBM00858", + "", + "CodePage858", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm", + }, + { + "IBM Code Page 860", + "IBM860", + "", + "CodePage860", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm", + }, + { + "IBM Code Page 862", + "PC862LatinHebrew", + "", + "CodePage862", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm", + }, + { + "IBM Code Page 863", + "IBM863", + "", + "CodePage863", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm", + }, + { + "IBM Code Page 865", + "IBM865", + "", + "CodePage865", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm", + }, + { + "IBM Code Page 866", + "IBM866", + "", + "CodePage866", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-ibm866.txt", + }, + { + "IBM Code Page 1047", + "IBM1047", + "", + "CodePage1047", + 0x3f, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm", + }, + { + "IBM Code Page 1140", + "IBM01140", + "", + "CodePage1140", + 0x3f, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm", + }, + { + "ISO 8859-1", + "ISOLatin1", + "", + "ISO8859_1", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm", + }, + { + "ISO 8859-2", + "ISOLatin2", + "", + "ISO8859_2", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-2.txt", + }, + { + "ISO 8859-3", + "ISOLatin3", + "", + "ISO8859_3", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-3.txt", + }, + { + "ISO 8859-4", + "ISOLatin4", + "", + "ISO8859_4", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-4.txt", + }, + { + "ISO 8859-5", + "ISOLatinCyrillic", + "", + "ISO8859_5", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-5.txt", + }, + { + "ISO 8859-6", + "ISOLatinArabic", + "", + "ISO8859_6,ISO8859_6E,ISO8859_6I", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-6.txt", + }, + { + "ISO 8859-7", + "ISOLatinGreek", + "", + "ISO8859_7", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-7.txt", + }, + { + "ISO 8859-8", + "ISOLatinHebrew", + "", + "ISO8859_8,ISO8859_8E,ISO8859_8I", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-8.txt", + }, + { + "ISO 8859-9", + "ISOLatin5", + "", + "ISO8859_9", + encoding.ASCIISub, + "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm", + }, + { + "ISO 8859-10", + "ISOLatin6", + "", + "ISO8859_10", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-10.txt", + }, + { + "ISO 8859-13", + "ISO885913", + "", + "ISO8859_13", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-13.txt", + }, + { + "ISO 8859-14", + "ISO885914", + "", + "ISO8859_14", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-14.txt", + }, + { + "ISO 8859-15", + "ISO885915", + "", + "ISO8859_15", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-15.txt", + }, + { + "ISO 8859-16", + "ISO885916", + "", + "ISO8859_16", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-iso-8859-16.txt", + }, + { + "KOI8-R", + "KOI8R", + "", + "KOI8R", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-koi8-r.txt", + }, + { + "KOI8-U", + "KOI8U", + "", + "KOI8U", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-koi8-u.txt", + }, + { + "Macintosh", + "Macintosh", + "", + "Macintosh", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-macintosh.txt", + }, + { + "Macintosh Cyrillic", + "MacintoshCyrillic", + "", + "MacintoshCyrillic", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt", + }, + { + "Windows 874", + "Windows874", + "", + "Windows874", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-874.txt", + }, + { + "Windows 1250", + "Windows1250", + "", + "Windows1250", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1250.txt", + }, + { + "Windows 1251", + "Windows1251", + "", + "Windows1251", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1251.txt", + }, + { + "Windows 1252", + "Windows1252", + "", + "Windows1252", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1252.txt", + }, + { + "Windows 1253", + "Windows1253", + "", + "Windows1253", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1253.txt", + }, + { + "Windows 1254", + "Windows1254", + "", + "Windows1254", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1254.txt", + }, + { + "Windows 1255", + "Windows1255", + "", + "Windows1255", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1255.txt", + }, + { + "Windows 1256", + "Windows1256", + "", + "Windows1256", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1256.txt", + }, + { + "Windows 1257", + "Windows1257", + "", + "Windows1257", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1257.txt", + }, + { + "Windows 1258", + "Windows1258", + "", + "Windows1258", + encoding.ASCIISub, + "http://encoding.spec.whatwg.org/index-windows-1258.txt", + }, + { + "X-User-Defined", + "XUserDefined", + "It is defined at http://encoding.spec.whatwg.org/#x-user-defined", + "XUserDefined", + encoding.ASCIISub, + ascii + + "\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" + + "\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" + + "\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" + + "\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" + + "\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" + + "\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" + + "\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" + + "\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" + + "\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" + + "\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" + + "\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" + + "\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" + + "\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" + + "\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" + + "\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" + + "\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff", + }, +} + +func getWHATWG(url string) string { + res, err := http.Get(url) + if err != nil { + log.Fatalf("%q: Get: %v", url, err) + } + defer res.Body.Close() + + mapping := make([]rune, 128) + for i := range mapping { + mapping[i] = '\ufffd' + } + + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := 0, 0 + if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil { + log.Fatalf("could not parse %q", s) + } + if x < 0 || 128 <= x { + log.Fatalf("code %d is out of range", x) + } + if 0x80 <= y && y < 0xa0 { + // We diverge from the WHATWG spec by mapping control characters + // in the range [0x80, 0xa0) to U+FFFD. + continue + } + mapping[x] = rune(y) + } + return ascii + string(mapping) +} + +func getUCM(url string) string { + res, err := http.Get(url) + if err != nil { + log.Fatalf("%q: Get: %v", url, err) + } + defer res.Body.Close() + + mapping := make([]rune, 256) + for i := range mapping { + mapping[i] = '\ufffd' + } + + charsFound := 0 + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + var c byte + var r rune + if _, err := fmt.Sscanf(s, ` \x%x |0`, &r, &c); err != nil { + continue + } + mapping[c] = r + charsFound++ + } + + if charsFound < 200 { + log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound) + } + + return string(mapping) +} + +func main() { + mibs := map[string]bool{} + all := []string{} + + w := gen.NewCodeWriter() + defer w.WriteGoFile("tables.go", "charmap") + + printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) } + + printf("import (\n") + printf("\t\"golang.org/x/text/encoding\"\n") + printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n") + printf(")\n\n") + for _, e := range encodings { + varNames := strings.Split(e.varName, ",") + all = append(all, varNames...) + varName := varNames[0] + switch { + case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"): + e.mapping = getWHATWG(e.mapping) + case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"): + e.mapping = getUCM(e.mapping) + } + + asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00 + if asciiSuperset { + low = 0x80 + } + lvn := 1 + if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") { + lvn = 3 + } + lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:] + printf("// %s is the %s encoding.\n", varName, e.name) + if e.comment != "" { + printf("//\n// %s\n", e.comment) + } + printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n", + varName, lowerVarName, lowerVarName, e.name) + if mibs[e.mib] { + log.Fatalf("MIB type %q declared multiple times.", e.mib) + } + printf("mib: identifier.%s,\n", e.mib) + printf("asciiSuperset: %t,\n", asciiSuperset) + printf("low: 0x%02x,\n", low) + printf("replacement: 0x%02x,\n", e.replacement) + + printf("decode: [256]utf8Enc{\n") + i, backMapping := 0, map[rune]byte{} + for _, c := range e.mapping { + if _, ok := backMapping[c]; !ok && c != utf8.RuneError { + backMapping[c] = byte(i) + } + var buf [8]byte + n := utf8.EncodeRune(buf[:], c) + if n > 3 { + panic(fmt.Sprintf("rune %q (%U) is too long", c, c)) + } + printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2]) + if i%2 == 1 { + printf("\n") + } + i++ + } + printf("},\n") + + printf("encode: [256]uint32{\n") + encode := make([]uint32, 0, 256) + for c, i := range backMapping { + encode = append(encode, uint32(i)<<24|uint32(c)) + } + sort.Sort(byRune(encode)) + for len(encode) < cap(encode) { + encode = append(encode, encode[len(encode)-1]) + } + for i, enc := range encode { + printf("0x%08x,", enc) + if i%8 == 7 { + printf("\n") + } + } + printf("},\n}\n") + + // Add an estimate of the size of a single Charmap{} struct value, which + // includes two 256 elem arrays of 4 bytes and some extra fields, which + // align to 3 uint64s on 64-bit architectures. + w.Size += 2*4*256 + 3*8 + } + // TODO: add proper line breaking. + printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n")) +} + +type byRune []uint32 + +func (b byRune) Len() int { return len(b) } +func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff } +func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/htmlindex/gen.go b/vendor/golang.org/x/text/encoding/htmlindex/gen.go new file mode 100644 index 0000000..ac6b4a7 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/htmlindex/gen.go @@ -0,0 +1,173 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "log" + "strings" + + "golang.org/x/text/internal/gen" +) + +type group struct { + Encodings []struct { + Labels []string + Name string + } +} + +func main() { + gen.Init() + + r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json") + var groups []group + if err := json.NewDecoder(r).Decode(&groups); err != nil { + log.Fatalf("Error reading encodings.json: %v", err) + } + + w := &bytes.Buffer{} + fmt.Fprintln(w, "type htmlEncoding byte") + fmt.Fprintln(w, "const (") + for i, g := range groups { + for _, e := range g.Encodings { + key := strings.ToLower(e.Name) + name := consts[key] + if name == "" { + log.Fatalf("No const defined for %s.", key) + } + if i == 0 { + fmt.Fprintf(w, "%s htmlEncoding = iota\n", name) + } else { + fmt.Fprintf(w, "%s\n", name) + } + } + } + fmt.Fprintln(w, "numEncodings") + fmt.Fprint(w, ")\n\n") + + fmt.Fprintln(w, "var canonical = [numEncodings]string{") + for _, g := range groups { + for _, e := range g.Encodings { + fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name)) + } + } + fmt.Fprint(w, "}\n\n") + + fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{") + for _, g := range groups { + for _, e := range g.Encodings { + for _, l := range e.Labels { + key := strings.ToLower(e.Name) + name := consts[key] + fmt.Fprintf(w, "%q: %s,\n", l, name) + } + } + } + fmt.Fprint(w, "}\n\n") + + var tags []string + fmt.Fprintln(w, "var localeMap = []htmlEncoding{") + for _, loc := range locales { + tags = append(tags, loc.tag) + fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag) + } + fmt.Fprint(w, "}\n\n") + + fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " ")) + + gen.WriteGoFile("tables.go", "htmlindex", w.Bytes()) +} + +// consts maps canonical encoding name to internal constant. +var consts = map[string]string{ + "utf-8": "utf8", + "ibm866": "ibm866", + "iso-8859-2": "iso8859_2", + "iso-8859-3": "iso8859_3", + "iso-8859-4": "iso8859_4", + "iso-8859-5": "iso8859_5", + "iso-8859-6": "iso8859_6", + "iso-8859-7": "iso8859_7", + "iso-8859-8": "iso8859_8", + "iso-8859-8-i": "iso8859_8I", + "iso-8859-10": "iso8859_10", + "iso-8859-13": "iso8859_13", + "iso-8859-14": "iso8859_14", + "iso-8859-15": "iso8859_15", + "iso-8859-16": "iso8859_16", + "koi8-r": "koi8r", + "koi8-u": "koi8u", + "macintosh": "macintosh", + "windows-874": "windows874", + "windows-1250": "windows1250", + "windows-1251": "windows1251", + "windows-1252": "windows1252", + "windows-1253": "windows1253", + "windows-1254": "windows1254", + "windows-1255": "windows1255", + "windows-1256": "windows1256", + "windows-1257": "windows1257", + "windows-1258": "windows1258", + "x-mac-cyrillic": "macintoshCyrillic", + "gbk": "gbk", + "gb18030": "gb18030", + // "hz-gb-2312": "hzgb2312", // Was removed from WhatWG + "big5": "big5", + "euc-jp": "eucjp", + "iso-2022-jp": "iso2022jp", + "shift_jis": "shiftJIS", + "euc-kr": "euckr", + "replacement": "replacement", + "utf-16be": "utf16be", + "utf-16le": "utf16le", + "x-user-defined": "xUserDefined", +} + +// locales is taken from +// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm. +var locales = []struct{ tag, name string }{ + // The default value. Explicitly state latin to benefit from the exact + // script option, while still making 1252 the default encoding for languages + // written in Latin script. + {"und_Latn", "windows-1252"}, + {"ar", "windows-1256"}, + {"ba", "windows-1251"}, + {"be", "windows-1251"}, + {"bg", "windows-1251"}, + {"cs", "windows-1250"}, + {"el", "iso-8859-7"}, + {"et", "windows-1257"}, + {"fa", "windows-1256"}, + {"he", "windows-1255"}, + {"hr", "windows-1250"}, + {"hu", "iso-8859-2"}, + {"ja", "shift_jis"}, + {"kk", "windows-1251"}, + {"ko", "euc-kr"}, + {"ku", "windows-1254"}, + {"ky", "windows-1251"}, + {"lt", "windows-1257"}, + {"lv", "windows-1257"}, + {"mk", "windows-1251"}, + {"pl", "iso-8859-2"}, + {"ru", "windows-1251"}, + {"sah", "windows-1251"}, + {"sk", "windows-1250"}, + {"sl", "iso-8859-2"}, + {"sr", "windows-1251"}, + {"tg", "windows-1251"}, + {"th", "windows-874"}, + {"tr", "windows-1254"}, + {"tt", "windows-1251"}, + {"uk", "windows-1251"}, + {"vi", "windows-1258"}, + {"zh-hans", "gb18030"}, + {"zh-hant", "big5"}, +} diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go new file mode 100644 index 0000000..26cfef9 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go @@ -0,0 +1,142 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +import ( + "bytes" + "encoding/xml" + "fmt" + "io" + "log" + "strings" + + "golang.org/x/text/internal/gen" +) + +type registry struct { + XMLName xml.Name `xml:"registry"` + Updated string `xml:"updated"` + Registry []struct { + ID string `xml:"id,attr"` + Record []struct { + Name string `xml:"name"` + Xref []struct { + Type string `xml:"type,attr"` + Data string `xml:"data,attr"` + } `xml:"xref"` + Desc struct { + Data string `xml:",innerxml"` + // Any []struct { + // Data string `xml:",chardata"` + // } `xml:",any"` + // Data string `xml:",chardata"` + } `xml:"description,"` + MIB string `xml:"value"` + Alias []string `xml:"alias"` + MIME string `xml:"preferred_alias"` + } `xml:"record"` + } `xml:"registry"` +} + +func main() { + r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml") + reg := ®istry{} + if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF { + log.Fatalf("Error decoding charset registry: %v", err) + } + if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" { + log.Fatalf("Unexpected ID %s", reg.Registry[0].ID) + } + + w := &bytes.Buffer{} + fmt.Fprintf(w, "const (\n") + for _, rec := range reg.Registry[0].Record { + constName := "" + for _, a := range rec.Alias { + if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 { + // Some of the constant definitions have comments in them. Strip those. + constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0]) + } + } + if constName == "" { + switch rec.MIB { + case "2085": + constName = "HZGB2312" // Not listed as alias for some reason. + default: + log.Fatalf("No cs alias defined for %s.", rec.MIB) + } + } + if rec.MIME != "" { + rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME) + } + fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME) + if len(rec.Desc.Data) > 0 { + fmt.Fprint(w, "// ") + d := xml.NewDecoder(strings.NewReader(rec.Desc.Data)) + inElem := true + attr := "" + for { + t, err := d.Token() + if err != nil { + if err != io.EOF { + log.Fatal(err) + } + break + } + switch x := t.(type) { + case xml.CharData: + attr = "" // Don't need attribute info. + a := bytes.Split([]byte(x), []byte("\n")) + for i, b := range a { + if b = bytes.TrimSpace(b); len(b) != 0 { + if !inElem && i > 0 { + fmt.Fprint(w, "\n// ") + } + inElem = false + fmt.Fprintf(w, "%s ", string(b)) + } + } + case xml.StartElement: + if x.Name.Local == "xref" { + inElem = true + use := false + for _, a := range x.Attr { + if a.Name.Local == "type" { + use = use || a.Value != "person" + } + if a.Name.Local == "data" && use { + // Patch up URLs to use https. From some links, the + // https version is different from the http one. + s := a.Value + s = strings.Replace(s, "http://", "https://", -1) + s = strings.Replace(s, "/unicode/", "/", -1) + attr = s + " " + } + } + } + case xml.EndElement: + inElem = false + fmt.Fprint(w, attr) + } + } + fmt.Fprint(w, "\n") + } + for _, x := range rec.Xref { + switch x.Type { + case "rfc": + fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data)) + case "uri": + fmt.Fprintf(w, "// Reference: %s\n", x.Data) + } + } + fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB) + fmt.Fprintln(w) + } + fmt.Fprintln(w, ")") + + gen.WriteGoFile("mib.go", "identifier", w.Bytes()) +} diff --git a/vendor/golang.org/x/text/encoding/japanese/maketables.go b/vendor/golang.org/x/text/encoding/japanese/maketables.go new file mode 100644 index 0000000..023957a --- /dev/null +++ b/vendor/golang.org/x/text/encoding/japanese/maketables.go @@ -0,0 +1,161 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates tables.go: +// go run maketables.go | gofmt > tables.go + +// TODO: Emoji extensions? +// https://www.unicode.org/faq/emoji_dingbats.html +// https://www.unicode.org/Public/UNIDATA/EmojiSources.txt + +import ( + "bufio" + "fmt" + "log" + "net/http" + "sort" + "strings" +) + +type entry struct { + jisCode, table int +} + +func main() { + fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") + fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n") + fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n") + + reverse := [65536]entry{} + for i := range reverse { + reverse[i].table = -1 + } + + tables := []struct { + url string + name string + }{ + {"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"}, + {"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"}, + } + for i, table := range tables { + res, err := http.Get(table.url) + if err != nil { + log.Fatalf("%q: Get: %v", table.url, err) + } + defer res.Body.Close() + + mapping := [65536]uint16{} + + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := 0, uint16(0) + if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { + log.Fatalf("%q: could not parse %q", table.url, s) + } + if x < 0 || 120*94 <= x { + log.Fatalf("%q: JIS code %d is out of range", table.url, x) + } + mapping[x] = y + if reverse[y].table == -1 { + reverse[y] = entry{jisCode: x, table: i} + } + } + if err := scanner.Err(); err != nil { + log.Fatalf("%q: scanner error: %v", table.url, err) + } + + fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n", + table.name, table.name, table.url) + fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name) + for i, m := range mapping { + if m != 0 { + fmt.Printf("\t%d: 0x%04X,\n", i, m) + } + } + fmt.Printf("}\n\n") + } + + // Any run of at least separation continuous zero entries in the reverse map will + // be a separate encode table. + const separation = 1024 + + intervals := []interval(nil) + low, high := -1, -1 + for i, v := range reverse { + if v.table == -1 { + continue + } + if low < 0 { + low = i + } else if i-high >= separation { + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + low = i + } + high = i + 1 + } + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + sort.Sort(byDecreasingLength(intervals)) + + fmt.Printf("const (\n") + fmt.Printf("\tjis0208 = 1\n") + fmt.Printf("\tjis0212 = 2\n") + fmt.Printf("\tcodeMask = 0x7f\n") + fmt.Printf("\tcodeShift = 7\n") + fmt.Printf("\ttableShift = 14\n") + fmt.Printf(")\n\n") + + fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) + fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n") + fmt.Printf("// sorted by decreasing length.\n") + for i, v := range intervals { + fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) + } + fmt.Printf("//\n") + fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n") + fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n") + fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n") + fmt.Printf("// JIS code (94*j1 + j2) within that table.\n") + fmt.Printf("\n") + + for i, v := range intervals { + fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) + fmt.Printf("var encode%d = [...]uint16{\n", i) + for j := v.low; j < v.high; j++ { + x := reverse[j] + if x.table == -1 { + continue + } + fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n", + j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94) + } + fmt.Printf("}\n\n") + } +} + +// interval is a half-open interval [low, high). +type interval struct { + low, high int +} + +func (i interval) len() int { return i.high - i.low } + +// byDecreasingLength sorts intervals by decreasing length. +type byDecreasingLength []interval + +func (b byDecreasingLength) Len() int { return len(b) } +func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } +func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/korean/maketables.go b/vendor/golang.org/x/text/encoding/korean/maketables.go new file mode 100644 index 0000000..c84034f --- /dev/null +++ b/vendor/golang.org/x/text/encoding/korean/maketables.go @@ -0,0 +1,143 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates tables.go: +// go run maketables.go | gofmt > tables.go + +import ( + "bufio" + "fmt" + "log" + "net/http" + "sort" + "strings" +) + +func main() { + fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") + fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n") + fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n") + + res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt") + if err != nil { + log.Fatalf("Get: %v", err) + } + defer res.Body.Close() + + mapping := [65536]uint16{} + reverse := [65536]uint16{} + + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := uint16(0), uint16(0) + if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { + log.Fatalf("could not parse %q", s) + } + if x < 0 || 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x { + log.Fatalf("EUC-KR code %d is out of range", x) + } + mapping[x] = y + if reverse[y] == 0 { + c0, c1 := uint16(0), uint16(0) + if x < 178*(0xc7-0x81) { + c0 = uint16(x/178) + 0x81 + c1 = uint16(x % 178) + switch { + case c1 < 1*26: + c1 += 0x41 + case c1 < 2*26: + c1 += 0x47 + default: + c1 += 0x4d + } + } else { + x -= 178 * (0xc7 - 0x81) + c0 = uint16(x/94) + 0xc7 + c1 = uint16(x%94) + 0xa1 + } + reverse[y] = c0<<8 | c1 + } + } + if err := scanner.Err(); err != nil { + log.Fatalf("scanner error: %v", err) + } + + fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n") + fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n") + fmt.Printf("var decode = [...]uint16{\n") + for i, v := range mapping { + if v != 0 { + fmt.Printf("\t%d: 0x%04X,\n", i, v) + } + } + fmt.Printf("}\n\n") + + // Any run of at least separation continuous zero entries in the reverse map will + // be a separate encode table. + const separation = 1024 + + intervals := []interval(nil) + low, high := -1, -1 + for i, v := range reverse { + if v == 0 { + continue + } + if low < 0 { + low = i + } else if i-high >= separation { + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + low = i + } + high = i + 1 + } + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + sort.Sort(byDecreasingLength(intervals)) + + fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) + fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n") + fmt.Printf("// sorted by decreasing length.\n") + for i, v := range intervals { + fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) + } + fmt.Printf("\n") + + for i, v := range intervals { + fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) + fmt.Printf("var encode%d = [...]uint16{\n", i) + for j := v.low; j < v.high; j++ { + x := reverse[j] + if x == 0 { + continue + } + fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) + } + fmt.Printf("}\n\n") + } +} + +// interval is a half-open interval [low, high). +type interval struct { + low, high int +} + +func (i interval) len() int { return i.high - i.low } + +// byDecreasingLength sorts intervals by decreasing length. +type byDecreasingLength []interval + +func (b byDecreasingLength) Len() int { return len(b) } +func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } +func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go b/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go new file mode 100644 index 0000000..55016c7 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go @@ -0,0 +1,161 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates tables.go: +// go run maketables.go | gofmt > tables.go + +import ( + "bufio" + "fmt" + "log" + "net/http" + "sort" + "strings" +) + +func main() { + fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") + fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n") + fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n") + + printGB18030() + printGBK() +} + +func printGB18030() { + res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt") + if err != nil { + log.Fatalf("Get: %v", err) + } + defer res.Body.Close() + + fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n") + fmt.Printf("var gb18030 = [...][2]uint16{\n") + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := uint32(0), uint32(0) + if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { + log.Fatalf("could not parse %q", s) + } + if x < 0x10000 && y < 0x10000 { + fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y) + } + } + fmt.Printf("}\n\n") +} + +func printGBK() { + res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt") + if err != nil { + log.Fatalf("Get: %v", err) + } + defer res.Body.Close() + + mapping := [65536]uint16{} + reverse := [65536]uint16{} + + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := uint16(0), uint16(0) + if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { + log.Fatalf("could not parse %q", s) + } + if x < 0 || 126*190 <= x { + log.Fatalf("GBK code %d is out of range", x) + } + mapping[x] = y + if reverse[y] == 0 { + c0, c1 := x/190, x%190 + if c1 >= 0x3f { + c1++ + } + reverse[y] = (0x81+c0)<<8 | (0x40 + c1) + } + } + if err := scanner.Err(); err != nil { + log.Fatalf("scanner error: %v", err) + } + + fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n") + fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n") + fmt.Printf("var decode = [...]uint16{\n") + for i, v := range mapping { + if v != 0 { + fmt.Printf("\t%d: 0x%04X,\n", i, v) + } + } + fmt.Printf("}\n\n") + + // Any run of at least separation continuous zero entries in the reverse map will + // be a separate encode table. + const separation = 1024 + + intervals := []interval(nil) + low, high := -1, -1 + for i, v := range reverse { + if v == 0 { + continue + } + if low < 0 { + low = i + } else if i-high >= separation { + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + low = i + } + high = i + 1 + } + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + sort.Sort(byDecreasingLength(intervals)) + + fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) + fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n") + fmt.Printf("// sorted by decreasing length.\n") + for i, v := range intervals { + fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) + } + fmt.Printf("\n") + + for i, v := range intervals { + fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) + fmt.Printf("var encode%d = [...]uint16{\n", i) + for j := v.low; j < v.high; j++ { + x := reverse[j] + if x == 0 { + continue + } + fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) + } + fmt.Printf("}\n\n") + } +} + +// interval is a half-open interval [low, high). +type interval struct { + low, high int +} + +func (i interval) len() int { return i.high - i.low } + +// byDecreasingLength sorts intervals by decreasing length. +type byDecreasingLength []interval + +func (b byDecreasingLength) Len() int { return len(b) } +func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } +func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go b/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go new file mode 100644 index 0000000..cf7fdb3 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go @@ -0,0 +1,140 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates tables.go: +// go run maketables.go | gofmt > tables.go + +import ( + "bufio" + "fmt" + "log" + "net/http" + "sort" + "strings" +) + +func main() { + fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") + fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n") + fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n") + + res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt") + if err != nil { + log.Fatalf("Get: %v", err) + } + defer res.Body.Close() + + mapping := [65536]uint32{} + reverse := [65536 * 4]uint16{} + + scanner := bufio.NewScanner(res.Body) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if s == "" || s[0] == '#' { + continue + } + x, y := uint16(0), uint32(0) + if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { + log.Fatalf("could not parse %q", s) + } + if x < 0 || 126*157 <= x { + log.Fatalf("Big5 code %d is out of range", x) + } + mapping[x] = y + + // The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that + // "The index pointer for code point in index is the first pointer + // corresponding to code point in index", which would normally mean + // that the code below should be guarded by "if reverse[y] == 0", but + // last instead of first seems to match the behavior of + // "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in + // http://encoding.spec.whatwg.org/index-big5.txt, as index 2148 + // (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc") + // and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc". + c0, c1 := x/157, x%157 + if c1 < 0x3f { + c1 += 0x40 + } else { + c1 += 0x62 + } + reverse[y] = (0x81+c0)<<8 | c1 + } + if err := scanner.Err(); err != nil { + log.Fatalf("scanner error: %v", err) + } + + fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n") + fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n") + fmt.Printf("var decode = [...]uint32{\n") + for i, v := range mapping { + if v != 0 { + fmt.Printf("\t%d: 0x%08X,\n", i, v) + } + } + fmt.Printf("}\n\n") + + // Any run of at least separation continuous zero entries in the reverse map will + // be a separate encode table. + const separation = 1024 + + intervals := []interval(nil) + low, high := -1, -1 + for i, v := range reverse { + if v == 0 { + continue + } + if low < 0 { + low = i + } else if i-high >= separation { + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + low = i + } + high = i + 1 + } + if high >= 0 { + intervals = append(intervals, interval{low, high}) + } + sort.Sort(byDecreasingLength(intervals)) + + fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) + fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n") + fmt.Printf("// sorted by decreasing length.\n") + for i, v := range intervals { + fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high) + } + fmt.Printf("\n") + + for i, v := range intervals { + fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) + fmt.Printf("var encode%d = [...]uint16{\n", i) + for j := v.low; j < v.high; j++ { + x := reverse[j] + if x == 0 { + continue + } + fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) + } + fmt.Printf("}\n\n") + } +} + +// interval is a half-open interval [low, high). +type interval struct { + low, high int +} + +func (i interval) len() int { return i.high - i.low } + +// byDecreasingLength sorts intervals by decreasing length. +type byDecreasingLength []interval + +func (b byDecreasingLength) Len() int { return len(b) } +func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } +func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/internal/language/compact/gen.go b/vendor/golang.org/x/text/internal/language/compact/gen.go new file mode 100644 index 0000000..0c36a05 --- /dev/null +++ b/vendor/golang.org/x/text/internal/language/compact/gen.go @@ -0,0 +1,64 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// Language tag table generator. +// Data read from the web. + +package main + +import ( + "flag" + "fmt" + "log" + + "golang.org/x/text/internal/gen" + "golang.org/x/text/unicode/cldr" +) + +var ( + test = flag.Bool("test", + false, + "test existing tables; can be used to compare web data with package data.") + outputFile = flag.String("output", + "tables.go", + "output file for generated tables") +) + +func main() { + gen.Init() + + w := gen.NewCodeWriter() + defer w.WriteGoFile("tables.go", "compact") + + fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`) + + b := newBuilder(w) + gen.WriteCLDRVersion(w) + + b.writeCompactIndex() +} + +type builder struct { + w *gen.CodeWriter + data *cldr.CLDR + supp *cldr.SupplementalData +} + +func newBuilder(w *gen.CodeWriter) *builder { + r := gen.OpenCLDRCoreZip() + defer r.Close() + d := &cldr.Decoder{} + data, err := d.DecodeZip(r) + if err != nil { + log.Fatal(err) + } + b := builder{ + w: w, + data: data, + supp: data.Supplemental(), + } + return &b +} diff --git a/vendor/golang.org/x/text/internal/language/compact/gen_index.go b/vendor/golang.org/x/text/internal/language/compact/gen_index.go new file mode 100644 index 0000000..136cefa --- /dev/null +++ b/vendor/golang.org/x/text/internal/language/compact/gen_index.go @@ -0,0 +1,113 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This file generates derivative tables based on the language package itself. + +import ( + "fmt" + "log" + "sort" + "strings" + + "golang.org/x/text/internal/language" +) + +// Compact indices: +// Note -va-X variants only apply to localization variants. +// BCP variants only ever apply to language. +// The only ambiguity between tags is with regions. + +func (b *builder) writeCompactIndex() { + // Collect all language tags for which we have any data in CLDR. + m := map[language.Tag]bool{} + for _, lang := range b.data.Locales() { + // We include all locales unconditionally to be consistent with en_US. + // We want en_US, even though it has no data associated with it. + + // TODO: put any of the languages for which no data exists at the end + // of the index. This allows all components based on ICU to use that + // as the cutoff point. + // if x := data.RawLDML(lang); false || + // x.LocaleDisplayNames != nil || + // x.Characters != nil || + // x.Delimiters != nil || + // x.Measurement != nil || + // x.Dates != nil || + // x.Numbers != nil || + // x.Units != nil || + // x.ListPatterns != nil || + // x.Collations != nil || + // x.Segmentations != nil || + // x.Rbnf != nil || + // x.Annotations != nil || + // x.Metadata != nil { + + // TODO: support POSIX natively, albeit non-standard. + tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1)) + m[tag] = true + // } + } + + // TODO: plural rules are also defined for the deprecated tags: + // iw mo sh tl + // Consider removing these as compact tags. + + // Include locales for plural rules, which uses a different structure. + for _, plurals := range b.supp.Plurals { + for _, rules := range plurals.PluralRules { + for _, lang := range strings.Split(rules.Locales, " ") { + m[language.Make(lang)] = true + } + } + } + + var coreTags []language.CompactCoreInfo + var special []string + + for t := range m { + if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" { + log.Fatalf("Unexpected extension %v in %v", x, t) + } + if len(t.Variants()) == 0 && len(t.Extensions()) == 0 { + cci, ok := language.GetCompactCore(t) + if !ok { + log.Fatalf("Locale for non-basic language %q", t) + } + coreTags = append(coreTags, cci) + } else { + special = append(special, t.String()) + } + } + + w := b.w + + sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] }) + sort.Strings(special) + + w.WriteComment(` + NumCompactTags is the number of common tags. The maximum tag is + NumCompactTags-1.`) + w.WriteConst("NumCompactTags", len(m)) + + fmt.Fprintln(w, "const (") + for i, t := range coreTags { + fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i) + } + for i, t := range special { + fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags)) + } + fmt.Fprintln(w, ")") + + w.WriteVar("coreTags", coreTags) + + w.WriteConst("specialTagsStr", strings.Join(special, " ")) +} + +func ident(s string) string { + return strings.Replace(s, "-", "", -1) + "Index" +} diff --git a/vendor/golang.org/x/text/internal/language/compact/gen_parents.go b/vendor/golang.org/x/text/internal/language/compact/gen_parents.go new file mode 100644 index 0000000..9543d58 --- /dev/null +++ b/vendor/golang.org/x/text/internal/language/compact/gen_parents.go @@ -0,0 +1,54 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +import ( + "log" + + "golang.org/x/text/internal/gen" + "golang.org/x/text/internal/language" + "golang.org/x/text/internal/language/compact" + "golang.org/x/text/unicode/cldr" +) + +func main() { + r := gen.OpenCLDRCoreZip() + defer r.Close() + + d := &cldr.Decoder{} + data, err := d.DecodeZip(r) + if err != nil { + log.Fatalf("DecodeZip: %v", err) + } + + w := gen.NewCodeWriter() + defer w.WriteGoFile("parents.go", "compact") + + // Create parents table. + type ID uint16 + parents := make([]ID, compact.NumCompactTags) + for _, loc := range data.Locales() { + tag := language.MustParse(loc) + index, ok := compact.FromTag(tag) + if !ok { + continue + } + parentIndex := compact.ID(0) // und + for p := tag.Parent(); p != language.Und; p = p.Parent() { + if x, ok := compact.FromTag(p); ok { + parentIndex = x + break + } + } + parents[index] = ID(parentIndex) + } + + w.WriteComment(` + parents maps a compact index of a tag to the compact index of the parent of + this tag.`) + w.WriteVar("parents", parents) +} diff --git a/vendor/golang.org/x/text/internal/language/gen.go b/vendor/golang.org/x/text/internal/language/gen.go new file mode 100644 index 0000000..cdcc7fe --- /dev/null +++ b/vendor/golang.org/x/text/internal/language/gen.go @@ -0,0 +1,1520 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +// Language tag table generator. +// Data read from the web. + +package main + +import ( + "bufio" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "math" + "reflect" + "regexp" + "sort" + "strconv" + "strings" + + "golang.org/x/text/internal/gen" + "golang.org/x/text/internal/tag" + "golang.org/x/text/unicode/cldr" +) + +var ( + test = flag.Bool("test", + false, + "test existing tables; can be used to compare web data with package data.") + outputFile = flag.String("output", + "tables.go", + "output file for generated tables") +) + +var comment = []string{ + ` +lang holds an alphabetically sorted list of ISO-639 language identifiers. +All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag. +For 2-byte language identifiers, the two successive bytes have the following meaning: + - if the first letter of the 2- and 3-letter ISO codes are the same: + the second and third letter of the 3-letter ISO code. + - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3. +For 3-byte language identifiers the 4th byte is 0.`, + ` +langNoIndex is a bit vector of all 3-letter language codes that are not used as an index +in lookup tables. The language ids for these language codes are derived directly +from the letters and are not consecutive.`, + ` +altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives +to 2-letter language codes that cannot be derived using the method described above. +Each 3-letter code is followed by its 1-byte langID.`, + ` +altLangIndex is used to convert indexes in altLangISO3 to langIDs.`, + ` +AliasMap maps langIDs to their suggested replacements.`, + ` +script is an alphabetically sorted list of ISO 15924 codes. The index +of the script in the string, divided by 4, is the internal scriptID.`, + ` +isoRegionOffset needs to be added to the index of regionISO to obtain the regionID +for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for +the UN.M49 codes used for groups.)`, + ` +regionISO holds a list of alphabetically sorted 2-letter ISO region codes. +Each 2-letter codes is followed by two bytes with the following meaning: + - [A-Z}{2}: the first letter of the 2-letter code plus these two + letters form the 3-letter ISO code. + - 0, n: index into altRegionISO3.`, + ` +regionTypes defines the status of a region for various standards.`, + ` +m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are +codes indicating collections of regions.`, + ` +m49Index gives indexes into fromM49 based on the three most significant bits +of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in + fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]] +for an entry where the first 7 bits match the 7 lsb of the UN.M49 code. +The region code is stored in the 9 lsb of the indexed value.`, + ` +fromM49 contains entries to map UN.M49 codes to regions. See m49Index for details.`, + ` +altRegionISO3 holds a list of 3-letter region codes that cannot be +mapped to 2-letter codes using the default algorithm. This is a short list.`, + ` +altRegionIDs holds a list of regionIDs the positions of which match those +of the 3-letter ISO codes in altRegionISO3.`, + ` +variantNumSpecialized is the number of specialized variants in variants.`, + ` +suppressScript is an index from langID to the dominant script for that language, +if it exists. If a script is given, it should be suppressed from the language tag.`, + ` +likelyLang is a lookup table, indexed by langID, for the most likely +scripts and regions given incomplete information. If more entries exist for a +given language, region and script are the index and size respectively +of the list in likelyLangList.`, + ` +likelyLangList holds lists info associated with likelyLang.`, + ` +likelyRegion is a lookup table, indexed by regionID, for the most likely +languages and scripts given incomplete information. If more entries exist +for a given regionID, lang and script are the index and size respectively +of the list in likelyRegionList. +TODO: exclude containers and user-definable regions from the list.`, + ` +likelyRegionList holds lists info associated with likelyRegion.`, + ` +likelyScript is a lookup table, indexed by scriptID, for the most likely +languages and regions given a script.`, + ` +nRegionGroups is the number of region groups.`, + ` +regionInclusion maps region identifiers to sets of regions in regionInclusionBits, +where each set holds all groupings that are directly connected in a region +containment graph.`, + ` +regionInclusionBits is an array of bit vectors where every vector represents +a set of region groupings. These sets are used to compute the distance +between two regions for the purpose of language matching.`, + ` +regionInclusionNext marks, for each entry in regionInclusionBits, the set of +all groups that are reachable from the groups set in the respective entry.`, +} + +// TODO: consider changing some of these structures to tries. This can reduce +// memory, but may increase the need for memory allocations. This could be +// mitigated if we can piggyback on language tags for common cases. + +func failOnError(e error) { + if e != nil { + log.Panic(e) + } +} + +type setType int + +const ( + Indexed setType = 1 + iota // all elements must be of same size + Linear +) + +type stringSet struct { + s []string + sorted, frozen bool + + // We often need to update values after the creation of an index is completed. + // We include a convenience map for keeping track of this. + update map[string]string + typ setType // used for checking. +} + +func (ss *stringSet) clone() stringSet { + c := *ss + c.s = append([]string(nil), c.s...) + return c +} + +func (ss *stringSet) setType(t setType) { + if ss.typ != t && ss.typ != 0 { + log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ) + } +} + +// parse parses a whitespace-separated string and initializes ss with its +// components. +func (ss *stringSet) parse(s string) { + scan := bufio.NewScanner(strings.NewReader(s)) + scan.Split(bufio.ScanWords) + for scan.Scan() { + ss.add(scan.Text()) + } +} + +func (ss *stringSet) assertChangeable() { + if ss.frozen { + log.Panic("attempt to modify a frozen stringSet") + } +} + +func (ss *stringSet) add(s string) { + ss.assertChangeable() + ss.s = append(ss.s, s) + ss.sorted = ss.frozen +} + +func (ss *stringSet) freeze() { + ss.compact() + ss.frozen = true +} + +func (ss *stringSet) compact() { + if ss.sorted { + return + } + a := ss.s + sort.Strings(a) + k := 0 + for i := 1; i < len(a); i++ { + if a[k] != a[i] { + a[k+1] = a[i] + k++ + } + } + ss.s = a[:k+1] + ss.sorted = ss.frozen +} + +type funcSorter struct { + fn func(a, b string) bool + sort.StringSlice +} + +func (s funcSorter) Less(i, j int) bool { + return s.fn(s.StringSlice[i], s.StringSlice[j]) +} + +func (ss *stringSet) sortFunc(f func(a, b string) bool) { + ss.compact() + sort.Sort(funcSorter{f, sort.StringSlice(ss.s)}) +} + +func (ss *stringSet) remove(s string) { + ss.assertChangeable() + if i, ok := ss.find(s); ok { + copy(ss.s[i:], ss.s[i+1:]) + ss.s = ss.s[:len(ss.s)-1] + } +} + +func (ss *stringSet) replace(ol, nu string) { + ss.s[ss.index(ol)] = nu + ss.sorted = ss.frozen +} + +func (ss *stringSet) index(s string) int { + ss.setType(Indexed) + i, ok := ss.find(s) + if !ok { + if i < len(ss.s) { + log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[i]) + } + log.Panicf("find: item %q is not in list", s) + + } + return i +} + +func (ss *stringSet) find(s string) (int, bool) { + ss.compact() + i := sort.SearchStrings(ss.s, s) + return i, i != len(ss.s) && ss.s[i] == s +} + +func (ss *stringSet) slice() []string { + ss.compact() + return ss.s +} + +func (ss *stringSet) updateLater(v, key string) { + if ss.update == nil { + ss.update = map[string]string{} + } + ss.update[v] = key +} + +// join joins the string and ensures that all entries are of the same length. +func (ss *stringSet) join() string { + ss.setType(Indexed) + n := len(ss.s[0]) + for _, s := range ss.s { + if len(s) != n { + log.Panicf("join: not all entries are of the same length: %q", s) + } + } + ss.s = append(ss.s, strings.Repeat("\xff", n)) + return strings.Join(ss.s, "") +} + +// ianaEntry holds information for an entry in the IANA Language Subtag Repository. +// All types use the same entry. +// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various +// fields. +type ianaEntry struct { + typ string + description []string + scope string + added string + preferred string + deprecated string + suppressScript string + macro string + prefix []string +} + +type builder struct { + w *gen.CodeWriter + hw io.Writer // MultiWriter for w and w.Hash + data *cldr.CLDR + supp *cldr.SupplementalData + + // indices + locale stringSet // common locales + lang stringSet // canonical language ids (2 or 3 letter ISO codes) with data + langNoIndex stringSet // 3-letter ISO codes with no associated data + script stringSet // 4-letter ISO codes + region stringSet // 2-letter ISO or 3-digit UN M49 codes + variant stringSet // 4-8-alphanumeric variant code. + + // Region codes that are groups with their corresponding group IDs. + groups map[int]index + + // langInfo + registry map[string]*ianaEntry +} + +type index uint + +func newBuilder(w *gen.CodeWriter) *builder { + r := gen.OpenCLDRCoreZip() + defer r.Close() + d := &cldr.Decoder{} + data, err := d.DecodeZip(r) + failOnError(err) + b := builder{ + w: w, + hw: io.MultiWriter(w, w.Hash), + data: data, + supp: data.Supplemental(), + } + b.parseRegistry() + return &b +} + +func (b *builder) parseRegistry() { + r := gen.OpenIANAFile("assignments/language-subtag-registry") + defer r.Close() + b.registry = make(map[string]*ianaEntry) + + scan := bufio.NewScanner(r) + scan.Split(bufio.ScanWords) + var record *ianaEntry + for more := scan.Scan(); more; { + key := scan.Text() + more = scan.Scan() + value := scan.Text() + switch key { + case "Type:": + record = &ianaEntry{typ: value} + case "Subtag:", "Tag:": + if s := strings.SplitN(value, "..", 2); len(s) > 1 { + for a := s[0]; a <= s[1]; a = inc(a) { + b.addToRegistry(a, record) + } + } else { + b.addToRegistry(value, record) + } + case "Suppress-Script:": + record.suppressScript = value + case "Added:": + record.added = value + case "Deprecated:": + record.deprecated = value + case "Macrolanguage:": + record.macro = value + case "Preferred-Value:": + record.preferred = value + case "Prefix:": + record.prefix = append(record.prefix, value) + case "Scope:": + record.scope = value + case "Description:": + buf := []byte(value) + for more = scan.Scan(); more; more = scan.Scan() { + b := scan.Bytes() + if b[0] == '%' || b[len(b)-1] == ':' { + break + } + buf = append(buf, ' ') + buf = append(buf, b...) + } + record.description = append(record.description, string(buf)) + continue + default: + continue + } + more = scan.Scan() + } + if scan.Err() != nil { + log.Panic(scan.Err()) + } +} + +func (b *builder) addToRegistry(key string, entry *ianaEntry) { + if info, ok := b.registry[key]; ok { + if info.typ != "language" || entry.typ != "extlang" { + log.Fatalf("parseRegistry: tag %q already exists", key) + } + } else { + b.registry[key] = entry + } +} + +var commentIndex = make(map[string]string) + +func init() { + for _, s := range comment { + key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0]) + commentIndex[key] = s + } +} + +func (b *builder) comment(name string) { + if s := commentIndex[name]; len(s) > 0 { + b.w.WriteComment(s) + } else { + fmt.Fprintln(b.w) + } +} + +func (b *builder) pf(f string, x ...interface{}) { + fmt.Fprintf(b.hw, f, x...) + fmt.Fprint(b.hw, "\n") +} + +func (b *builder) p(x ...interface{}) { + fmt.Fprintln(b.hw, x...) +} + +func (b *builder) addSize(s int) { + b.w.Size += s + b.pf("// Size: %d bytes", s) +} + +func (b *builder) writeConst(name string, x interface{}) { + b.comment(name) + b.w.WriteConst(name, x) +} + +// writeConsts computes f(v) for all v in values and writes the results +// as constants named _v to a single constant block. +func (b *builder) writeConsts(f func(string) int, values ...string) { + b.pf("const (") + for _, v := range values { + b.pf("\t_%s = %v", v, f(v)) + } + b.pf(")") +} + +// writeType writes the type of the given value, which must be a struct. +func (b *builder) writeType(value interface{}) { + b.comment(reflect.TypeOf(value).Name()) + b.w.WriteType(value) +} + +func (b *builder) writeSlice(name string, ss interface{}) { + b.writeSliceAddSize(name, 0, ss) +} + +func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) { + b.comment(name) + b.w.Size += extraSize + v := reflect.ValueOf(ss) + t := v.Type().Elem() + b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len()) + + fmt.Fprintf(b.w, "var %s = ", name) + b.w.WriteArray(ss) + b.p() +} + +type FromTo struct { + From, To uint16 +} + +func (b *builder) writeSortedMap(name string, ss *stringSet, index func(s string) uint16) { + ss.sortFunc(func(a, b string) bool { + return index(a) < index(b) + }) + m := []FromTo{} + for _, s := range ss.s { + m = append(m, FromTo{index(s), index(ss.update[s])}) + } + b.writeSlice(name, m) +} + +const base = 'z' - 'a' + 1 + +func strToInt(s string) uint { + v := uint(0) + for i := 0; i < len(s); i++ { + v *= base + v += uint(s[i] - 'a') + } + return v +} + +// converts the given integer to the original ASCII string passed to strToInt. +// len(s) must match the number of characters obtained. +func intToStr(v uint, s []byte) { + for i := len(s) - 1; i >= 0; i-- { + s[i] = byte(v%base) + 'a' + v /= base + } +} + +func (b *builder) writeBitVector(name string, ss []string) { + vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8))) + for _, s := range ss { + v := strToInt(s) + vec[v/8] |= 1 << (v % 8) + } + b.writeSlice(name, vec) +} + +// TODO: convert this type into a list or two-stage trie. +func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) { + b.comment(name) + v := reflect.ValueOf(m) + sz := v.Len() * (2 + int(v.Type().Key().Size())) + for _, k := range m { + sz += len(k) + } + b.addSize(sz) + keys := []string{} + b.pf(`var %s = map[string]uint16{`, name) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + b.pf("\t%q: %v,", k, f(m[k])) + } + b.p("}") +} + +func (b *builder) writeMap(name string, m interface{}) { + b.comment(name) + v := reflect.ValueOf(m) + sz := v.Len() * (2 + int(v.Type().Key().Size()) + int(v.Type().Elem().Size())) + b.addSize(sz) + f := strings.FieldsFunc(fmt.Sprintf("%#v", m), func(r rune) bool { + return strings.IndexRune("{}, ", r) != -1 + }) + sort.Strings(f[1:]) + b.pf(`var %s = %s{`, name, f[0]) + for _, kv := range f[1:] { + b.pf("\t%s,", kv) + } + b.p("}") +} + +func (b *builder) langIndex(s string) uint16 { + if s == "und" { + return 0 + } + if i, ok := b.lang.find(s); ok { + return uint16(i) + } + return uint16(strToInt(s)) + uint16(len(b.lang.s)) +} + +// inc advances the string to its lexicographical successor. +func inc(s string) string { + const maxTagLength = 4 + var buf [maxTagLength]byte + intToStr(strToInt(strings.ToLower(s))+1, buf[:len(s)]) + for i := 0; i < len(s); i++ { + if s[i] <= 'Z' { + buf[i] -= 'a' - 'A' + } + } + return string(buf[:len(s)]) +} + +func (b *builder) parseIndices() { + meta := b.supp.Metadata + + for k, v := range b.registry { + var ss *stringSet + switch v.typ { + case "language": + if len(k) == 2 || v.suppressScript != "" || v.scope == "special" { + b.lang.add(k) + continue + } else { + ss = &b.langNoIndex + } + case "region": + ss = &b.region + case "script": + ss = &b.script + case "variant": + ss = &b.variant + default: + continue + } + ss.add(k) + } + // Include any language for which there is data. + for _, lang := range b.data.Locales() { + if x := b.data.RawLDML(lang); false || + x.LocaleDisplayNames != nil || + x.Characters != nil || + x.Delimiters != nil || + x.Measurement != nil || + x.Dates != nil || + x.Numbers != nil || + x.Units != nil || + x.ListPatterns != nil || + x.Collations != nil || + x.Segmentations != nil || + x.Rbnf != nil || + x.Annotations != nil || + x.Metadata != nil { + + from := strings.Split(lang, "_") + if lang := from[0]; lang != "root" { + b.lang.add(lang) + } + } + } + // Include locales for plural rules, which uses a different structure. + for _, plurals := range b.data.Supplemental().Plurals { + for _, rules := range plurals.PluralRules { + for _, lang := range strings.Split(rules.Locales, " ") { + if lang = strings.Split(lang, "_")[0]; lang != "root" { + b.lang.add(lang) + } + } + } + } + // Include languages in likely subtags. + for _, m := range b.supp.LikelySubtags.LikelySubtag { + from := strings.Split(m.From, "_") + b.lang.add(from[0]) + } + // Include ISO-639 alpha-3 bibliographic entries. + for _, a := range meta.Alias.LanguageAlias { + if a.Reason == "bibliographic" { + b.langNoIndex.add(a.Type) + } + } + // Include regions in territoryAlias (not all are in the IANA registry!) + for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { + if len(reg.Type) == 2 { + b.region.add(reg.Type) + } + } + + for _, s := range b.lang.s { + if len(s) == 3 { + b.langNoIndex.remove(s) + } + } + b.writeConst("NumLanguages", len(b.lang.slice())+len(b.langNoIndex.slice())) + b.writeConst("NumScripts", len(b.script.slice())) + b.writeConst("NumRegions", len(b.region.slice())) + + // Add dummy codes at the start of each list to represent "unspecified". + b.lang.add("---") + b.script.add("----") + b.region.add("---") + + // common locales + b.locale.parse(meta.DefaultContent.Locales) +} + +// TODO: region inclusion data will probably not be use used in future matchers. + +func (b *builder) computeRegionGroups() { + b.groups = make(map[int]index) + + // Create group indices. + for i := 1; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID. + b.groups[i] = index(len(b.groups)) + } + for _, g := range b.supp.TerritoryContainment.Group { + // Skip UN and EURO zone as they are flattening the containment + // relationship. + if g.Type == "EZ" || g.Type == "UN" { + continue + } + group := b.region.index(g.Type) + if _, ok := b.groups[group]; !ok { + b.groups[group] = index(len(b.groups)) + } + } + if len(b.groups) > 64 { + log.Fatalf("only 64 groups supported, found %d", len(b.groups)) + } + b.writeConst("nRegionGroups", len(b.groups)) +} + +var langConsts = []string{ + "af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es", + "et", "fa", "fi", "fil", "fr", "gu", "he", "hi", "hr", "hu", "hy", "id", "is", + "it", "ja", "ka", "kk", "km", "kn", "ko", "ky", "lo", "lt", "lv", "mk", "ml", + "mn", "mo", "mr", "ms", "mul", "my", "nb", "ne", "nl", "no", "pa", "pl", "pt", + "ro", "ru", "sh", "si", "sk", "sl", "sq", "sr", "sv", "sw", "ta", "te", "th", + "tl", "tn", "tr", "uk", "ur", "uz", "vi", "zh", "zu", + + // constants for grandfathered tags (if not already defined) + "jbo", "ami", "bnn", "hak", "tlh", "lb", "nv", "pwn", "tao", "tay", "tsu", + "nn", "sfb", "vgt", "sgg", "cmn", "nan", "hsn", +} + +// writeLanguage generates all tables needed for language canonicalization. +func (b *builder) writeLanguage() { + meta := b.supp.Metadata + + b.writeConst("nonCanonicalUnd", b.lang.index("und")) + b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...) + b.writeConst("langPrivateStart", b.langIndex("qaa")) + b.writeConst("langPrivateEnd", b.langIndex("qtz")) + + // Get language codes that need to be mapped (overlong 3-letter codes, + // deprecated 2-letter codes, legacy and grandfathered tags.) + langAliasMap := stringSet{} + aliasTypeMap := map[string]AliasType{} + + // altLangISO3 get the alternative ISO3 names that need to be mapped. + altLangISO3 := stringSet{} + // Add dummy start to avoid the use of index 0. + altLangISO3.add("---") + altLangISO3.updateLater("---", "aa") + + lang := b.lang.clone() + for _, a := range meta.Alias.LanguageAlias { + if a.Replacement == "" { + a.Replacement = "und" + } + // TODO: support mapping to tags + repl := strings.SplitN(a.Replacement, "_", 2)[0] + if a.Reason == "overlong" { + if len(a.Replacement) == 2 && len(a.Type) == 3 { + lang.updateLater(a.Replacement, a.Type) + } + } else if len(a.Type) <= 3 { + switch a.Reason { + case "macrolanguage": + aliasTypeMap[a.Type] = Macro + case "deprecated": + // handled elsewhere + continue + case "bibliographic", "legacy": + if a.Type == "no" { + continue + } + aliasTypeMap[a.Type] = Legacy + default: + log.Fatalf("new %s alias: %s", a.Reason, a.Type) + } + langAliasMap.add(a.Type) + langAliasMap.updateLater(a.Type, repl) + } + } + // Manually add the mapping of "nb" (Norwegian) to its macro language. + // This can be removed if CLDR adopts this change. + langAliasMap.add("nb") + langAliasMap.updateLater("nb", "no") + aliasTypeMap["nb"] = Macro + + for k, v := range b.registry { + // Also add deprecated values for 3-letter ISO codes, which CLDR omits. + if v.typ == "language" && v.deprecated != "" && v.preferred != "" { + langAliasMap.add(k) + langAliasMap.updateLater(k, v.preferred) + aliasTypeMap[k] = Deprecated + } + } + // Fix CLDR mappings. + lang.updateLater("tl", "tgl") + lang.updateLater("sh", "hbs") + lang.updateLater("mo", "mol") + lang.updateLater("no", "nor") + lang.updateLater("tw", "twi") + lang.updateLater("nb", "nob") + lang.updateLater("ak", "aka") + lang.updateLater("bh", "bih") + + // Ensure that each 2-letter code is matched with a 3-letter code. + for _, v := range lang.s[1:] { + s, ok := lang.update[v] + if !ok { + if s, ok = lang.update[langAliasMap.update[v]]; !ok { + continue + } + lang.update[v] = s + } + if v[0] != s[0] { + altLangISO3.add(s) + altLangISO3.updateLater(s, v) + } + } + + // Complete canonicalized language tags. + lang.freeze() + for i, v := range lang.s { + // We can avoid these manual entries by using the IANA registry directly. + // Seems easier to update the list manually, as changes are rare. + // The panic in this loop will trigger if we miss an entry. + add := "" + if s, ok := lang.update[v]; ok { + if s[0] == v[0] { + add = s[1:] + } else { + add = string([]byte{0, byte(altLangISO3.index(s))}) + } + } else if len(v) == 3 { + add = "\x00" + } else { + log.Panicf("no data for long form of %q", v) + } + lang.s[i] += add + } + b.writeConst("lang", tag.Index(lang.join())) + + b.writeConst("langNoIndexOffset", len(b.lang.s)) + + // space of all valid 3-letter language identifiers. + b.writeBitVector("langNoIndex", b.langNoIndex.slice()) + + altLangIndex := []uint16{} + for i, s := range altLangISO3.slice() { + altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))}) + if i > 0 { + idx := b.lang.index(altLangISO3.update[s]) + altLangIndex = append(altLangIndex, uint16(idx)) + } + } + b.writeConst("altLangISO3", tag.Index(altLangISO3.join())) + b.writeSlice("altLangIndex", altLangIndex) + + b.writeSortedMap("AliasMap", &langAliasMap, b.langIndex) + types := make([]AliasType, len(langAliasMap.s)) + for i, s := range langAliasMap.s { + types[i] = aliasTypeMap[s] + } + b.writeSlice("AliasTypes", types) +} + +var scriptConsts = []string{ + "Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy", + "Zzzz", +} + +func (b *builder) writeScript() { + b.writeConsts(b.script.index, scriptConsts...) + b.writeConst("script", tag.Index(b.script.join())) + + supp := make([]uint8, len(b.lang.slice())) + for i, v := range b.lang.slice()[1:] { + if sc := b.registry[v].suppressScript; sc != "" { + supp[i+1] = uint8(b.script.index(sc)) + } + } + b.writeSlice("suppressScript", supp) + + // There is only one deprecated script in CLDR. This value is hard-coded. + // We check here if the code must be updated. + for _, a := range b.supp.Metadata.Alias.ScriptAlias { + if a.Type != "Qaai" { + log.Panicf("unexpected deprecated stript %q", a.Type) + } + } +} + +func parseM49(s string) int16 { + if len(s) == 0 { + return 0 + } + v, err := strconv.ParseUint(s, 10, 10) + failOnError(err) + return int16(v) +} + +var regionConsts = []string{ + "001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US", + "ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo. +} + +func (b *builder) writeRegion() { + b.writeConsts(b.region.index, regionConsts...) + + isoOffset := b.region.index("AA") + m49map := make([]int16, len(b.region.slice())) + fromM49map := make(map[int16]int) + altRegionISO3 := "" + altRegionIDs := []uint16{} + + b.writeConst("isoRegionOffset", isoOffset) + + // 2-letter region lookup and mapping to numeric codes. + regionISO := b.region.clone() + regionISO.s = regionISO.s[isoOffset:] + regionISO.sorted = false + + regionTypes := make([]byte, len(b.region.s)) + + // Is the region valid BCP 47? + for s, e := range b.registry { + if len(s) == 2 && s == strings.ToUpper(s) { + i := b.region.index(s) + for _, d := range e.description { + if strings.Contains(d, "Private use") { + regionTypes[i] = iso3166UserAssigned + } + } + regionTypes[i] |= bcp47Region + } + } + + // Is the region a valid ccTLD? + r := gen.OpenIANAFile("domains/root/db") + defer r.Close() + + buf, err := ioutil.ReadAll(r) + failOnError(err) + re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`) + for _, m := range re.FindAllSubmatch(buf, -1) { + i := b.region.index(strings.ToUpper(string(m[1]))) + regionTypes[i] |= ccTLD + } + + b.writeSlice("regionTypes", regionTypes) + + iso3Set := make(map[string]int) + update := func(iso2, iso3 string) { + i := regionISO.index(iso2) + if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] { + regionISO.s[i] += iso3[1:] + iso3Set[iso3] = -1 + } else { + if ok && j >= 0 { + regionISO.s[i] += string([]byte{0, byte(j)}) + } else { + iso3Set[iso3] = len(altRegionISO3) + regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))}) + altRegionISO3 += iso3 + altRegionIDs = append(altRegionIDs, uint16(isoOffset+i)) + } + } + } + for _, tc := range b.supp.CodeMappings.TerritoryCodes { + i := regionISO.index(tc.Type) + isoOffset + if d := m49map[i]; d != 0 { + log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d) + } + m49 := parseM49(tc.Numeric) + m49map[i] = m49 + if r := fromM49map[m49]; r == 0 { + fromM49map[m49] = i + } else if r != i { + dep := b.registry[regionISO.s[r-isoOffset]].deprecated + if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) { + fromM49map[m49] = i + } + } + } + for _, ta := range b.supp.Metadata.Alias.TerritoryAlias { + if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 { + from := parseM49(ta.Type) + if r := fromM49map[from]; r == 0 { + fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset + } + } + } + for _, tc := range b.supp.CodeMappings.TerritoryCodes { + if len(tc.Alpha3) == 3 { + update(tc.Type, tc.Alpha3) + } + } + // This entries are not included in territoryCodes. Mostly 3-letter variants + // of deleted codes and an entry for QU. + for _, m := range []struct{ iso2, iso3 string }{ + {"CT", "CTE"}, + {"DY", "DHY"}, + {"HV", "HVO"}, + {"JT", "JTN"}, + {"MI", "MID"}, + {"NH", "NHB"}, + {"NQ", "ATN"}, + {"PC", "PCI"}, + {"PU", "PUS"}, + {"PZ", "PCZ"}, + {"RH", "RHO"}, + {"VD", "VDR"}, + {"WK", "WAK"}, + // These three-letter codes are used for others as well. + {"FQ", "ATF"}, + } { + update(m.iso2, m.iso3) + } + for i, s := range regionISO.s { + if len(s) != 4 { + regionISO.s[i] = s + " " + } + } + b.writeConst("regionISO", tag.Index(regionISO.join())) + b.writeConst("altRegionISO3", altRegionISO3) + b.writeSlice("altRegionIDs", altRegionIDs) + + // Create list of deprecated regions. + // TODO: consider inserting SF -> FI. Not included by CLDR, but is the only + // Transitionally-reserved mapping not included. + regionOldMap := stringSet{} + // Include regions in territoryAlias (not all are in the IANA registry!) + for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { + if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 { + regionOldMap.add(reg.Type) + regionOldMap.updateLater(reg.Type, reg.Replacement) + i, _ := regionISO.find(reg.Type) + j, _ := regionISO.find(reg.Replacement) + if k := m49map[i+isoOffset]; k == 0 { + m49map[i+isoOffset] = m49map[j+isoOffset] + } + } + } + b.writeSortedMap("regionOldMap", ®ionOldMap, func(s string) uint16 { + return uint16(b.region.index(s)) + }) + // 3-digit region lookup, groupings. + for i := 1; i < isoOffset; i++ { + m := parseM49(b.region.s[i]) + m49map[i] = m + fromM49map[m] = i + } + b.writeSlice("m49", m49map) + + const ( + searchBits = 7 + regionBits = 9 + ) + if len(m49map) >= 1< %d", len(m49map), 1<>searchBits] = int16(len(fromM49)) + } + b.writeSlice("m49Index", m49Index) + b.writeSlice("fromM49", fromM49) +} + +const ( + // TODO: put these lists in regionTypes as user data? Could be used for + // various optimizations and refinements and could be exposed in the API. + iso3166Except = "AC CP DG EA EU FX IC SU TA UK" + iso3166Trans = "AN BU CS NT TP YU ZR" // SF is not in our set of Regions. + // DY and RH are actually not deleted, but indeterminately reserved. + iso3166DelCLDR = "CT DD DY FQ HV JT MI NH NQ PC PU PZ RH VD WK YD" +) + +const ( + iso3166UserAssigned = 1 << iota + ccTLD + bcp47Region +) + +func find(list []string, s string) int { + for i, t := range list { + if t == s { + return i + } + } + return -1 +} + +// writeVariants generates per-variant information and creates a map from variant +// name to index value. We assign index values such that sorting multiple +// variants by index value will result in the correct order. +// There are two types of variants: specialized and general. Specialized variants +// are only applicable to certain language or language-script pairs. Generalized +// variants apply to any language. Generalized variants always sort after +// specialized variants. We will therefore always assign a higher index value +// to a generalized variant than any other variant. Generalized variants are +// sorted alphabetically among themselves. +// Specialized variants may also sort after other specialized variants. Such +// variants will be ordered after any of the variants they may follow. +// We assume that if a variant x is followed by a variant y, then for any prefix +// p of x, p-x is a prefix of y. This allows us to order tags based on the +// maximum of the length of any of its prefixes. +// TODO: it is possible to define a set of Prefix values on variants such that +// a total order cannot be defined to the point that this algorithm breaks. +// In other words, we cannot guarantee the same order of variants for the +// future using the same algorithm or for non-compliant combinations of +// variants. For this reason, consider using simple alphabetic sorting +// of variants and ignore Prefix restrictions altogether. +func (b *builder) writeVariant() { + generalized := stringSet{} + specialized := stringSet{} + specializedExtend := stringSet{} + // Collate the variants by type and check assumptions. + for _, v := range b.variant.slice() { + e := b.registry[v] + if len(e.prefix) == 0 { + generalized.add(v) + continue + } + c := strings.Split(e.prefix[0], "-") + hasScriptOrRegion := false + if len(c) > 1 { + _, hasScriptOrRegion = b.script.find(c[1]) + if !hasScriptOrRegion { + _, hasScriptOrRegion = b.region.find(c[1]) + + } + } + if len(c) == 1 || len(c) == 2 && hasScriptOrRegion { + // Variant is preceded by a language. + specialized.add(v) + continue + } + // Variant is preceded by another variant. + specializedExtend.add(v) + prefix := c[0] + "-" + if hasScriptOrRegion { + prefix += c[1] + } + for _, p := range e.prefix { + // Verify that the prefix minus the last element is a prefix of the + // predecessor element. + i := strings.LastIndex(p, "-") + pred := b.registry[p[i+1:]] + if find(pred.prefix, p[:i]) < 0 { + log.Fatalf("prefix %q for variant %q not consistent with predecessor spec", p, v) + } + // The sorting used below does not work in the general case. It works + // if we assume that variants that may be followed by others only have + // prefixes of the same length. Verify this. + count := strings.Count(p[:i], "-") + for _, q := range pred.prefix { + if c := strings.Count(q, "-"); c != count { + log.Fatalf("variant %q preceding %q has a prefix %q of size %d; want %d", p[i+1:], v, q, c, count) + } + } + if !strings.HasPrefix(p, prefix) { + log.Fatalf("prefix %q of variant %q should start with %q", p, v, prefix) + } + } + } + + // Sort extended variants. + a := specializedExtend.s + less := func(v, w string) bool { + // Sort by the maximum number of elements. + maxCount := func(s string) (max int) { + for _, p := range b.registry[s].prefix { + if c := strings.Count(p, "-"); c > max { + max = c + } + } + return + } + if cv, cw := maxCount(v), maxCount(w); cv != cw { + return cv < cw + } + // Sort by name as tie breaker. + return v < w + } + sort.Sort(funcSorter{less, sort.StringSlice(a)}) + specializedExtend.frozen = true + + // Create index from variant name to index. + variantIndex := make(map[string]uint8) + add := func(s []string) { + for _, v := range s { + variantIndex[v] = uint8(len(variantIndex)) + } + } + add(specialized.slice()) + add(specializedExtend.s) + numSpecialized := len(variantIndex) + add(generalized.slice()) + if n := len(variantIndex); n > 255 { + log.Fatalf("maximum number of variants exceeded: was %d; want <= 255", n) + } + b.writeMap("variantIndex", variantIndex) + b.writeConst("variantNumSpecialized", numSpecialized) +} + +func (b *builder) writeLanguageInfo() { +} + +// writeLikelyData writes tables that are used both for finding parent relations and for +// language matching. Each entry contains additional bits to indicate the status of the +// data to know when it cannot be used for parent relations. +func (b *builder) writeLikelyData() { + const ( + isList = 1 << iota + scriptInFrom + regionInFrom + ) + type ( // generated types + likelyScriptRegion struct { + region uint16 + script uint8 + flags uint8 + } + likelyLangScript struct { + lang uint16 + script uint8 + flags uint8 + } + likelyLangRegion struct { + lang uint16 + region uint16 + } + // likelyTag is used for getting likely tags for group regions, where + // the likely region might be a region contained in the group. + likelyTag struct { + lang uint16 + region uint16 + script uint8 + } + ) + var ( // generated variables + likelyRegionGroup = make([]likelyTag, len(b.groups)) + likelyLang = make([]likelyScriptRegion, len(b.lang.s)) + likelyRegion = make([]likelyLangScript, len(b.region.s)) + likelyScript = make([]likelyLangRegion, len(b.script.s)) + likelyLangList = []likelyScriptRegion{} + likelyRegionList = []likelyLangScript{} + ) + type fromTo struct { + from, to []string + } + langToOther := map[int][]fromTo{} + regionToOther := map[int][]fromTo{} + for _, m := range b.supp.LikelySubtags.LikelySubtag { + from := strings.Split(m.From, "_") + to := strings.Split(m.To, "_") + if len(to) != 3 { + log.Fatalf("invalid number of subtags in %q: found %d, want 3", m.To, len(to)) + } + if len(from) > 3 { + log.Fatalf("invalid number of subtags: found %d, want 1-3", len(from)) + } + if from[0] != to[0] && from[0] != "und" { + log.Fatalf("unexpected language change in expansion: %s -> %s", from, to) + } + if len(from) == 3 { + if from[2] != to[2] { + log.Fatalf("unexpected region change in expansion: %s -> %s", from, to) + } + if from[0] != "und" { + log.Fatalf("unexpected fully specified from tag: %s -> %s", from, to) + } + } + if len(from) == 1 || from[0] != "und" { + id := 0 + if from[0] != "und" { + id = b.lang.index(from[0]) + } + langToOther[id] = append(langToOther[id], fromTo{from, to}) + } else if len(from) == 2 && len(from[1]) == 4 { + sid := b.script.index(from[1]) + likelyScript[sid].lang = uint16(b.langIndex(to[0])) + likelyScript[sid].region = uint16(b.region.index(to[2])) + } else { + r := b.region.index(from[len(from)-1]) + if id, ok := b.groups[r]; ok { + if from[0] != "und" { + log.Fatalf("region changed unexpectedly: %s -> %s", from, to) + } + likelyRegionGroup[id].lang = uint16(b.langIndex(to[0])) + likelyRegionGroup[id].script = uint8(b.script.index(to[1])) + likelyRegionGroup[id].region = uint16(b.region.index(to[2])) + } else { + regionToOther[r] = append(regionToOther[r], fromTo{from, to}) + } + } + } + b.writeType(likelyLangRegion{}) + b.writeSlice("likelyScript", likelyScript) + + for id := range b.lang.s { + list := langToOther[id] + if len(list) == 1 { + likelyLang[id].region = uint16(b.region.index(list[0].to[2])) + likelyLang[id].script = uint8(b.script.index(list[0].to[1])) + } else if len(list) > 1 { + likelyLang[id].flags = isList + likelyLang[id].region = uint16(len(likelyLangList)) + likelyLang[id].script = uint8(len(list)) + for _, x := range list { + flags := uint8(0) + if len(x.from) > 1 { + if x.from[1] == x.to[2] { + flags = regionInFrom + } else { + flags = scriptInFrom + } + } + likelyLangList = append(likelyLangList, likelyScriptRegion{ + region: uint16(b.region.index(x.to[2])), + script: uint8(b.script.index(x.to[1])), + flags: flags, + }) + } + } + } + // TODO: merge suppressScript data with this table. + b.writeType(likelyScriptRegion{}) + b.writeSlice("likelyLang", likelyLang) + b.writeSlice("likelyLangList", likelyLangList) + + for id := range b.region.s { + list := regionToOther[id] + if len(list) == 1 { + likelyRegion[id].lang = uint16(b.langIndex(list[0].to[0])) + likelyRegion[id].script = uint8(b.script.index(list[0].to[1])) + if len(list[0].from) > 2 { + likelyRegion[id].flags = scriptInFrom + } + } else if len(list) > 1 { + likelyRegion[id].flags = isList + likelyRegion[id].lang = uint16(len(likelyRegionList)) + likelyRegion[id].script = uint8(len(list)) + for i, x := range list { + if len(x.from) == 2 && i != 0 || i > 0 && len(x.from) != 3 { + log.Fatalf("unspecified script must be first in list: %v at %d", x.from, i) + } + x := likelyLangScript{ + lang: uint16(b.langIndex(x.to[0])), + script: uint8(b.script.index(x.to[1])), + } + if len(list[0].from) > 2 { + x.flags = scriptInFrom + } + likelyRegionList = append(likelyRegionList, x) + } + } + } + b.writeType(likelyLangScript{}) + b.writeSlice("likelyRegion", likelyRegion) + b.writeSlice("likelyRegionList", likelyRegionList) + + b.writeType(likelyTag{}) + b.writeSlice("likelyRegionGroup", likelyRegionGroup) +} + +func (b *builder) writeRegionInclusionData() { + var ( + // mm holds for each group the set of groups with a distance of 1. + mm = make(map[int][]index) + + // containment holds for each group the transitive closure of + // containment of other groups. + containment = make(map[index][]index) + ) + for _, g := range b.supp.TerritoryContainment.Group { + // Skip UN and EURO zone as they are flattening the containment + // relationship. + if g.Type == "EZ" || g.Type == "UN" { + continue + } + group := b.region.index(g.Type) + groupIdx := b.groups[group] + for _, mem := range strings.Split(g.Contains, " ") { + r := b.region.index(mem) + mm[r] = append(mm[r], groupIdx) + if g, ok := b.groups[r]; ok { + mm[group] = append(mm[group], g) + containment[groupIdx] = append(containment[groupIdx], g) + } + } + } + + regionContainment := make([]uint64, len(b.groups)) + for _, g := range b.groups { + l := containment[g] + + // Compute the transitive closure of containment. + for i := 0; i < len(l); i++ { + l = append(l, containment[l[i]]...) + } + + // Compute the bitmask. + regionContainment[g] = 1 << g + for _, v := range l { + regionContainment[g] |= 1 << v + } + } + b.writeSlice("regionContainment", regionContainment) + + regionInclusion := make([]uint8, len(b.region.s)) + bvs := make(map[uint64]index) + // Make the first bitvector positions correspond with the groups. + for r, i := range b.groups { + bv := uint64(1 << i) + for _, g := range mm[r] { + bv |= 1 << g + } + bvs[bv] = i + regionInclusion[r] = uint8(bvs[bv]) + } + for r := 1; r < len(b.region.s); r++ { + if _, ok := b.groups[r]; !ok { + bv := uint64(0) + for _, g := range mm[r] { + bv |= 1 << g + } + if bv == 0 { + // Pick the world for unspecified regions. + bv = 1 << b.groups[b.region.index("001")] + } + if _, ok := bvs[bv]; !ok { + bvs[bv] = index(len(bvs)) + } + regionInclusion[r] = uint8(bvs[bv]) + } + } + b.writeSlice("regionInclusion", regionInclusion) + regionInclusionBits := make([]uint64, len(bvs)) + for k, v := range bvs { + regionInclusionBits[v] = uint64(k) + } + // Add bit vectors for increasingly large distances until a fixed point is reached. + regionInclusionNext := []uint8{} + for i := 0; i < len(regionInclusionBits); i++ { + bits := regionInclusionBits[i] + next := bits + for i := uint(0); i < uint(len(b.groups)); i++ { + if bits&(1< 6 { + log.Fatalf("Too many groups: %d", i) + } + idToIndex[mv.Id] = uint8(i + 1) + // TODO: also handle '-' + for _, r := range strings.Split(mv.Value, "+") { + todo := []string{r} + for k := 0; k < len(todo); k++ { + r := todo[k] + regionToGroups[b.regionIndex(r)] |= 1 << uint8(i) + todo = append(todo, regionHierarchy[r]...) + } + } + } + b.w.WriteVar("regionToGroups", regionToGroups) + + // maps language id to in- and out-of-group region. + paradigmLocales := [][3]uint16{} + locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ") + for i := 0; i < len(locales); i += 2 { + x := [3]uint16{} + for j := 0; j < 2; j++ { + pc := strings.SplitN(locales[i+j], "-", 2) + x[0] = b.langIndex(pc[0]) + if len(pc) == 2 { + x[1+j] = uint16(b.regionIndex(pc[1])) + } + } + paradigmLocales = append(paradigmLocales, x) + } + b.w.WriteVar("paradigmLocales", paradigmLocales) + + b.w.WriteType(mutualIntelligibility{}) + b.w.WriteType(scriptIntelligibility{}) + b.w.WriteType(regionIntelligibility{}) + + matchLang := []mutualIntelligibility{} + matchScript := []scriptIntelligibility{} + matchRegion := []regionIntelligibility{} + // Convert the languageMatch entries in lists keyed by desired language. + for _, m := range lm[0].LanguageMatch { + // Different versions of CLDR use different separators. + desired := strings.Replace(m.Desired, "-", "_", -1) + supported := strings.Replace(m.Supported, "-", "_", -1) + d := strings.Split(desired, "_") + s := strings.Split(supported, "_") + if len(d) != len(s) { + log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) + continue + } + distance, _ := strconv.ParseInt(m.Distance, 10, 8) + switch len(d) { + case 2: + if desired == supported && desired == "*_*" { + continue + } + // language-script pair. + matchScript = append(matchScript, scriptIntelligibility{ + wantLang: uint16(b.langIndex(d[0])), + haveLang: uint16(b.langIndex(s[0])), + wantScript: uint8(b.scriptIndex(d[1])), + haveScript: uint8(b.scriptIndex(s[1])), + distance: uint8(distance), + }) + if m.Oneway != "true" { + matchScript = append(matchScript, scriptIntelligibility{ + wantLang: uint16(b.langIndex(s[0])), + haveLang: uint16(b.langIndex(d[0])), + wantScript: uint8(b.scriptIndex(s[1])), + haveScript: uint8(b.scriptIndex(d[1])), + distance: uint8(distance), + }) + } + case 1: + if desired == supported && desired == "*" { + continue + } + if distance == 1 { + // nb == no is already handled by macro mapping. Check there + // really is only this case. + if d[0] != "no" || s[0] != "nb" { + log.Fatalf("unhandled equivalence %s == %s", s[0], d[0]) + } + continue + } + // TODO: consider dropping oneway field and just doubling the entry. + matchLang = append(matchLang, mutualIntelligibility{ + want: uint16(b.langIndex(d[0])), + have: uint16(b.langIndex(s[0])), + distance: uint8(distance), + oneway: m.Oneway == "true", + }) + case 3: + if desired == supported && desired == "*_*_*" { + continue + } + if desired != supported { + // This is now supported by CLDR, but only one case, which + // should already be covered by paradigm locales. For instance, + // test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in + // testdata/CLDRLocaleMatcherTest.txt tests this. + if supported != "en_*_GB" { + log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) + } + continue + } + ri := regionIntelligibility{ + lang: b.langIndex(d[0]), + distance: uint8(distance), + } + if d[1] != "*" { + ri.script = uint8(b.scriptIndex(d[1])) + } + switch { + case d[2] == "*": + ri.group = 0x80 // not contained in anything + case strings.HasPrefix(d[2], "$!"): + ri.group = 0x80 + d[2] = "$" + d[2][len("$!"):] + fallthrough + case strings.HasPrefix(d[2], "$"): + ri.group |= idToIndex[d[2]] + } + matchRegion = append(matchRegion, ri) + default: + log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) + } + } + sort.SliceStable(matchLang, func(i, j int) bool { + return matchLang[i].distance < matchLang[j].distance + }) + b.w.WriteComment(` + matchLang holds pairs of langIDs of base languages that are typically + mutually intelligible. Each pair is associated with a confidence and + whether the intelligibility goes one or both ways.`) + b.w.WriteVar("matchLang", matchLang) + + b.w.WriteComment(` + matchScript holds pairs of scriptIDs where readers of one script + can typically also read the other. Each is associated with a confidence.`) + sort.SliceStable(matchScript, func(i, j int) bool { + return matchScript[i].distance < matchScript[j].distance + }) + b.w.WriteVar("matchScript", matchScript) + + sort.SliceStable(matchRegion, func(i, j int) bool { + return matchRegion[i].distance < matchRegion[j].distance + }) + b.w.WriteVar("matchRegion", matchRegion) +} diff --git a/vendor/vendor.json b/vendor/vendor.json index e60f391..4dc299f 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -15,10 +15,16 @@ "revisionTime": "2018-10-12T15:44:24Z" }, { - "checksumSHA1": "5UWAX/5F5bqC7tKtDb2QFFRqwAs=", + "checksumSHA1": "WoatyvsqqryIUsNP+2fXlbVtAVM=", "path": "github.com/chilts/sid", - "revision": "250d10e55bf450834d37cb13b7dae8816ada9b28", - "revisionTime": "2018-09-28T23:21:30Z" + "revision": "660e94789ec9b45634f588d40881e81b56de92a0", + "revisionTime": "2019-06-07T04:24:30Z" + }, + { + "checksumSHA1": "6fVGZNc+CHkdCoJCwJwTOSMYHSo=", + "path": "github.com/jaytaylor/html2text", + "revision": "01ec452cbe43774f989516272881441cae40c16b", + "revisionTime": "2019-03-26T19:55:09Z" }, { "checksumSHA1": "qR7D38Zmn9TPUvGn64k+r0+Kq1c=", @@ -69,10 +75,10 @@ "revisionTime": "2018-10-12T15:49:47Z" }, { - "checksumSHA1": "HZJ2dhzXoMi8n+iY80A9vsnyQUk=", + "checksumSHA1": "C2jnBks3LctebwDmLMoq/zYZex0=", "path": "github.com/olekukonko/tablewriter", - "revision": "1c0837c15a0bac7871496dfce5dcdd308e0a330f", - "revisionTime": "2019-05-08T01:39:46Z" + "revision": "cc27d85e17cec9768d2ac401ea5d619a9628f16d", + "revisionTime": "2019-06-18T03:32:46Z" }, { "checksumSHA1": "qErubHtC7DAFBnEQkMTuKDtfFTU=", @@ -83,128 +89,122 @@ { "checksumSHA1": "bONEZcbkYKiPyABrecOLzHomjPU=", "path": "golang.org/x/net/html", - "revision": "f3200d17e092c607f615320ecaad13d87ad9a2b3", - "revisionTime": "2019-05-22T15:39:15Z" + "revision": "ba9fcec4b297b415637633c5a6e8fa592e4a16c3", + "revisionTime": "2019-08-26T16:14:39Z" }, { - "checksumSHA1": "xwhqe/igHQrY3IhqDwzo6j7qpm8=", + "checksumSHA1": "XtSbs1gpyaEsIqf6VRhJsgOQe5U=", "path": "golang.org/x/net/html/atom", - "revision": "f3200d17e092c607f615320ecaad13d87ad9a2b3", - "revisionTime": "2019-05-22T15:39:15Z" + "revision": "ba9fcec4b297b415637633c5a6e8fa592e4a16c3", + "revisionTime": "2019-08-26T16:14:39Z" }, { "checksumSHA1": "barUU39reQ7LdgYLA323hQ/UGy4=", "path": "golang.org/x/net/html/charset", - "revision": "f3200d17e092c607f615320ecaad13d87ad9a2b3", - "revisionTime": "2019-05-22T15:39:15Z" + "revision": "ba9fcec4b297b415637633c5a6e8fa592e4a16c3", + "revisionTime": "2019-08-26T16:14:39Z" }, { "checksumSHA1": "tqqo7DEeFCclb58XbN44WwdpWww=", "path": "golang.org/x/text/encoding", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { - "checksumSHA1": "DSdlK4MKI/a3U8Zaee2XKBe01Fo=", + "checksumSHA1": "HgcUFTOQF5jOYtTIj5obR3GVN9A=", "path": "golang.org/x/text/encoding/charmap", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { - "checksumSHA1": "SbJkfe5G/5tji96Pa15/ePDOCtk=", + "checksumSHA1": "UYlVRSWAA5srH3iWvrJz++Zhpr0=", "path": "golang.org/x/text/encoding/htmlindex", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { "checksumSHA1": "zeHyHebIZl1tGuwGllIhjfci+wI=", "path": "golang.org/x/text/encoding/internal", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { - "checksumSHA1": "hT7VaIBlkm2YpKulgnjqXNdicGQ=", + "checksumSHA1": "46UIK1h/DTupMdRnLkijrEIwzv4=", "path": "golang.org/x/text/encoding/internal/identifier", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { - "checksumSHA1": "2YqVpmvjWGEBATyUphTP1MS34JE=", + "checksumSHA1": "DhdZROnJq+cEcQ/sHY7GEq5wQ8U=", "path": "golang.org/x/text/encoding/japanese", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { - "checksumSHA1": "+ErWCAdaMwO4PLtrk9D/Hh+7oQM=", + "checksumSHA1": "qHQ79q9peY8ZkCMC8kJAb52BAWg=", "path": "golang.org/x/text/encoding/korean", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { - "checksumSHA1": "mTuZi5urYwgDIO8+Gfql2pv8Vwg=", + "checksumSHA1": "55UdScb+EMOCPr7OW0hCwDsVxpg=", "path": "golang.org/x/text/encoding/simplifiedchinese", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { - "checksumSHA1": "D+VI4j0Wjzr8SeupWdOB5KBdFOw=", + "checksumSHA1": "9EZF1SHTpjVmaT9sARitvGKUXOY=", "path": "golang.org/x/text/encoding/traditionalchinese", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { "checksumSHA1": "bAJTZJ3IGJdNmN/PSlRMRxWtxec=", "path": "golang.org/x/text/encoding/unicode", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { - "checksumSHA1": "ybE4kAPmNPV/dvShuG86AmLbhdE=", + "checksumSHA1": "8ea1h1pimPfXc6cE5l3SQTe7SVo=", "path": "golang.org/x/text/internal/language", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { - "checksumSHA1": "VDwNSsZP6KShjTSwGUQUGJVrs1I=", + "checksumSHA1": "GxBlFOqWoIsWCMswUHh6dUqM5no=", "path": "golang.org/x/text/internal/language/compact", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { "checksumSHA1": "hyNCcTwMQnV6/MK8uUW9E5H0J0M=", "path": "golang.org/x/text/internal/tag", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { "checksumSHA1": "Qk7dljcrEK1BJkAEZguxAbG9dSo=", "path": "golang.org/x/text/internal/utf8internal", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { - "checksumSHA1": "oYNlkS+0TimKOScUz3Hn9QWyz6w=", + "checksumSHA1": "kgODOZdRLWKSppiHzrqOKdtrGHA=", "path": "golang.org/x/text/language", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { "checksumSHA1": "IV4MN7KGBSocu/5NR3le3sxup4Y=", "path": "golang.org/x/text/runes", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" }, { "checksumSHA1": "R9iBDY+aPnT+8pyRcqGjXq5QixA=", "path": "golang.org/x/text/transform", - "revision": "342b2e1fbaa52c93f31447ad2c6abc048c63e475", - "revisionTime": "2018-12-15T17:52:45Z" - }, - { - "checksumSHA1": "2/9hMw7Y4I42L/PTobKqVldWUAU=", - "path": "jaytaylor.com/html2text", - "revision": "01ec452cbe43774f989516272881441cae40c16b", - "revisionTime": "2019-03-26T19:55:09Z" + "revision": "3d0f7978add91030e5e8976ff65ccdd828286cba", + "revisionTime": "2019-08-29T15:11:34Z" } ], "rootPath": "salsa.debian.org/mdosch-guest/feed-to-muc"