Update vendored dependencies.

This commit is contained in:
Martin Dosch 2021-12-13 10:29:47 +01:00
parent b377ec7255
commit 1a34016129
82 changed files with 5942 additions and 3000 deletions

18
go.mod
View file

@ -3,17 +3,17 @@ module salsa.debian.org/mdosch/feed-to-muc
go 1.14
require (
github.com/PuerkitoBio/goquery v1.6.0 // indirect
github.com/andybalholm/cascadia v1.2.0 // indirect
github.com/PuerkitoBio/goquery v1.8.0 // indirect
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7
github.com/mattn/go-runewidth v0.0.9 // indirect
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2
github.com/mmcdole/gofeed v1.1.0
github.com/jaytaylor/html2text v0.0.0-20211105163654-bc68cce691ba
github.com/json-iterator/go v1.1.12 // indirect
github.com/mattn/go-runewidth v0.0.13 // indirect
github.com/mattn/go-xmpp v0.0.0-20211029151415-912ba614897a
github.com/mmcdole/gofeed v1.1.3
github.com/mmcdole/goxpp v0.0.0-20200921145534-2f3784f67354 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.1 // indirect
github.com/olekukonko/tablewriter v0.0.4 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
golang.org/x/net v0.0.0-20201024042810-be3efd7ff127 // indirect
golang.org/x/net v0.0.0-20211209124913-491a49abca63 // indirect
golang.org/x/text v0.3.7 // indirect
)

57
go.sum
View file

@ -1,10 +1,10 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/PuerkitoBio/goquery v1.6.0 h1:j7taAbelrdcsOlGeMenZxc2AWXD5fieT1/znArdnx94=
github.com/PuerkitoBio/goquery v1.6.0/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 h1:z0uK8UQqjMVYzvk4tiiu3obv2B44+XBsvgEJREQfnO8=
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9/go.mod h1:Jl2neWsQaDanWORdqZ4emBl50J4/aRBBS4FyyG9/PFo=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
@ -12,33 +12,33 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 h1:g0fAGBisHaEQ0TRq1iBvemFRf+8AEWEmBESSiWB3Vsc=
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
github.com/jaytaylor/html2text v0.0.0-20211105163654-bc68cce691ba h1:QFQpJdgbON7I0jr2hYW7Bs+XV0qjc3d5tZoDnRFnqTg=
github.com/jaytaylor/html2text v0.0.0-20211105163654-bc68cce691ba/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2 h1:F544zRtDc/pMpFNHN46oeXV2jIAG4DoMH+6zlVSn0Q8=
github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2/go.mod h1:Cs5mF0OsrRRmhkyOod//ldNPOwJsrBvJ+1WRspv0xoc=
github.com/mmcdole/gofeed v1.1.0 h1:T2WrGLVJRV04PY2qwhEJLHCt9JiCtBhb6SmC8ZvJH08=
github.com/mmcdole/gofeed v1.1.0/go.mod h1:PPiVwgDXLlz2N83KB4TrIim2lyYM5Zn7ZWH9Pi4oHUk=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-xmpp v0.0.0-20211029151415-912ba614897a h1:BRuMO9LUDuGp6viOhrEbmuXNlvC78X5QdsnY9Wc+cqM=
github.com/mattn/go-xmpp v0.0.0-20211029151415-912ba614897a/go.mod h1:Cs5mF0OsrRRmhkyOod//ldNPOwJsrBvJ+1WRspv0xoc=
github.com/mmcdole/gofeed v1.1.3 h1:pdrvMb18jMSLidGp8j0pLvc9IGziX4vbmvVqmLH6z8o=
github.com/mmcdole/gofeed v1.1.3/go.mod h1:QQO3maftbOu+hiVOGOZDRLymqGQCos4zxbA4j89gMrE=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/mmcdole/goxpp v0.0.0-20200921145534-2f3784f67354 h1:Z6i7ND25ixRtXFBylIUggqpvLMV1I15yprcqMVB7WZA=
github.com/mmcdole/goxpp v0.0.0-20200921145534-2f3784f67354/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8=
github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
@ -48,20 +48,21 @@ github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/urfave/cli v1.22.3/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201024042810-be3efd7ff127 h1:pZPp9+iYUqwYKLjht0SDBbRCRK/9gAXDy7pz5fRDpjo=
golang.org/x/net v0.0.0-20201024042810-be3efd7ff127/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211209124913-491a49abca63 h1:iocB37TsdFuN6IBRZ+ry36wrkoV51/tl5vOWqkcPGvY=
golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View file

@ -1,17 +0,0 @@
language: go
go:
- 1.2.x
- 1.3.x
- 1.4.x
- 1.5.x
- 1.6.x
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- 1.11.x
- 1.12.x
- 1.13.x
- tip

View file

@ -1,4 +1,4 @@
Copyright (c) 2012-2016, Martin Angers & Contributors
Copyright (c) 2012-2021, Martin Angers & Contributors
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

View file

@ -1,5 +1,8 @@
# goquery - a little like that j-thing, only in Go
[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
[![Build Status](https://github.com/PuerkitoBio/goquery/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/PuerkitoBio/goquery/actions)
[![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery)
[![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off.
@ -19,7 +22,7 @@ Syntax-wise, it is as close as possible to jQuery, with the same function names
## Installation
Please note that because of the net/html dependency, goquery requires Go1.1+.
Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+.
$ go get github.com/PuerkitoBio/goquery
@ -37,6 +40,10 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.**
* **2021-10-25 (v1.8.0)** : Add `Render` function to render a `Selection` to an `io.Writer` (thanks [@anthonygedeon](https://github.com/anthonygedeon)).
* **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)).
* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)).
* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes.
* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this.
* **2020-02-04 (v1.5.1)** : Update module dependencies.
* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505).
@ -49,7 +56,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
* **2016-08-28 (v1.0.1)** : Optimize performance for large documents.
* **2016-07-27 (v1.0.0)** : Tag version 1.0.0.
* **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object.
* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
@ -78,7 +85,7 @@ jQuery often has many variants for the same function (no argument, a selector st
Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
The complete [godoc reference documentation can be found here][doc].
The complete [package reference documentation can be found here][doc].
Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string):
@ -122,11 +129,10 @@ func ExampleScrape() {
}
// Find the review items
doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) {
// For each item found, get the band and title
band := s.Find("a").Text()
title := s.Find("i").Text()
fmt.Printf("Review %d: %s - %s\n", i, band, title)
doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) {
// For each item found, get the title
title := s.Find("a").Text()
fmt.Printf("Review %d: %s\n", i, title)
})
}
@ -147,6 +153,7 @@ func main() {
- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering.
- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags.
- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates.
## Support
@ -159,8 +166,9 @@ There are a number of ways you can support the project:
* Pull requests: please discuss new code in an issue first, unless the fix is really trivial.
- Make sure new code is tested.
- Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue.
If you desperately want to send money my way, I have a BuyMeACoffee.com page:
* Sponsor the developer
- See the Github Sponsor button at the top of the repo on github
- or via BuyMeACoffee.com, below
<a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a>
@ -175,10 +183,10 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia'
[bsd]: http://opensource.org/licenses/BSD-3-Clause
[golic]: http://golang.org/LICENSE
[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE
[doc]: http://godoc.org/github.com/PuerkitoBio/goquery
[doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery
[index]: http://api.jquery.com/index/
[gonet]: https://github.com/golang/net/
[html]: http://godoc.org/golang.org/x/net/html
[html]: https://pkg.go.dev/golang.org/x/net/html
[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks
[thatguystone]: https://github.com/thatguystone
[piotr]: https://github.com/piotrkowalczuk

View file

@ -1,8 +1,8 @@
module github.com/PuerkitoBio/goquery
require (
github.com/andybalholm/cascadia v1.1.0
golang.org/x/net v0.0.0-20200202094626-16171245cfb2
github.com/andybalholm/cascadia v1.3.1
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8
)
go 1.13

View file

@ -1,8 +1,9 @@
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View file

@ -661,6 +661,9 @@ func (s *Selection) eachNodeHtml(htmlStr string, isParent bool, mergeFn func(n *
if isParent {
context = n.Parent
} else {
if n.Type != html.ElementNode {
continue
}
context = n
}
if context != nil {

View file

@ -7,7 +7,6 @@ import (
"net/url"
"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)
@ -122,6 +121,45 @@ type Matcher interface {
Filter([]*html.Node) []*html.Node
}
// Single compiles a selector string to a Matcher that stops after the first
// match is found.
//
// By default, Selection.Find and other functions that accept a selector string
// to select nodes will use all matches corresponding to that selector. By
// using the Matcher returned by Single, at most the first match will be
// selected.
//
// For example, those two statements are semantically equivalent:
//
// sel1 := doc.Find("a").First()
// sel2 := doc.FindMatcher(goquery.Single("a"))
//
// The one using Single is optimized to be potentially much faster on large
// documents.
//
// Only the behaviour of the MatchAll method of the Matcher interface is
// altered compared to standard Matchers. This means that the single-selection
// property of the Matcher only applies for Selection methods where the Matcher
// is used to select nodes, not to filter or check if a node matches the
// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g.
// FilterMatcher(Single("div")) will still result in a Selection with multiple
// "div"s if there were many "div"s in the Selection to begin with).
func Single(selector string) Matcher {
return singleMatcher{compileMatcher(selector)}
}
// SingleMatcher returns a Matcher matches the same nodes as m, but that stops
// after the first match is found.
//
// See the documentation of function Single for more details.
func SingleMatcher(m Matcher) Matcher {
if _, ok := m.(singleMatcher); ok {
// m is already a singleMatcher
return m
}
return singleMatcher{m}
}
// compileMatcher compiles the selector string s and returns
// the corresponding Matcher. If s is an invalid selector string,
// it returns a Matcher that fails all matches.
@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher {
return cs
}
type singleMatcher struct {
Matcher
}
func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
// Optimized version - stops finding at the first match (cascadia-compiled
// matchers all use this code path).
if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok {
node := mm.MatchFirst(n)
if node == nil {
return nil
}
return []*html.Node{node}
}
// Fallback version, for e.g. test mocks that don't provide the MatchFirst
// method.
nodes := m.Matcher.MatchAll(n)
if len(nodes) > 0 {
return nodes[:1:1]
}
return nil
}
// invalidMatcher is a Matcher that always fails to match.
type invalidMatcher struct{}

View file

@ -2,6 +2,7 @@ package goquery
import (
"bytes"
"io"
"golang.org/x/net/html"
)
@ -50,13 +51,24 @@ func nodeName(node *html.Node) string {
case html.ElementNode, html.DoctypeNode:
return node.Data
default:
if node.Type >= 0 && int(node.Type) < len(nodeNames) {
if int(node.Type) < len(nodeNames) {
return nodeNames[node.Type]
}
return ""
}
}
// Render renders the html of the first element from selector and writes it to
// the writer. It behaves the same as OuterHtml but writes to w instead of
// returning the string.
func Render(w io.Writer, s *Selection) error {
if s.Length() == 0 {
return nil
}
n := s.Get(0)
return html.Render(w, n)
}
// OuterHtml returns the outer HTML rendering of the first item in
// the selection - that is, the HTML including the first element's
// tag and attributes.
@ -66,12 +78,7 @@ func nodeName(node *html.Node) string {
// a property provided by the DOM).
func OuterHtml(s *Selection) (string, error) {
var buf bytes.Buffer
if s.Length() == 0 {
return "", nil
}
n := s.Get(0)
if err := html.Render(&buf, n); err != nil {
if err := Render(&buf, s); err != nil {
return "", err
}
return buf.String(), nil

View file

@ -1,5 +1,5 @@
module github.com/andybalholm/cascadia
require golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01
go 1.16
go 1.13
require golang.org/x/net v0.0.0-20210916014120-12bc252f5db8

7
vendor/github.com/andybalholm/cascadia/go.sum generated vendored Normal file
View file

@ -0,0 +1,7 @@
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View file

@ -36,7 +36,7 @@ func (p *parser) parseEscape() (result string, err error) {
for i = start; i < start+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
// empty
}
v, _ := strconv.ParseUint(p.s[start:i], 16, 21)
v, _ := strconv.ParseUint(p.s[start:i], 16, 64)
if len(p.s) > i {
switch p.s[i] {
case '\r':
@ -409,6 +409,19 @@ func (p *parser) parseAttributeSelector() (attrSelector, error) {
if p.i >= len(p.s) {
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
// check if the attribute contains an ignore case flag
ignoreCase := false
if p.s[p.i] == 'i' || p.s[p.i] == 'I' {
ignoreCase = true
p.i++
}
p.skipWhitespace()
if p.i >= len(p.s) {
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
if p.s[p.i] != ']' {
return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
}
@ -416,15 +429,17 @@ func (p *parser) parseAttributeSelector() (attrSelector, error) {
switch op {
case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=":
return attrSelector{key: key, val: val, operation: op, regexp: rx}, nil
return attrSelector{key: key, val: val, operation: op, regexp: rx, insensitive: ignoreCase}, nil
default:
return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op)
}
}
var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
var errUnmatchedParenthesis = errors.New("unmatched '('")
var (
errExpectedParenthesis = errors.New("expected '(' but didn't find it")
errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
errUnmatchedParenthesis = errors.New("unmatched '('")
)
// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element
// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
@ -552,6 +567,37 @@ func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err
out = emptyElementPseudoClassSelector{}
case "root":
out = rootPseudoClassSelector{}
case "link":
out = linkPseudoClassSelector{}
case "lang":
if !p.consumeParenthesis() {
return out, "", errExpectedParenthesis
}
if p.i == len(p.s) {
return out, "", errUnmatchedParenthesis
}
val, err := p.parseIdentifier()
if err != nil {
return out, "", err
}
val = strings.ToLower(val)
p.skipWhitespace()
if p.i >= len(p.s) {
return out, "", errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
return out, "", errExpectedClosingParenthesis
}
out = langPseudoClassSelector{lang: val}
case "enabled":
out = enabledPseudoClassSelector{}
case "disabled":
out = disabledPseudoClassSelector{}
case "checked":
out = checkedPseudoClassSelector{}
case "visited", "hover", "active", "focus", "target":
// Not applicable in a static context: never match.
out = neverMatchSelector{value: ":" + name}
case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error":
return nil, name, nil
default:
@ -714,6 +760,9 @@ func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
case '*':
// It's the universal selector. Just skip over it, since it doesn't affect the meaning.
p.i++
if p.i+2 < len(p.s) && p.s[p.i:p.i+2] == "|*" { // other version of universal selector
p.i += 2
}
case '#', '.', '[', ':':
// There's no type selector. Wait to process the other till the main loop.
default:

View file

@ -0,0 +1,474 @@
package cascadia
import (
"bytes"
"fmt"
"regexp"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// This file implements the pseudo classes selectors,
// which share the implementation of PseudoElement() and Specificity()
type abstractPseudoClass struct{}
func (s abstractPseudoClass) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c abstractPseudoClass) PseudoElement() string {
return ""
}
type relativePseudoClassSelector struct {
name string // one of "not", "has", "haschild"
match SelectorGroup
}
func (s relativePseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
switch s.name {
case "not":
// matches elements that do not match a.
return !s.match.Match(n)
case "has":
// matches elements with any descendant that matches a.
return hasDescendantMatch(n, s.match)
case "haschild":
// matches elements with a child that matches a.
return hasChildMatch(n, s.match)
default:
panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name))
}
}
// hasChildMatch returns whether n has any child that matches a.
func hasChildMatch(n *html.Node, a Matcher) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a.Match(c) {
return true
}
}
return false
}
// hasDescendantMatch performs a depth-first search of n's descendants,
// testing whether any of them match a. It returns true as soon as a match is
// found, or false if no match is found.
func hasDescendantMatch(n *html.Node, a Matcher) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
return true
}
}
return false
}
// Specificity returns the specificity of the most specific selectors
// in the pseudo-class arguments.
// See https://www.w3.org/TR/selectors/#specificity-rules
func (s relativePseudoClassSelector) Specificity() Specificity {
var max Specificity
for _, sel := range s.match {
newSpe := sel.Specificity()
if max.Less(newSpe) {
max = newSpe
}
}
return max
}
func (c relativePseudoClassSelector) PseudoElement() string {
return ""
}
type containsPseudoClassSelector struct {
abstractPseudoClass
value string
own bool
}
func (s containsPseudoClassSelector) Match(n *html.Node) bool {
var text string
if s.own {
// matches nodes that directly contain the given text
text = strings.ToLower(nodeOwnText(n))
} else {
// matches nodes that contain the given text.
text = strings.ToLower(nodeText(n))
}
return strings.Contains(text, s.value)
}
type regexpPseudoClassSelector struct {
abstractPseudoClass
regexp *regexp.Regexp
own bool
}
func (s regexpPseudoClassSelector) Match(n *html.Node) bool {
var text string
if s.own {
// matches nodes whose text directly matches the specified regular expression
text = nodeOwnText(n)
} else {
// matches nodes whose text matches the specified regular expression
text = nodeText(n)
}
return s.regexp.MatchString(text)
}
// writeNodeText writes the text contained in n and its descendants to b.
func writeNodeText(n *html.Node, b *bytes.Buffer) {
switch n.Type {
case html.TextNode:
b.WriteString(n.Data)
case html.ElementNode:
for c := n.FirstChild; c != nil; c = c.NextSibling {
writeNodeText(c, b)
}
}
}
// nodeText returns the text contained in n and its descendants.
func nodeText(n *html.Node) string {
var b bytes.Buffer
writeNodeText(n, &b)
return b.String()
}
// nodeOwnText returns the contents of the text nodes that are direct
// children of n.
func nodeOwnText(n *html.Node) string {
var b bytes.Buffer
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == html.TextNode {
b.WriteString(c.Data)
}
}
return b.String()
}
type nthPseudoClassSelector struct {
abstractPseudoClass
a, b int
last, ofType bool
}
func (s nthPseudoClassSelector) Match(n *html.Node) bool {
if s.a == 0 {
if s.last {
return simpleNthLastChildMatch(s.b, s.ofType, n)
} else {
return simpleNthChildMatch(s.b, s.ofType, n)
}
}
return nthChildMatch(s.a, s.b, s.last, s.ofType, n)
}
// nthChildMatch implements :nth-child(an+b).
// If last is true, implements :nth-last-child instead.
// If ofType is true, implements :nth-of-type instead.
func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
i := -1
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
i = count
if !last {
break
}
}
}
if i == -1 {
// This shouldn't happen, since n should always be one of its parent's children.
return false
}
if last {
i = count - i + 1
}
i -= b
if a == 0 {
return i == 0
}
return i%a == 0 && i/a >= 0
}
// simpleNthChildMatch implements :nth-child(b).
// If ofType is true, implements :nth-of-type instead.
func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
return count == b
}
if count >= b {
return false
}
}
return false
}
// simpleNthLastChildMatch implements :nth-last-child(b).
// If ofType is true, implements :nth-last-of-type instead.
func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.LastChild; c != nil; c = c.PrevSibling {
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
return count == b
}
if count >= b {
return false
}
}
return false
}
type onlyChildPseudoClassSelector struct {
abstractPseudoClass
ofType bool
}
// Match implements :only-child.
// If `ofType` is true, it implements :only-of-type instead.
func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) {
continue
}
count++
if count > 1 {
return false
}
}
return count == 1
}
type inputPseudoClassSelector struct {
abstractPseudoClass
}
// Matches input, select, textarea and button elements.
func (s inputPseudoClassSelector) Match(n *html.Node) bool {
return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button")
}
type emptyElementPseudoClassSelector struct {
abstractPseudoClass
}
// Matches empty elements.
func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
switch c.Type {
case html.ElementNode:
return false
case html.TextNode:
if strings.TrimSpace(nodeText(c)) == "" {
continue
} else {
return false
}
}
}
return true
}
type rootPseudoClassSelector struct {
abstractPseudoClass
}
// Match implements :root
func (s rootPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
if n.Parent == nil {
return false
}
return n.Parent.Type == html.DocumentNode
}
func hasAttr(n *html.Node, attr string) bool {
return matchAttribute(n, attr, func(string) bool { return true })
}
type linkPseudoClassSelector struct {
abstractPseudoClass
}
// Match implements :link
func (s linkPseudoClassSelector) Match(n *html.Node) bool {
return (n.DataAtom == atom.A || n.DataAtom == atom.Area || n.DataAtom == atom.Link) && hasAttr(n, "href")
}
type langPseudoClassSelector struct {
abstractPseudoClass
lang string
}
func (s langPseudoClassSelector) Match(n *html.Node) bool {
own := matchAttribute(n, "lang", func(val string) bool {
return val == s.lang || strings.HasPrefix(val, s.lang+"-")
})
if n.Parent == nil {
return own
}
return own || s.Match(n.Parent)
}
type enabledPseudoClassSelector struct {
abstractPseudoClass
}
func (s enabledPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
switch n.DataAtom {
case atom.A, atom.Area, atom.Link:
return hasAttr(n, "href")
case atom.Optgroup, atom.Menuitem, atom.Fieldset:
return !hasAttr(n, "disabled")
case atom.Button, atom.Input, atom.Select, atom.Textarea, atom.Option:
return !hasAttr(n, "disabled") && !inDisabledFieldset(n)
}
return false
}
type disabledPseudoClassSelector struct {
abstractPseudoClass
}
func (s disabledPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
switch n.DataAtom {
case atom.Optgroup, atom.Menuitem, atom.Fieldset:
return hasAttr(n, "disabled")
case atom.Button, atom.Input, atom.Select, atom.Textarea, atom.Option:
return hasAttr(n, "disabled") || inDisabledFieldset(n)
}
return false
}
func hasLegendInPreviousSiblings(n *html.Node) bool {
for s := n.PrevSibling; s != nil; s = s.PrevSibling {
if s.DataAtom == atom.Legend {
return true
}
}
return false
}
func inDisabledFieldset(n *html.Node) bool {
if n.Parent == nil {
return false
}
if n.Parent.DataAtom == atom.Fieldset && hasAttr(n.Parent, "disabled") &&
(n.DataAtom != atom.Legend || hasLegendInPreviousSiblings(n)) {
return true
}
return inDisabledFieldset(n.Parent)
}
type checkedPseudoClassSelector struct {
abstractPseudoClass
}
func (s checkedPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
switch n.DataAtom {
case atom.Input, atom.Menuitem:
return hasAttr(n, "checked") && matchAttribute(n, "type", func(val string) bool {
t := toLowerASCII(val)
return t == "checkbox" || t == "radio"
})
case atom.Option:
return hasAttr(n, "selected")
}
return false
}

View file

@ -1,7 +1,6 @@
package cascadia
import (
"bytes"
"fmt"
"regexp"
"strings"
@ -232,7 +231,7 @@ type classSelector struct {
// Matches elements by class attribute.
func (t classSelector) Match(n *html.Node) bool {
return matchAttribute(n, "class", func(s string) bool {
return matchInclude(t.class, s)
return matchInclude(t.class, s, false)
})
}
@ -266,6 +265,7 @@ func (c idSelector) PseudoElement() string {
type attrSelector struct {
key, val, operation string
regexp *regexp.Regexp
insensitive bool
}
// Matches elements by attribute value.
@ -274,20 +274,20 @@ func (t attrSelector) Match(n *html.Node) bool {
case "":
return matchAttribute(n, t.key, func(string) bool { return true })
case "=":
return matchAttribute(n, t.key, func(s string) bool { return s == t.val })
return matchAttribute(n, t.key, func(s string) bool { return matchInsensitiveValue(s, t.val, t.insensitive) })
case "!=":
return attributeNotEqualMatch(t.key, t.val, n)
return attributeNotEqualMatch(t.key, t.val, n, t.insensitive)
case "~=":
// matches elements where the attribute named key is a whitespace-separated list that includes val.
return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s) })
return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s, t.insensitive) })
case "|=":
return attributeDashMatch(t.key, t.val, n)
return attributeDashMatch(t.key, t.val, n, t.insensitive)
case "^=":
return attributePrefixMatch(t.key, t.val, n)
return attributePrefixMatch(t.key, t.val, n, t.insensitive)
case "$=":
return attributeSuffixMatch(t.key, t.val, n)
return attributeSuffixMatch(t.key, t.val, n, t.insensitive)
case "*=":
return attributeSubstringMatch(t.key, t.val, n)
return attributeSubstringMatch(t.key, t.val, n, t.insensitive)
case "#=":
return attributeRegexMatch(t.key, t.regexp, n)
default:
@ -295,6 +295,17 @@ func (t attrSelector) Match(n *html.Node) bool {
}
}
// matches elements where we ignore (or not) the case of the attribute value
// the user attribute is the value set by the user to match elements
// the real attribute is the attribute value found in the code parsed
func matchInsensitiveValue(userAttr string, realAttr string, ignoreCase bool) bool {
if ignoreCase {
return strings.EqualFold(userAttr, realAttr)
}
return userAttr == realAttr
}
// matches elements where the attribute named key satisifes the function f.
func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
if n.Type != html.ElementNode {
@ -310,12 +321,12 @@ func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
// attributeNotEqualMatch matches elements where
// the attribute named key does not have the value val.
func attributeNotEqualMatch(key, val string, n *html.Node) bool {
func attributeNotEqualMatch(key, val string, n *html.Node, ignoreCase bool) bool {
if n.Type != html.ElementNode {
return false
}
for _, a := range n.Attr {
if a.Key == key && a.Val == val {
if a.Key == key && matchInsensitiveValue(a.Val, val, ignoreCase) {
return false
}
}
@ -323,13 +334,13 @@ func attributeNotEqualMatch(key, val string, n *html.Node) bool {
}
// returns true if s is a whitespace-separated list that includes val.
func matchInclude(val, s string) bool {
func matchInclude(val string, s string, ignoreCase bool) bool {
for s != "" {
i := strings.IndexAny(s, " \t\r\n\f")
if i == -1 {
return s == val
return matchInsensitiveValue(s, val, ignoreCase)
}
if s[:i] == val {
if matchInsensitiveValue(s[:i], val, ignoreCase) {
return true
}
s = s[i+1:]
@ -338,16 +349,16 @@ func matchInclude(val, s string) bool {
}
// matches elements where the attribute named key equals val or starts with val plus a hyphen.
func attributeDashMatch(key, val string, n *html.Node) bool {
func attributeDashMatch(key, val string, n *html.Node, ignoreCase bool) bool {
return matchAttribute(n, key,
func(s string) bool {
if s == val {
if matchInsensitiveValue(s, val, ignoreCase) {
return true
}
if len(s) <= len(val) {
return false
}
if s[:len(val)] == val && s[len(val)] == '-' {
if matchInsensitiveValue(s[:len(val)], val, ignoreCase) && s[len(val)] == '-' {
return true
}
return false
@ -356,36 +367,45 @@ func attributeDashMatch(key, val string, n *html.Node) bool {
// attributePrefixMatch returns a Selector that matches elements where
// the attribute named key starts with val.
func attributePrefixMatch(key, val string, n *html.Node) bool {
func attributePrefixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
return matchAttribute(n, key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
if ignoreCase {
return strings.HasPrefix(strings.ToLower(s), strings.ToLower(val))
}
return strings.HasPrefix(s, val)
})
}
// attributeSuffixMatch matches elements where
// the attribute named key ends with val.
func attributeSuffixMatch(key, val string, n *html.Node) bool {
func attributeSuffixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
return matchAttribute(n, key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
if ignoreCase {
return strings.HasSuffix(strings.ToLower(s), strings.ToLower(val))
}
return strings.HasSuffix(s, val)
})
}
// attributeSubstringMatch matches nodes where
// the attribute named key contains val.
func attributeSubstringMatch(key, val string, n *html.Node) bool {
func attributeSubstringMatch(key, val string, n *html.Node, ignoreCase bool) bool {
return matchAttribute(n, key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
if ignoreCase {
return strings.Contains(strings.ToLower(s), strings.ToLower(val))
}
return strings.Contains(s, val)
})
}
@ -407,394 +427,22 @@ func (c attrSelector) PseudoElement() string {
return ""
}
// ---------------- Pseudo class selectors ----------------
// we use severals concrete types of pseudo-class selectors
// see pseudo_classes.go for pseudo classes selectors
type relativePseudoClassSelector struct {
name string // one of "not", "has", "haschild"
match SelectorGroup
}
func (s relativePseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
switch s.name {
case "not":
// matches elements that do not match a.
return !s.match.Match(n)
case "has":
// matches elements with any descendant that matches a.
return hasDescendantMatch(n, s.match)
case "haschild":
// matches elements with a child that matches a.
return hasChildMatch(n, s.match)
default:
panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name))
}
}
// hasChildMatch returns whether n has any child that matches a.
func hasChildMatch(n *html.Node, a Matcher) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a.Match(c) {
return true
}
}
return false
}
// hasDescendantMatch performs a depth-first search of n's descendants,
// testing whether any of them match a. It returns true as soon as a match is
// found, or false if no match is found.
func hasDescendantMatch(n *html.Node, a Matcher) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
return true
}
}
return false
}
// Specificity returns the specificity of the most specific selectors
// in the pseudo-class arguments.
// See https://www.w3.org/TR/selectors/#specificity-rules
func (s relativePseudoClassSelector) Specificity() Specificity {
var max Specificity
for _, sel := range s.match {
newSpe := sel.Specificity()
if max.Less(newSpe) {
max = newSpe
}
}
return max
}
func (c relativePseudoClassSelector) PseudoElement() string {
return ""
}
type containsPseudoClassSelector struct {
own bool
// on a static context, some selectors can't match anything
type neverMatchSelector struct {
value string
}
func (s containsPseudoClassSelector) Match(n *html.Node) bool {
var text string
if s.own {
// matches nodes that directly contain the given text
text = strings.ToLower(nodeOwnText(n))
} else {
// matches nodes that contain the given text.
text = strings.ToLower(nodeText(n))
}
return strings.Contains(text, s.value)
}
func (s containsPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c containsPseudoClassSelector) PseudoElement() string {
return ""
}
type regexpPseudoClassSelector struct {
own bool
regexp *regexp.Regexp
}
func (s regexpPseudoClassSelector) Match(n *html.Node) bool {
var text string
if s.own {
// matches nodes whose text directly matches the specified regular expression
text = nodeOwnText(n)
} else {
// matches nodes whose text matches the specified regular expression
text = nodeText(n)
}
return s.regexp.MatchString(text)
}
// writeNodeText writes the text contained in n and its descendants to b.
func writeNodeText(n *html.Node, b *bytes.Buffer) {
switch n.Type {
case html.TextNode:
b.WriteString(n.Data)
case html.ElementNode:
for c := n.FirstChild; c != nil; c = c.NextSibling {
writeNodeText(c, b)
}
}
}
// nodeText returns the text contained in n and its descendants.
func nodeText(n *html.Node) string {
var b bytes.Buffer
writeNodeText(n, &b)
return b.String()
}
// nodeOwnText returns the contents of the text nodes that are direct
// children of n.
func nodeOwnText(n *html.Node) string {
var b bytes.Buffer
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == html.TextNode {
b.WriteString(c.Data)
}
}
return b.String()
}
func (s regexpPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c regexpPseudoClassSelector) PseudoElement() string {
return ""
}
type nthPseudoClassSelector struct {
a, b int
last, ofType bool
}
func (s nthPseudoClassSelector) Match(n *html.Node) bool {
if s.a == 0 {
if s.last {
return simpleNthLastChildMatch(s.b, s.ofType, n)
} else {
return simpleNthChildMatch(s.b, s.ofType, n)
}
}
return nthChildMatch(s.a, s.b, s.last, s.ofType, n)
}
// nthChildMatch implements :nth-child(an+b).
// If last is true, implements :nth-last-child instead.
// If ofType is true, implements :nth-of-type instead.
func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
func (s neverMatchSelector) Match(n *html.Node) bool {
return false
}
parent := n.Parent
if parent == nil {
return false
func (s neverMatchSelector) Specificity() Specificity {
return Specificity{0, 0, 0}
}
if parent.Type == html.DocumentNode {
return false
}
i := -1
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
i = count
if !last {
break
}
}
}
if i == -1 {
// This shouldn't happen, since n should always be one of its parent's children.
return false
}
if last {
i = count - i + 1
}
i -= b
if a == 0 {
return i == 0
}
return i%a == 0 && i/a >= 0
}
// simpleNthChildMatch implements :nth-child(b).
// If ofType is true, implements :nth-of-type instead.
func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
return count == b
}
if count >= b {
return false
}
}
return false
}
// simpleNthLastChildMatch implements :nth-last-child(b).
// If ofType is true, implements :nth-last-of-type instead.
func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.LastChild; c != nil; c = c.PrevSibling {
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
return count == b
}
if count >= b {
return false
}
}
return false
}
// Specificity for nth-child pseudo-class.
// Does not support a list of selectors
func (s nthPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c nthPseudoClassSelector) PseudoElement() string {
return ""
}
type onlyChildPseudoClassSelector struct {
ofType bool
}
// Match implements :only-child.
// If `ofType` is true, it implements :only-of-type instead.
func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) {
continue
}
count++
if count > 1 {
return false
}
}
return count == 1
}
func (s onlyChildPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c onlyChildPseudoClassSelector) PseudoElement() string {
return ""
}
type inputPseudoClassSelector struct{}
// Matches input, select, textarea and button elements.
func (s inputPseudoClassSelector) Match(n *html.Node) bool {
return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button")
}
func (s inputPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c inputPseudoClassSelector) PseudoElement() string {
return ""
}
type emptyElementPseudoClassSelector struct{}
// Matches empty elements.
func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
switch c.Type {
case html.ElementNode, html.TextNode:
return false
}
}
return true
}
func (s emptyElementPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c emptyElementPseudoClassSelector) PseudoElement() string {
return ""
}
type rootPseudoClassSelector struct{}
// Match implements :root
func (s rootPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
if n.Parent == nil {
return false
}
return n.Parent.Type == html.DocumentNode
}
func (s rootPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c rootPseudoClassSelector) PseudoElement() string {
func (c neverMatchSelector) PseudoElement() string {
return ""
}

View file

@ -2,21 +2,35 @@ package cascadia
import (
"fmt"
"strconv"
"strings"
)
// implements the reverse operation Sel -> string
var specialCharReplacer *strings.Replacer
func init() {
var pairs []string
for _, s := range ",!\"#$%&'()*+ -./:;<=>?@[\\]^`{|}~" {
pairs = append(pairs, string(s), "\\"+string(s))
}
specialCharReplacer = strings.NewReplacer(pairs...)
}
// espace special CSS char
func escape(s string) string { return specialCharReplacer.Replace(s) }
func (c tagSelector) String() string {
return c.tag
}
func (c idSelector) String() string {
return "#" + c.id
return "#" + escape(c.id)
}
func (c classSelector) String() string {
return "." + c.class
return "." + escape(c.class)
}
func (c attrSelector) String() string {
@ -26,12 +40,20 @@ func (c attrSelector) String() string {
} else if c.operation != "" {
val = fmt.Sprintf(`"%s"`, val)
}
return fmt.Sprintf(`[%s%s%s]`, c.key, c.operation, val)
ignoreCase := ""
if c.insensitive {
ignoreCase = " i"
}
return fmt.Sprintf(`[%s%s%s%s]`, c.key, c.operation, val, ignoreCase)
}
func (c relativePseudoClassSelector) String() string {
return fmt.Sprintf(":%s(%s)", c.name, c.match.String())
}
func (c containsPseudoClassSelector) String() string {
s := "contains"
if c.own {
@ -39,6 +61,7 @@ func (c containsPseudoClassSelector) String() string {
}
return fmt.Sprintf(`:%s("%s")`, s, c.value)
}
func (c regexpPseudoClassSelector) String() string {
s := "matches"
if c.own {
@ -46,6 +69,7 @@ func (c regexpPseudoClassSelector) String() string {
}
return fmt.Sprintf(":%s(%s)", s, c.regexp.String())
}
func (c nthPseudoClassSelector) String() string {
if c.a == 0 && c.b == 1 { // special cases
s := ":first-"
@ -70,24 +94,56 @@ func (c nthPseudoClassSelector) String() string {
case [2]bool{false, false}:
name = "nth-child"
}
return fmt.Sprintf(":%s(%dn+%d)", name, c.a, c.b)
s := fmt.Sprintf("+%d", c.b)
if c.b < 0 { // avoid +-8 invalid syntax
s = strconv.Itoa(c.b)
}
return fmt.Sprintf(":%s(%dn%s)", name, c.a, s)
}
func (c onlyChildPseudoClassSelector) String() string {
if c.ofType {
return ":only-of-type"
}
return ":only-child"
}
func (c inputPseudoClassSelector) String() string {
return ":input"
}
func (c emptyElementPseudoClassSelector) String() string {
return ":empty"
}
func (c rootPseudoClassSelector) String() string {
return ":root"
}
func (c linkPseudoClassSelector) String() string {
return ":link"
}
func (c langPseudoClassSelector) String() string {
return fmt.Sprintf(":lang(%s)", c.lang)
}
func (c neverMatchSelector) String() string {
return c.value
}
func (c enabledPseudoClassSelector) String() string {
return ":enabled"
}
func (c disabledPseudoClassSelector) String() string {
return ":disabled"
}
func (c checkedPseudoClassSelector) String() string {
return ":checked"
}
func (c compoundSelector) String() string {
if len(c.selectors) == 0 && c.pseudoElement == "" {
return "*"

View file

@ -6,7 +6,6 @@
### Converts HTML into text of the markdown-flavored variety
## Introduction
Ensure your emails are readable by all!
@ -19,7 +18,6 @@ There are still lots of improvements to be had, but FWIW this has worked fine fo
It requires go 1.x or newer ;)
## Download the package
```bash
@ -28,6 +26,8 @@ go get jaytaylor.com/html2text
## Example usage
### Library
```go
package main
@ -110,6 +110,11 @@ Here is some more information:
+-------------+-------------+
```
### Command line
```
echo '<div>hi</div>' | html2text
```
## Unit-tests
@ -119,12 +124,10 @@ Running the unit-tests is straightforward and standard:
go test
```
# License
Permissive MIT license.
## Contact
You are more than welcome to open issues and send pull requests if you find a bug or want a new feature.

View file

@ -18,6 +18,7 @@ type Options struct {
PrettyTables bool // Turns on pretty ASCII rendering for table elements.
PrettyTablesOptions *PrettyTablesOptions // Configures pretty ASCII rendering for table elements.
OmitLinks bool // Turns on omitting links
TextOnly bool // Returns only plain text
}
// PrettyTablesOptions overrides tablewriter behaviors
@ -157,6 +158,9 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
}
str := subCtx.buf.String()
if ctx.options.TextOnly {
return ctx.emit(str + ".\n\n")
}
dividerLen := 0
for _, line := range strings.Split(str, "\n") {
if lineLen := len([]rune(line)); lineLen-1 > dividerLen {
@ -177,7 +181,9 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
case atom.Blockquote:
ctx.blockquoteLevel++
if !ctx.options.TextOnly {
ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) + " "
}
if err := ctx.emit("\n"); err != nil {
return err
}
@ -190,7 +196,9 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
return err
}
ctx.blockquoteLevel--
if !ctx.options.TextOnly {
ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel)
}
if ctx.blockquoteLevel > 0 {
ctx.prefix += " "
}
@ -213,9 +221,11 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
return err
case atom.Li:
if !ctx.options.TextOnly {
if err := ctx.emit("* "); err != nil {
return err
}
}
if err := ctx.traverseChildren(node); err != nil {
return err
@ -230,6 +240,9 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
return err
}
str := subCtx.buf.String()
if ctx.options.TextOnly {
return ctx.emit(str + ".")
}
return ctx.emit("*" + str + "*")
case atom.A:
@ -254,7 +267,7 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
if attrVal := getAttrVal(node, "href"); attrVal != "" {
attrVal = ctx.normalizeHrefLink(attrVal)
// Don't print link href if it matches link element content or if the link is empty.
if !ctx.options.OmitLinks && attrVal != "" && linkText != attrVal {
if (!ctx.options.OmitLinks && attrVal != "" && linkText != attrVal) || !ctx.options.TextOnly {
hrefLink = "( " + attrVal + " )"
}
}

View file

@ -8,8 +8,6 @@
A high-performance 100% compatible drop-in replacement of "encoding/json"
You can also use thrift like JSON using [thrift-iterator](https://github.com/thrift-iterator/go)
# Benchmark
![benchmark](http://jsoniter.com/benchmarks/go-benchmark.png)

View file

@ -6,6 +6,6 @@ require (
github.com/davecgh/go-spew v1.1.1
github.com/google/gofuzz v1.0.0
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742
github.com/modern-go/reflect2 v1.0.2
github.com/stretchr/testify v1.3.0
)

View file

@ -5,8 +5,8 @@ github.com/google/gofuzz v1.0.0 h1:A8PeW59pxE9IoFRqBp37U+mSNaQoZ46F1f0f863XSXw=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=

View file

@ -288,6 +288,9 @@ non_decimal_loop:
return iter.readFloat64SlowPath()
}
value = (value << 3) + (value << 1) + uint64(ind)
if value > maxFloat64 {
return iter.readFloat64SlowPath()
}
}
}
return iter.readFloat64SlowPath()

View file

@ -9,6 +9,7 @@ var intDigits []int8
const uint32SafeToMultiply10 = uint32(0xffffffff)/10 - 1
const uint64SafeToMultiple10 = uint64(0xffffffffffffffff)/10 - 1
const maxFloat64 = 1<<53 - 1
func init() {
intDigits = make([]int8, 256)
@ -339,7 +340,7 @@ func (iter *Iterator) readUint64(c byte) (ret uint64) {
}
func (iter *Iterator) assertInteger() {
if iter.head < len(iter.buf) && iter.buf[iter.head] == '.' {
if iter.head < iter.tail && iter.buf[iter.head] == '.' {
iter.ReportError("assertInteger", "can not decode float as int")
}
}

View file

@ -65,7 +65,7 @@ func (iter *Iterator) ReadVal(obj interface{}) {
decoder := iter.cfg.getDecoderFromCache(cacheKey)
if decoder == nil {
typ := reflect2.TypeOf(obj)
if typ.Kind() != reflect.Ptr {
if typ == nil || typ.Kind() != reflect.Ptr {
iter.ReportError("ReadVal", "can only unmarshal into pointer")
return
}

View file

@ -33,12 +33,20 @@ type jsonRawMessageCodec struct {
}
func (codec *jsonRawMessageCodec) Decode(ptr unsafe.Pointer, iter *Iterator) {
*((*json.RawMessage)(ptr)) = json.RawMessage(iter.SkipAndReturnBytes())
if iter.ReadNil() {
*((*json.RawMessage)(ptr)) = nil
} else {
*((*json.RawMessage)(ptr)) = iter.SkipAndReturnBytes()
}
}
func (codec *jsonRawMessageCodec) Encode(ptr unsafe.Pointer, stream *Stream) {
if *((*json.RawMessage)(ptr)) == nil {
stream.WriteNil()
} else {
stream.WriteRaw(string(*((*json.RawMessage)(ptr))))
}
}
func (codec *jsonRawMessageCodec) IsEmpty(ptr unsafe.Pointer) bool {
return len(*((*json.RawMessage)(ptr))) == 0
@ -48,12 +56,20 @@ type jsoniterRawMessageCodec struct {
}
func (codec *jsoniterRawMessageCodec) Decode(ptr unsafe.Pointer, iter *Iterator) {
*((*RawMessage)(ptr)) = RawMessage(iter.SkipAndReturnBytes())
if iter.ReadNil() {
*((*RawMessage)(ptr)) = nil
} else {
*((*RawMessage)(ptr)) = iter.SkipAndReturnBytes()
}
}
func (codec *jsoniterRawMessageCodec) Encode(ptr unsafe.Pointer, stream *Stream) {
if *((*RawMessage)(ptr)) == nil {
stream.WriteNil()
} else {
stream.WriteRaw(string(*((*RawMessage)(ptr))))
}
}
func (codec *jsoniterRawMessageCodec) IsEmpty(ptr unsafe.Pointer) bool {
return len(*((*RawMessage)(ptr))) == 0

View file

@ -1075,6 +1075,11 @@ type stringModeNumberDecoder struct {
}
func (decoder *stringModeNumberDecoder) Decode(ptr unsafe.Pointer, iter *Iterator) {
if iter.WhatIsNext() == NilValue {
decoder.elemDecoder.Decode(ptr, iter)
return
}
c := iter.nextToken()
if c != '"' {
iter.ReportError("stringModeNumberDecoder", `expect ", but found `+string([]byte{c}))

View file

@ -1,3 +1,5 @@
module github.com/mattn/go-runewidth
go 1.9
require github.com/rivo/uniseg v0.2.0

2
vendor/github.com/mattn/go-runewidth/go.sum generated vendored Normal file
View file

@ -0,0 +1,2 @@
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=

View file

@ -2,6 +2,8 @@ package runewidth
import (
"os"
"github.com/rivo/uniseg"
)
//go:generate go run script/generate.go
@ -10,11 +12,14 @@ var (
// EastAsianWidth will be set true if the current locale is CJK
EastAsianWidth bool
// ZeroWidthJoiner is flag to set to use UTR#51 ZWJ
ZeroWidthJoiner bool
// StrictEmojiNeutral should be set false if handle broken fonts
StrictEmojiNeutral bool = true
// DefaultCondition is a condition in current locale
DefaultCondition = &Condition{}
DefaultCondition = &Condition{
EastAsianWidth: false,
StrictEmojiNeutral: true,
}
)
func init() {
@ -30,7 +35,6 @@ func handleEnv() {
}
// update DefaultCondition
DefaultCondition.EastAsianWidth = EastAsianWidth
DefaultCondition.ZeroWidthJoiner = ZeroWidthJoiner
}
type interval struct {
@ -86,62 +90,68 @@ var nonprint = table{
// Condition have flag EastAsianWidth whether the current locale is CJK or not.
type Condition struct {
EastAsianWidth bool
ZeroWidthJoiner bool
StrictEmojiNeutral bool
}
// NewCondition return new instance of Condition which is current locale.
func NewCondition() *Condition {
return &Condition{
EastAsianWidth: EastAsianWidth,
ZeroWidthJoiner: ZeroWidthJoiner,
StrictEmojiNeutral: StrictEmojiNeutral,
}
}
// RuneWidth returns the number of cells in r.
// See http://www.unicode.org/reports/tr11/
func (c *Condition) RuneWidth(r rune) int {
// optimized version, verified by TestRuneWidthChecksums()
if !c.EastAsianWidth {
switch {
case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining, notassigned):
case r < 0x20 || r > 0x10FFFF:
return 0
case (c.EastAsianWidth && IsAmbiguousWidth(r)) || inTables(r, doublewidth):
case (r >= 0x7F && r <= 0x9F) || r == 0xAD: // nonprint
return 0
case r < 0x300:
return 1
case inTable(r, narrow):
return 1
case inTables(r, nonprint, combining):
return 0
case inTable(r, doublewidth):
return 2
default:
return 1
}
} else {
switch {
case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining):
return 0
case inTable(r, narrow):
return 1
case inTables(r, ambiguous, doublewidth):
return 2
case !c.StrictEmojiNeutral && inTables(r, ambiguous, emoji, narrow):
return 2
default:
return 1
}
}
func (c *Condition) stringWidth(s string) (width int) {
for _, r := range []rune(s) {
width += c.RuneWidth(r)
}
return width
}
func (c *Condition) stringWidthZeroJoiner(s string) (width int) {
r1, r2 := rune(0), rune(0)
for _, r := range []rune(s) {
if r == 0xFE0E || r == 0xFE0F {
continue
}
w := c.RuneWidth(r)
if r2 == 0x200D && inTables(r, emoji) && inTables(r1, emoji) {
if width < w {
width = w
}
} else {
width += w
}
r1, r2 = r2, r
}
return width
}
// StringWidth return width as you can see
func (c *Condition) StringWidth(s string) (width int) {
if c.ZeroWidthJoiner {
return c.stringWidthZeroJoiner(s)
g := uniseg.NewGraphemes(s)
for g.Next() {
var chWidth int
for _, r := range g.Runes() {
chWidth = c.RuneWidth(r)
if chWidth > 0 {
break // Our best guess at this point is to use the width of the first non-zero-width rune.
}
return c.stringWidth(s)
}
width += chWidth
}
return
}
// Truncate return string truncated with w cells
@ -149,19 +159,25 @@ func (c *Condition) Truncate(s string, w int, tail string) string {
if c.StringWidth(s) <= w {
return s
}
r := []rune(s)
tw := c.StringWidth(tail)
w -= tw
width := 0
i := 0
for ; i < len(r); i++ {
cw := c.RuneWidth(r[i])
if width+cw > w {
w -= c.StringWidth(tail)
var width int
pos := len(s)
g := uniseg.NewGraphemes(s)
for g.Next() {
var chWidth int
for _, r := range g.Runes() {
chWidth = c.RuneWidth(r)
if chWidth > 0 {
break // See StringWidth() for details.
}
}
if width+chWidth > w {
pos, _ = g.Positions()
break
}
width += cw
width += chWidth
}
return string(r[0:i]) + tail
return s[:pos] + tail
}
// Wrap return string wrapped with w cells
@ -169,7 +185,7 @@ func (c *Condition) Wrap(s string, w int) string {
width := 0
out := ""
for _, r := range []rune(s) {
cw := RuneWidth(r)
cw := c.RuneWidth(r)
if r == '\n' {
out += string(r)
width = 0

View file

@ -124,8 +124,10 @@ var ambiguous = table{
{0x1F18F, 0x1F190}, {0x1F19B, 0x1F1AC}, {0xE0100, 0xE01EF},
{0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD},
}
var notassigned = table{
{0x27E6, 0x27ED}, {0x2985, 0x2986},
var narrow = table{
{0x0020, 0x007E}, {0x00A2, 0x00A3}, {0x00A5, 0x00A6},
{0x00AC, 0x00AC}, {0x00AF, 0x00AF}, {0x27E6, 0x27ED},
{0x2985, 0x2986},
}
var neutral = table{

View file

@ -43,7 +43,7 @@ const (
)
// Default TLS configuration options
var DefaultConfig tls.Config
var DefaultConfig = &tls.Config{}
// DebugWriter is the writer used to write debugging output to.
var DebugWriter io.Writer = os.Stderr
@ -76,7 +76,7 @@ func containsIgnoreCase(s, substr string) bool {
return strings.Contains(s, substr)
}
func connect(host, user, passwd string) (net.Conn, error) {
func connect(host, user, passwd string, timeout time.Duration) (net.Conn, error) {
addr := host
if strings.TrimSpace(host) == "" {
@ -117,7 +117,7 @@ func connect(host, user, passwd string) (net.Conn, error) {
}
}
c, err := net.Dial("tcp", addr)
c, err := net.DialTimeout("tcp", addr, timeout)
if err != nil {
return nil, err
}
@ -153,6 +153,10 @@ type Options struct {
// Password supplies the password to use for authentication with the remote server.
Password string
// DialTimeout is the time limit for establishing a connection. A
// DialTimeout of zero means no timeout.
DialTimeout time.Duration
// Resource specifies an XMPP client resource, like "bot", instead of accepting one
// from the server. Use "" to let the server generate one for your client.
Resource string
@ -221,7 +225,7 @@ func (o Options) NewClient() (*Client, error) {
}
}
}
c, err := connect(host, o.User, o.Password)
c, err := connect(host, o.User, o.Password, o.DialTimeout)
if err != nil {
return nil, err
}
@ -237,11 +241,11 @@ func (o Options) NewClient() (*Client, error) {
var tlsconn *tls.Conn
if o.TLSConfig != nil {
tlsconn = tls.Client(c, o.TLSConfig)
host = o.TLSConfig.ServerName
} else {
DefaultConfig.ServerName = host
newconfig := DefaultConfig
newconfig := DefaultConfig.Clone()
newconfig.ServerName = host
tlsconn = tls.Client(c, &newconfig)
tlsconn = tls.Client(c, newconfig)
}
if err = tlsconn.Handshake(); err != nil {
return nil, err
@ -335,13 +339,19 @@ func (c *Client) init(o *Options) error {
var domain string
var user string
a := strings.SplitN(o.User, "@", 2)
if len(o.User) > 0 {
// Check if User is not empty. Otherwise, we'll be attempting ANONYMOUS with Host domain.
switch {
case len(o.User) > 0:
if len(a) != 2 {
return errors.New("xmpp: invalid username (want user@domain): " + o.User)
}
user = a[0]
domain = a[1]
} // Otherwise, we'll be attempting ANONYMOUS
case strings.Contains(o.Host, ":"):
domain = strings.SplitN(o.Host, ":", 2)[0]
default:
domain = o.Host
}
// Declare intent to be a jabber client and gather stream features.
f, err := c.startStream(o, domain)
@ -526,8 +536,7 @@ func (c *Client) startTLSIfRequired(f *streamFeatures, o *Options, domain string
tc := o.TLSConfig
if tc == nil {
tc = new(tls.Config)
*tc = DefaultConfig
tc = DefaultConfig.Clone()
//TODO(scott): we should consider using the server's address or reverse lookup
tc.ServerName = domain
}
@ -642,8 +651,25 @@ func (c *Client) Recv() (stanza interface{}, err error) {
case *clientMessage:
if v.Event.XMLNS == XMPPNS_PUBSUB_EVENT {
// Handle Pubsub notifications
switch v.Event.Items.Node {
case XMPPNS_AVATAR_PEP_METADATA:
if len(v.Event.Items.Items) == 0 {
return AvatarMetadata{}, errors.New("No avatar metadata items available")
}
return handleAvatarMetadata(v.Event.Items.Items[0].Body,
v.From)
// I am not sure whether this can even happen.
// XEP-0084 only specifies a subscription to
// the metadata node.
/*case XMPPNS_AVATAR_PEP_DATA:
return handleAvatarData(v.Event.Items.Items[0].Body,
v.From,
v.Event.Items.Items[0].ID)*/
default:
return pubsubClientToReturn(v.Event), nil
}
}
stamp, _ := time.Parse(
"2006-01-02T15:04:05Z",
@ -741,11 +767,50 @@ func (c *Client) Recv() (stanza interface{}, err error) {
return PubsubItems{}, err
}
switch p.Node {
case XMPPNS_AVATAR_PEP_DATA:
if len(p.Items) == 0 {
return AvatarData{}, errors.New("No avatar data items available")
}
return handleAvatarData(p.Items[0].Body,
v.From,
p.Items[0].ID)
case XMPPNS_AVATAR_PEP_METADATA:
if len(p.Items) == 0 {
return AvatarMetadata{}, errors.New("No avatar metadata items available")
}
return handleAvatarMetadata(p.Items[0].Body,
v.From)
default:
return PubsubItems{
p.Node,
pubsubItemsToReturn(p.Items),
}, nil
}
// Note: XEP-0084 states that metadata and data
// should be fetched with an id of retrieve1.
// Since we already have PubSub implemented, we
// can just use items1 and items3 to do the same
// as an Avatar node is just a PEP (PubSub) node.
/*case "retrieve1":
var p clientPubsubItems
err := xml.Unmarshal([]byte(v.Query.InnerXML), &p)
if err != nil {
return PubsubItems{}, err
}
switch p.Node {
case XMPPNS_AVATAR_PEP_DATA:
return handleAvatarData(p.Items[0].Body,
v.From,
p.Items[0].ID)
case XMPPNS_AVATAR_PEP_METADATA:
return handleAvatarMetadata(p.Items[0].Body,
v
}*/
}
case v.Query.XMLName.Local == "":
return IQ{ID: v.ID, From: v.From, To: v.To, Type: v.Type}, nil
default:

125
vendor/github.com/mattn/go-xmpp/xmpp_avatar.go generated vendored Normal file
View file

@ -0,0 +1,125 @@
package xmpp
import (
"crypto/sha1"
"encoding/base64"
"encoding/hex"
"encoding/xml"
"errors"
"strconv"
)
const (
XMPPNS_AVATAR_PEP_DATA = "urn:xmpp:avatar:data"
XMPPNS_AVATAR_PEP_METADATA = "urn:xmpp:avatar:metadata"
)
type clientAvatarData struct {
XMLName xml.Name `xml:"data"`
Data []byte `xml:",innerxml"`
}
type clientAvatarInfo struct {
XMLName xml.Name `xml:"info"`
Bytes string `xml:"bytes,attr"`
Width string `xml:"width,attr"`
Height string `xml:"height,attr"`
ID string `xml:"id,attr"`
Type string `xml:"type,attr"`
URL string `xml:"url,attr"`
}
type clientAvatarMetadata struct {
XMLName xml.Name `xml:"metadata"`
XMLNS string `xml:"xmlns,attr"`
Info clientAvatarInfo `xml:"info"`
}
type AvatarData struct {
Data []byte
From string
}
type AvatarMetadata struct {
From string
Bytes int
Width int
Height int
ID string
Type string
URL string
}
func handleAvatarData(itemsBody []byte, from, id string) (AvatarData, error) {
var data clientAvatarData
err := xml.Unmarshal(itemsBody, &data)
if err != nil {
return AvatarData{}, err
}
// Base64-decode the avatar data to check its SHA1 hash
dataRaw, err := base64.StdEncoding.DecodeString(
string(data.Data))
if err != nil {
return AvatarData{}, err
}
hash := sha1.Sum(dataRaw)
hashStr := hex.EncodeToString(hash[:])
if hashStr != id {
return AvatarData{}, errors.New("SHA1 hashes do not match")
}
return AvatarData{
Data: dataRaw,
From: from,
}, nil
}
func handleAvatarMetadata(body []byte, from string) (AvatarMetadata, error) {
var meta clientAvatarMetadata
err := xml.Unmarshal(body, &meta)
if err != nil {
return AvatarMetadata{}, err
}
return AvatarMetadata{
From: from,
Bytes: atoiw(meta.Info.Bytes),
Width: atoiw(meta.Info.Width),
Height: atoiw(meta.Info.Height),
ID: meta.Info.ID,
Type: meta.Info.Type,
URL: meta.Info.URL,
}, nil
}
// A wrapper for atoi which just returns -1 if an error occurs
func atoiw(str string) int {
i, err := strconv.Atoi(str)
if err != nil {
return -1
}
return i
}
func (c *Client) AvatarSubscribeMetadata(jid string) {
c.PubsubSubscribeNode(XMPPNS_AVATAR_PEP_METADATA, jid)
}
func (c *Client) AvatarUnsubscribeMetadata(jid string) {
c.PubsubUnsubscribeNode(XMPPNS_AVATAR_PEP_METADATA, jid)
}
func (c *Client) AvatarRequestData(jid string) {
c.PubsubRequestLastItems(XMPPNS_AVATAR_PEP_DATA, jid)
}
func (c *Client) AvatarRequestDataByID(jid, id string) {
c.PubsubRequestItem(XMPPNS_AVATAR_PEP_DATA, jid, id)
}
func (c *Client) AvatarRequestMetadata(jid string) {
c.PubsubRequestLastItems(XMPPNS_AVATAR_PEP_METADATA, jid)
}

View file

@ -23,7 +23,7 @@ func (c *Client) RawInformationQuery(from, to, id, iqType, requestNamespace, bod
return id, err
}
// rawInformation send a IQ request with the the payload body to the server
// rawInformation send a IQ request with the payload body to the server
func (c *Client) RawInformation(from, to, id, iqType, body string) (string, error) {
const xmlIQ = "<iq from='%s' to='%s' id='%s' type='%s'>%s</iq>"
_, err := fmt.Fprintf(c.conn, xmlIQ, xmlEscape(from), xmlEscape(to), id, iqType, body)

View file

@ -32,6 +32,7 @@ The `gofeed` library is a robust feed parser that supports parsing both [RSS](ht
- Atom 0.3
- Atom 1.0
- JSON 1.0
- JSON 1.1
#### Extension Support
@ -113,6 +114,15 @@ feed, _ := fp.ParseURLWithContext("http://feeds.twit.tv/twit.xml", ctx)
fmt.Println(feed.Title)
```
##### Parse a feed from an URL with a custom User-Agent:
```go
fp := gofeed.NewParser()
fp.UserAgent = "MyCustomAgent 1.0"
feed, _ := fp.ParseURL("http://feeds.twit.tv/twit.xml")
fmt.Println(feed.Title)
```
#### Feed Specific Parsers
You can easily use the `rss.Parser`, `atom.Parser` or `json.Parser` directly if you have a usage scenario that requires it:
@ -228,36 +238,36 @@ In addition to the generic handling of extensions, `gofeed` also has built in su
The `DefaultRSSTranslator`, the `DefaultAtomTranslator` and the `DefaultJSONTranslator` map the following `rss.Feed`, `atom.Feed` and `json.Feed` fields to their respective `gofeed.Feed` fields. They are listed in order of precedence (highest to lowest):
`gofeed.Feed` | RSS | Atom | JSON
--- | --- | --- | --
Title | /rss/channel/title<br>/rdf:RDF/channel/title<br>/rss/channel/dc:title<br>/rdf:RDF/channel/dc:title | /feed/title | /title
Description | /rss/channel/description<br>/rdf:RDF/channel/description<br>/rss/channel/itunes:subtitle | /feed/subtitle<br>/feed/tagline | /description
Link | /rss/channel/link<br>/rdf:RDF/channel/link | /feed/link[@rel=”alternate”]/@href<br>/feed/link[not(@rel)]/@href | /home_page_url
FeedLink | /rss/channel/atom:link[@rel="self"]/@href<br>/rdf:RDF/channel/atom:link[@rel="self"]/@href | /feed/link[@rel="self"]/@href | /feed_url
Updated | /rss/channel/lastBuildDate<br>/rss/channel/dc:date<br>/rdf:RDF/channel/dc:date | /feed/updated<br>/feed/modified | /items[0]/date_modified
Published | /rss/channel/pubDate | | /items[0]/date_published
Author | /rss/channel/managingEditor<br>/rss/channel/webMaster<br>/rss/channel/dc:author<br>/rdf:RDF/channel/dc:author<br>/rss/channel/dc:creator<br>/rdf:RDF/channel/dc:creator<br>/rss/channel/itunes:author | /feed/author | /author/name
Language | /rss/channel/language<br>/rss/channel/dc:language<br>/rdf:RDF/channel/dc:language | /feed/@xml:lang |
Image | /rss/channel/image<br>/rdf:RDF/image<br>/rss/channel/itunes:image | /feed/logo | /icon
Copyright | /rss/channel/copyright<br>/rss/channel/dc:rights<br>/rdf:RDF/channel/dc:rights | /feed/rights<br>/feed/copyright |
Generator | /rss/channel/generator | /feed/generator |
Categories | /rss/channel/category<br>/rss/channel/itunes:category<br>/rss/channel/itunes:keywords<br>/rss/channel/dc:subject<br>/rdf:RDF/channel/dc:subject | /feed/category |
`gofeed.Item` | RSS | Atom | JSON
--- | --- | --- | ---
Title | /rss/channel/item/title<br>/rdf:RDF/item/title<br>/rdf:RDF/item/dc:title<br>/rss/channel/item/dc:title | /feed/entry/title | /items/title
Description | /rss/channel/item/description<br>/rdf:RDF/item/description<br>/rss/channel/item/dc:description<br>/rdf:RDF/item/dc:description | /feed/entry/summary | /items/summary
Content | /rss/channel/item/content:encoded | /feed/entry/content | /items/content_html
Link | /rss/channel/item/link<br>/rdf:RDF/item/link | /feed/entry/link[@rel=”alternate”]/@href<br>/feed/entry/link[not(@rel)]/@href | /items/url
Updated | /rss/channel/item/dc:date<br>/rdf:RDF/rdf:item/dc:date | /feed/entry/modified<br>/feed/entry/updated | /items/date_modified
Published | /rss/channel/item/pubDate<br>/rss/channel/item/dc:date | /feed/entry/published<br>/feed/entry/issued | /items/date_published
Author | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/author | /items/author/name
GUID | /rss/channel/item/guid | /feed/entry/id | /items/id
Image | /rss/channel/item/itunes:image<br>/rss/channel/item/media:image | | /items/image<br>/items/banner_image
Categories | /rss/channel/item/category<br>/rss/channel/item/dc:subject<br>/rss/channel/item/itunes:keywords<br>/rdf:RDF/channel/item/dc:subject | /feed/entry/category | /items/tags
Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”] | /items/attachments
| `gofeed.Feed` | RSS | Atom | JSON |
| ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------- | ------------------------ |
| Title | /rss/channel/title<br>/rdf:RDF/channel/title<br>/rss/channel/dc:title<br>/rdf:RDF/channel/dc:title | /feed/title | /title |
| Description | /rss/channel/description<br>/rdf:RDF/channel/description<br>/rss/channel/itunes:subtitle | /feed/subtitle<br>/feed/tagline | /description |
| Link | /rss/channel/link<br>/rdf:RDF/channel/link | /feed/link[@rel=”alternate”]/@href<br>/feed/link[not(@rel)]/@href | /home_page_url |
| FeedLink | /rss/channel/atom:link[@rel="self"]/@href<br>/rdf:RDF/channel/atom:link[@rel="self"]/@href | /feed/link[@rel="self"]/@href | /feed_url |
| Updated | /rss/channel/lastBuildDate<br>/rss/channel/dc:date<br>/rdf:RDF/channel/dc:date | /feed/updated<br>/feed/modified | /items[0]/date_modified |
| Published | /rss/channel/pubDate | | /items[0]/date_published |
| Author | /rss/channel/managingEditor<br>/rss/channel/webMaster<br>/rss/channel/dc:author<br>/rdf:RDF/channel/dc:author<br>/rss/channel/dc:creator<br>/rdf:RDF/channel/dc:creator<br>/rss/channel/itunes:author | /feed/authors[0] | /author |
| Authors | /rss/channel/managingEditor<br>/rss/channel/webMaster<br>/rss/channel/dc:author<br>/rdf:RDF/channel/dc:author<br>/rss/channel/dc:creator<br>/rdf:RDF/channel/dc:creator<br>/rss/channel/itunes:author | /feed/authors | /authors<br>/author |
| Language | /rss/channel/language<br>/rss/channel/dc:language<br>/rdf:RDF/channel/dc:language | /feed/@xml:lang | /language |
| Image | /rss/channel/image<br>/rdf:RDF/image<br>/rss/channel/itunes:image | /feed/logo | /icon |
| Copyright | /rss/channel/copyright<br>/rss/channel/dc:rights<br>/rdf:RDF/channel/dc:rights | /feed/rights<br>/feed/copyright |
| Generator | /rss/channel/generator | /feed/generator |
| Categories | /rss/channel/category<br>/rss/channel/itunes:category<br>/rss/channel/itunes:keywords<br>/rss/channel/dc:subject<br>/rdf:RDF/channel/dc:subject | /feed/category |
| `gofeed.Item` | RSS | Atom | JSON |
| ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------- |
| Title | /rss/channel/item/title<br>/rdf:RDF/item/title<br>/rdf:RDF/item/dc:title<br>/rss/channel/item/dc:title | /feed/entry/title | /items/title |
| Description | /rss/channel/item/description<br>/rdf:RDF/item/description<br>/rss/channel/item/dc:description<br>/rdf:RDF/item/dc:description | /feed/entry/summary | /items/summary |
| Content | /rss/channel/item/content:encoded | /feed/entry/content | /items/content_html |
| Link | /rss/channel/item/link<br>/rdf:RDF/item/link | /feed/entry/link[@rel=”alternate”]/@href<br>/feed/entry/link[not(@rel)]/@href | /items/url |
| Updated | /rss/channel/item/dc:date<br>/rdf:RDF/rdf:item/dc:date | /feed/entry/modified<br>/feed/entry/updated | /items/date_modified |
| Published | /rss/channel/item/pubDate<br>/rss/channel/item/dc:date | /feed/entry/published<br>/feed/entry/issued | /items/date_published |
| Author | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/author | /items/author/name |
| Authors | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/authors[0] | /items/authors<br>/items/author/name |
| GUID | /rss/channel/item/guid | /feed/entry/id | /items/id |
| Image | /rss/channel/item/itunes:image<br>/rss/channel/item/media:image | | /items/image<br>/items/banner_image |
| Categories | /rss/channel/item/category<br>/rss/channel/item/dc:subject<br>/rss/channel/item/itunes:keywords<br>/rdf:RDF/channel/item/dc:subject | /feed/entry/category | /items/tags |
| Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”] | /items/attachments |
## Dependencies

View file

@ -33,17 +33,20 @@ func DetectFeedType(feed io.Reader) FeedType {
buffer := new(bytes.Buffer)
buffer.ReadFrom(feed)
// remove leading whitespace (if exists)
var firstChar byte
for {
loop: for {
ch, err := buffer.ReadByte()
if err != nil {
return FeedTypeUnknown
}
if ch != ' ' && ch != '\t' {
// ignore leading whitespace & byte order marks
switch ch {
case ' ', '\r', '\n', '\t':
case 0xFE, 0xFF, 0x00, 0xEF, 0xBB, 0xBF: // utf 8-16-32 bom
default:
firstChar = ch
buffer.UnreadByte()
break
break loop
}
}

View file

@ -4,7 +4,7 @@ import (
"encoding/json"
"time"
"github.com/mmcdole/gofeed/extensions"
ext "github.com/mmcdole/gofeed/extensions"
)
// Feed is the universal Feed type that atom.Feed
@ -17,11 +17,13 @@ type Feed struct {
Description string `json:"description,omitempty"`
Link string `json:"link,omitempty"`
FeedLink string `json:"feedLink,omitempty"`
Links []string `json:"links,omitempty"`
Updated string `json:"updated,omitempty"`
UpdatedParsed *time.Time `json:"updatedParsed,omitempty"`
Published string `json:"published,omitempty"`
PublishedParsed *time.Time `json:"publishedParsed,omitempty"`
Author *Person `json:"author,omitempty"`
Author *Person `json:"author,omitempty"` // Deprecated: Use feed.Authors instead
Authors []*Person `json:"authors,omitempty"`
Language string `json:"language,omitempty"`
Image *Image `json:"image,omitempty"`
Copyright string `json:"copyright,omitempty"`
@ -49,11 +51,13 @@ type Item struct {
Description string `json:"description,omitempty"`
Content string `json:"content,omitempty"`
Link string `json:"link,omitempty"`
Links []string `json:"links,omitempty"`
Updated string `json:"updated,omitempty"`
UpdatedParsed *time.Time `json:"updatedParsed,omitempty"`
Published string `json:"published,omitempty"`
PublishedParsed *time.Time `json:"publishedParsed,omitempty"`
Author *Person `json:"author,omitempty"`
Author *Person `json:"author,omitempty"` // Deprecated: Use item.Authors instead
Authors []*Person `json:"authors,omitempty"`
GUID string `json:"guid,omitempty"`
Image *Image `json:"image,omitempty"`
Categories []string `json:"categories,omitempty"`

View file

@ -4,7 +4,6 @@ go 1.14
require (
github.com/PuerkitoBio/goquery v1.5.1
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/json-iterator/go v1.1.10
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf
github.com/stretchr/testify v1.3.0

View file

@ -24,8 +24,6 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/urfave/cli v1.22.3 h1:FpNT6zq26xNpHZy08emi755QwzLPs6Pukqjlc7RfOMU=
@ -36,7 +34,6 @@ golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=

View file

@ -23,6 +23,7 @@ var dateFormats = []string{
"Mon Jan 2 15:04 2006",
"Mon Jan 02, 2006 3:04 pm",
"Mon Jan 02 2006 15:04:05 -0700",
"Mon Jan 02 2006 15:04:05 GMT-0700 (MST)",
"Monday, January 2, 2006 03:04 PM",
"Monday, January 2, 2006",
"Monday, January 02, 2006",
@ -64,6 +65,7 @@ var dateFormats = []string{
"Mon, 02 Jan 2006 15:04:05 -07",
"Mon, 02 Jan 2006 15:04:05 00",
"Mon, 02 Jan 2006 15:04:05",
"Mon, 02 Jan 2006 15:4:5 Z",
"Mon, 02 Jan 2006",
"January 2, 2006 3:04 PM",
"January 2, 2006, 3:04 p.m.",

View file

@ -19,6 +19,10 @@ type Feed struct {
Items []*Item `json:"items"` // items is an array, and is required
// TODO Hubs // hubs (very optional, array of objects) describes endpoints that can be used to subscribe to real-time notifications from the publisher of this feed. Each object has a type and url, both of which are required. See the section “Subscribing to Real-time Notifications” below for details.
// TODO Extensions
// Version 1.1
Authors []*Author `json:"authors,omitempty"`
Language string `json:"language,omitempty"`
}
func (f Feed) String() string {
@ -40,9 +44,14 @@ type Item struct {
DatePublished string `json:"date_published,omitempty"` // date_published (optional, string) specifies the date in RFC 3339 format. (Example: 2010-02-07T14:04:00-05:00.)
DateModified string `json:"date_modified,omitempty"` // date_modified (optional, string) specifies the modification date in RFC 3339 format.
Author *Author `json:"author,omitempty"` // author (optional, object) has the same structure as the top-level author. If not specified in an item, then the top-level author, if present, is the author of the item.
Tags []string `json:"tags,omitempty"` // tags (optional, array of strings) can have any plain text values you want. Tags tend to be just one word, but they may be anything.
Attachments *[]Attachments `json:"attachments,omitempty"` // attachments (optional, array) lists related resources. Podcasts, for instance, would include an attachment thats an audio or video file. An individual item may have one or more attachments.
// TODO Extensions
// Version 1.1
Authors []*Author `json:"authors,omitempty"`
Language string `json:"language,omitempty"`
}
// Author defines the feed author structure. The author object has several members. These are all optional — but if you provide an author object, then at least one is required:

View file

@ -35,6 +35,7 @@ type Parser struct {
AtomTranslator Translator
RSSTranslator Translator
JSONTranslator Translator
UserAgent string
Client *http.Client
rp *rss.Parser
ap *atom.Parser
@ -47,6 +48,7 @@ func NewParser() *Parser {
rp: &rss.Parser{},
ap: &atom.Parser{},
jp: &json.Parser{},
UserAgent: "Gofeed/1.0",
}
return &fp
}
@ -97,7 +99,7 @@ func (f *Parser) ParseURLWithContext(feedURL string, ctx context.Context) (feed
return nil, err
}
req = req.WithContext(ctx)
req.Header.Set("User-Agent", "Gofeed/1.0")
req.Header.Set("User-Agent", f.UserAgent)
resp, err := client.Do(req)
if err != nil {

View file

@ -4,7 +4,7 @@ import (
"encoding/json"
"time"
"github.com/mmcdole/gofeed/extensions"
ext "github.com/mmcdole/gofeed/extensions"
)
// Feed is an RSS Feed
@ -59,6 +59,7 @@ type Item struct {
DublinCoreExt *ext.DublinCoreExtension `json:"dcExt,omitempty"`
ITunesExt *ext.ITunesItemExtension `json:"itunesExt,omitempty"`
Extensions ext.Extensions `json:"extensions,omitempty"`
Custom map[string]string `json:"custom,omitempty"`
}
// Image is an image that represents the feed

View file

@ -415,8 +415,14 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
}
categories = append(categories, result)
} else {
// Skip any elements not part of the item spec
p.Skip()
result, err := shared.ParseText(p)
if err != nil {
continue
}
if item.Custom == nil {
item.Custom = make(map[string]string, 0)
}
item.Custom[name] = result
}
}
}

View file

@ -39,11 +39,13 @@ func (t *DefaultRSSTranslator) Translate(feed interface{}) (*Feed, error) {
result.Description = t.translateFeedDescription(rss)
result.Link = t.translateFeedLink(rss)
result.FeedLink = t.translateFeedFeedLink(rss)
result.Links = t.translateFeedLinks(rss)
result.Updated = t.translateFeedUpdated(rss)
result.UpdatedParsed = t.translateFeedUpdatedParsed(rss)
result.Published = t.translateFeedPublished(rss)
result.PublishedParsed = t.translateFeedPublishedParsed(rss)
result.Author = t.translateFeedAuthor(rss)
result.Authors = t.translateFeedAuthors(rss)
result.Language = t.translateFeedLanguage(rss)
result.Image = t.translateFeedImage(rss)
result.Copyright = t.translateFeedCopyright(rss)
@ -64,9 +66,11 @@ func (t *DefaultRSSTranslator) translateFeedItem(rssItem *rss.Item) (item *Item)
item.Description = t.translateItemDescription(rssItem)
item.Content = t.translateItemContent(rssItem)
item.Link = t.translateItemLink(rssItem)
item.Links = t.translateItemLinks(rssItem)
item.Published = t.translateItemPublished(rssItem)
item.PublishedParsed = t.translateItemPublishedParsed(rssItem)
item.Author = t.translateItemAuthor(rssItem)
item.Authors = t.translateItemAuthors(rssItem)
item.GUID = t.translateItemGUID(rssItem)
item.Image = t.translateItemImage(rssItem)
item.Categories = t.translateItemCategories(rssItem)
@ -74,6 +78,7 @@ func (t *DefaultRSSTranslator) translateFeedItem(rssItem *rss.Item) (item *Item)
item.DublinCoreExt = rssItem.DublinCoreExt
item.ITunesExt = rssItem.ITunesExt
item.Extensions = rssItem.Extensions
item.Custom = rssItem.Custom
return
}
@ -113,6 +118,26 @@ func (t *DefaultRSSTranslator) translateFeedFeedLink(rss *rss.Feed) (link string
return
}
func (t *DefaultRSSTranslator) translateFeedLinks(rss *rss.Feed) (links []string) {
if rss.Link != "" {
links = append(links, rss.Link)
}
if rss.ITunesExt != nil && rss.ITunesExt.Subtitle != "" {
links = append(links, rss.ITunesExt.Subtitle)
}
atomExtensions := t.extensionsForKeys([]string{"atom", "atom10", "atom03"}, rss.Extensions)
for _, ex := range atomExtensions {
if lks, ok := ex["link"]; ok {
for _, l := range lks {
if l.Attrs["rel"] == "" || l.Attrs["rel"] == "alternate" || l.Attrs["rel"] == "self" {
links = append(links, l.Attrs["href"])
}
}
}
}
return
}
func (t *DefaultRSSTranslator) translateFeedUpdated(rss *rss.Feed) (updated string) {
if rss.LastBuildDate != "" {
updated = rss.LastBuildDate
@ -175,6 +200,13 @@ func (t *DefaultRSSTranslator) translateFeedAuthor(rss *rss.Feed) (author *Perso
return
}
func (t *DefaultRSSTranslator) translateFeedAuthors(rss *rss.Feed) (authors []*Person) {
if author := t.translateFeedAuthor(rss); author != nil {
authors = []*Person{author}
}
return
}
func (t *DefaultRSSTranslator) translateFeedLanguage(rss *rss.Feed) (language string) {
if rss.Language != "" {
language = rss.Language
@ -279,6 +311,12 @@ func (t *DefaultRSSTranslator) translateItemContent(rssItem *rss.Item) (content
func (t *DefaultRSSTranslator) translateItemLink(rssItem *rss.Item) (link string) {
return rssItem.Link
}
func (t *DefaultRSSTranslator) translateItemLinks(rssItem *rss.Item) (links []string) {
if rssItem.Link == "" {
return nil
}
return []string{rssItem.Link}
}
func (t *DefaultRSSTranslator) translateItemUpdated(rssItem *rss.Item) (updated string) {
if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Date != nil {
@ -347,6 +385,14 @@ func (t *DefaultRSSTranslator) translateItemAuthor(rssItem *rss.Item) (author *P
return
}
func (t *DefaultRSSTranslator) translateItemAuthors(rssItem *rss.Item) (authors []*Person) {
if author := t.translateItemAuthor(rssItem); author != nil {
authors = []*Person{author}
}
return
}
func (t *DefaultRSSTranslator) translateItemGUID(rssItem *rss.Item) (guid string) {
if rssItem.GUID != nil {
guid = rssItem.GUID.Value
@ -449,9 +495,11 @@ func (t *DefaultAtomTranslator) Translate(feed interface{}) (*Feed, error) {
result.Description = t.translateFeedDescription(atom)
result.Link = t.translateFeedLink(atom)
result.FeedLink = t.translateFeedFeedLink(atom)
result.Links = t.translateFeedLinks(atom)
result.Updated = t.translateFeedUpdated(atom)
result.UpdatedParsed = t.translateFeedUpdatedParsed(atom)
result.Author = t.translateFeedAuthor(atom)
result.Authors = t.translateFeedAuthors(atom)
result.Language = t.translateFeedLanguage(atom)
result.Image = t.translateFeedImage(atom)
result.Copyright = t.translateFeedCopyright(atom)
@ -470,11 +518,13 @@ func (t *DefaultAtomTranslator) translateFeedItem(entry *atom.Entry) (item *Item
item.Description = t.translateItemDescription(entry)
item.Content = t.translateItemContent(entry)
item.Link = t.translateItemLink(entry)
item.Links = t.translateItemLinks(entry)
item.Updated = t.translateItemUpdated(entry)
item.UpdatedParsed = t.translateItemUpdatedParsed(entry)
item.Published = t.translateItemPublished(entry)
item.PublishedParsed = t.translateItemPublishedParsed(entry)
item.Author = t.translateItemAuthor(entry)
item.Authors = t.translateItemAuthors(entry)
item.GUID = t.translateItemGUID(entry)
item.Image = t.translateItemImage(entry)
item.Categories = t.translateItemCategories(entry)
@ -507,6 +557,15 @@ func (t *DefaultAtomTranslator) translateFeedFeedLink(atom *atom.Feed) (link str
return
}
func (t *DefaultAtomTranslator) translateFeedLinks(atom *atom.Feed) (links []string) {
for _, l := range atom.Links {
if l.Rel == "" || l.Rel == "alternate" || l.Rel == "self" {
links = append(links, l.Href)
}
}
return
}
func (t *DefaultAtomTranslator) translateFeedUpdated(atom *atom.Feed) (updated string) {
return atom.Updated
}
@ -526,6 +585,22 @@ func (t *DefaultAtomTranslator) translateFeedAuthor(atom *atom.Feed) (author *Pe
return
}
func (t *DefaultAtomTranslator) translateFeedAuthors(atom *atom.Feed) (authors []*Person) {
if atom.Authors != nil {
authors = []*Person{}
for _, a := range atom.Authors {
authors = append(authors, &Person{
Name: a.Name,
Email: a.Email,
})
}
}
return
}
func (t *DefaultAtomTranslator) translateFeedLanguage(atom *atom.Feed) (language string) {
return atom.Language
}
@ -600,6 +675,15 @@ func (t *DefaultAtomTranslator) translateItemLink(entry *atom.Entry) (link strin
return
}
func (t *DefaultAtomTranslator) translateItemLinks(entry *atom.Entry) (links []string) {
for _, l := range entry.Links {
if l.Rel == "" || l.Rel == "alternate" || l.Rel == "self" {
links = append(links, l.Href)
}
}
return
}
func (t *DefaultAtomTranslator) translateItemUpdated(entry *atom.Entry) (updated string) {
return entry.Updated
}
@ -608,12 +692,20 @@ func (t *DefaultAtomTranslator) translateItemUpdatedParsed(entry *atom.Entry) (u
return entry.UpdatedParsed
}
func (t *DefaultAtomTranslator) translateItemPublished(entry *atom.Entry) (updated string) {
return entry.Published
func (t *DefaultAtomTranslator) translateItemPublished(entry *atom.Entry) (published string) {
published = entry.Published
if published == "" {
published = entry.Updated
}
return
}
func (t *DefaultAtomTranslator) translateItemPublishedParsed(entry *atom.Entry) (updated *time.Time) {
return entry.PublishedParsed
func (t *DefaultAtomTranslator) translateItemPublishedParsed(entry *atom.Entry) (published *time.Time) {
published = entry.PublishedParsed
if published == nil {
published = entry.UpdatedParsed
}
return
}
func (t *DefaultAtomTranslator) translateItemAuthor(entry *atom.Entry) (author *Person) {
@ -626,6 +718,19 @@ func (t *DefaultAtomTranslator) translateItemAuthor(entry *atom.Entry) (author *
return
}
func (t *DefaultAtomTranslator) translateItemAuthors(entry *atom.Entry) (authors []*Person) {
if entry.Authors != nil {
authors = []*Person{}
for _, a := range entry.Authors {
authors = append(authors, &Person{
Name: a.Name,
Email: a.Email,
})
}
}
return
}
func (t *DefaultAtomTranslator) translateItemGUID(entry *atom.Entry) (guid string) {
return entry.ID
}
@ -707,9 +812,12 @@ func (t *DefaultJSONTranslator) Translate(feed interface{}) (*Feed, error) {
result.Title = t.translateFeedTitle(json)
result.Link = t.translateFeedLink(json)
result.FeedLink = t.translateFeedFeedLink(json)
result.Links = t.translateFeedLinks(json)
result.Description = t.translateFeedDescription(json)
result.Image = t.translateFeedImage(json)
result.Author = t.translateFeedAuthor(json)
result.Authors = t.translateFeedAuthors(json)
result.Language = t.translateFeedLanguage(json)
result.Items = t.translateFeedItems(json)
result.Updated = t.translateFeedUpdated(json)
result.UpdatedParsed = t.translateFeedUpdatedParsed(json)
@ -729,6 +837,7 @@ func (t *DefaultJSONTranslator) translateFeedItem(jsonItem *json.Item) (item *It
item = &Item{}
item.GUID = t.translateItemGUID(jsonItem)
item.Link = t.translateItemLink(jsonItem)
item.Links = t.translateItemLinks(jsonItem)
item.Title = t.translateItemTitle(jsonItem)
item.Content = t.translateItemContent(jsonItem)
item.Description = t.translateItemDescription(jsonItem)
@ -738,6 +847,7 @@ func (t *DefaultJSONTranslator) translateFeedItem(jsonItem *json.Item) (item *It
item.Updated = t.translateItemUpdated(jsonItem)
item.UpdatedParsed = t.translateItemUpdatedParsed(jsonItem)
item.Author = t.translateItemAuthor(jsonItem)
item.Authors = t.translateItemAuthors(jsonItem)
item.Categories = t.translateItemCategories(jsonItem)
item.Enclosures = t.translateItemEnclosures(jsonItem)
// TODO ExternalURL is missing in global Feed
@ -770,6 +880,16 @@ func (t *DefaultJSONTranslator) translateFeedFeedLink(json *json.Feed) (link str
return
}
func (t *DefaultJSONTranslator) translateFeedLinks(json *json.Feed) (links []string) {
if json.HomePageURL != "" {
links = append(links, json.HomePageURL)
}
if json.FeedURL != "" {
links = append(links, json.FeedURL)
}
return
}
func (t *DefaultJSONTranslator) translateFeedUpdated(json *json.Feed) (updated string) {
if len(json.Items) > 0 {
updated = json.Items[0].DateModified
@ -816,6 +936,31 @@ func (t *DefaultJSONTranslator) translateFeedAuthor(json *json.Feed) (author *Pe
return
}
func (t *DefaultJSONTranslator) translateFeedAuthors(json *json.Feed) (authors []*Person) {
if json.Authors != nil {
authors = []*Person{}
for _, a := range json.Authors {
name, address := shared.ParseNameAddress(a.Name)
author := &Person{}
author.Name = name
author.Email = address
authors = append(authors, author)
}
} else if author := t.translateFeedAuthor(json); author != nil {
authors = []*Person{author}
}
// Author.URL is missing in global feed
// Author.Avatar is missing in global feed
return
}
func (t *DefaultJSONTranslator) translateFeedLanguage(json *json.Feed) (language string) {
language = json.Language
return
}
func (t *DefaultJSONTranslator) translateFeedImage(json *json.Feed) (image *Image) {
// Using the Icon rather than the image
// icon (optional, string) is the URL of an image for the feed suitable to be used in a timeline. It should be square and relatively large — such as 512 x 512
@ -861,6 +1006,16 @@ func (t *DefaultJSONTranslator) translateItemLink(jsonItem *json.Item) (link str
return jsonItem.URL
}
func (t *DefaultJSONTranslator) translateItemLinks(jsonItem *json.Item) (links []string) {
if jsonItem.URL != "" {
links = append(links, jsonItem.URL)
}
if jsonItem.ExternalURL != "" {
links = append(links, jsonItem.ExternalURL)
}
return
}
func (t *DefaultJSONTranslator) translateItemUpdated(jsonItem *json.Item) (updated string) {
if jsonItem.DateModified != "" {
updated = jsonItem.DateModified
@ -907,6 +1062,26 @@ func (t *DefaultJSONTranslator) translateItemAuthor(jsonItem *json.Item) (author
return
}
func (t *DefaultJSONTranslator) translateItemAuthors(jsonItem *json.Item) (authors []*Person) {
if jsonItem.Authors != nil {
authors = []*Person{}
for _, a := range jsonItem.Authors {
name, address := shared.ParseNameAddress(a.Name)
author := &Person{}
author.Name = name
author.Email = address
authors = append(authors, author)
}
} else if author := t.translateItemAuthor(jsonItem); author != nil {
authors = []*Person{author}
}
// Author.URL is missing in global feed
// Author.Avatar is missing in global feed
return
}
func (t *DefaultJSONTranslator) translateItemGUID(jsonItem *json.Item) (guid string) {
if jsonItem.ID != "" {
guid = jsonItem.ID

View file

@ -1,7 +1,7 @@
language: go
go:
- 1.8.x
- 1.9.x
- 1.x
before_install:

View file

@ -1,15 +1,9 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
[[projects]]
name = "github.com/modern-go/concurrent"
packages = ["."]
revision = "e0a39a4cb4216ea8db28e22a69f4ec25610d513a"
version = "1.0.0"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "daee8a88b3498b61c5640056665b8b9eea062006f5e596bbb6a3ed9119a11ec7"
input-imports = []
solver-name = "gps-cdcl"
solver-version = 1

View file

@ -26,10 +26,6 @@
ignored = []
[[constraint]]
name = "github.com/modern-go/concurrent"
version = "1.0.0"
[prune]
go-tests = true
unused-packages = true

3
vendor/github.com/modern-go/reflect2/go.mod generated vendored Normal file
View file

@ -0,0 +1,3 @@
module github.com/modern-go/reflect2
go 1.12

23
vendor/github.com/modern-go/reflect2/go_above_118.go generated vendored Normal file
View file

@ -0,0 +1,23 @@
//+build go1.18
package reflect2
import (
"unsafe"
)
// m escapes into the return value, but the caller of mapiterinit
// doesn't let the return value escape.
//go:noescape
//go:linkname mapiterinit reflect.mapiterinit
func mapiterinit(rtype unsafe.Pointer, m unsafe.Pointer, it *hiter)
func (type2 *UnsafeMapType) UnsafeIterate(obj unsafe.Pointer) MapIterator {
var it hiter
mapiterinit(type2.rtype, *(*unsafe.Pointer)(obj), &it)
return &UnsafeMapIterator{
hiter: &it,
pKeyRType: type2.pKeyRType,
pElemRType: type2.pElemRType,
}
}

View file

@ -1,8 +0,0 @@
//+build go1.7
package reflect2
import "unsafe"
//go:linkname resolveTypeOff reflect.resolveTypeOff
func resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer

View file

@ -6,6 +6,9 @@ import (
"unsafe"
)
//go:linkname resolveTypeOff reflect.resolveTypeOff
func resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer
//go:linkname makemap reflect.makemap
func makemap(rtype unsafe.Pointer, cap int) (m unsafe.Pointer)

21
vendor/github.com/modern-go/reflect2/go_below_118.go generated vendored Normal file
View file

@ -0,0 +1,21 @@
//+build !go1.18
package reflect2
import (
"unsafe"
)
// m escapes into the return value, but the caller of mapiterinit
// doesn't let the return value escape.
//go:noescape
//go:linkname mapiterinit reflect.mapiterinit
func mapiterinit(rtype unsafe.Pointer, m unsafe.Pointer) (val *hiter)
func (type2 *UnsafeMapType) UnsafeIterate(obj unsafe.Pointer) MapIterator {
return &UnsafeMapIterator{
hiter: mapiterinit(type2.rtype, *(*unsafe.Pointer)(obj)),
pKeyRType: type2.pKeyRType,
pElemRType: type2.pElemRType,
}
}

View file

@ -1,9 +0,0 @@
//+build !go1.7
package reflect2
import "unsafe"
func resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer {
return nil
}

View file

@ -1,14 +0,0 @@
//+build !go1.9
package reflect2
import (
"unsafe"
)
//go:linkname makemap reflect.makemap
func makemap(rtype unsafe.Pointer) (m unsafe.Pointer)
func makeMapWithSize(rtype unsafe.Pointer, cap int) unsafe.Pointer {
return makemap(rtype)
}

View file

@ -1,8 +1,9 @@
package reflect2
import (
"github.com/modern-go/concurrent"
"reflect"
"runtime"
"sync"
"unsafe"
)
@ -130,13 +131,13 @@ var ConfigSafe = Config{UseSafeImplementation: true}.Froze()
type frozenConfig struct {
useSafeImplementation bool
cache *concurrent.Map
cache *sync.Map
}
func (cfg Config) Froze() *frozenConfig {
return &frozenConfig{
useSafeImplementation: cfg.UseSafeImplementation,
cache: concurrent.NewMap(),
cache: new(sync.Map),
}
}
@ -288,11 +289,12 @@ func NoEscape(p unsafe.Pointer) unsafe.Pointer {
}
func UnsafeCastString(str string) []byte {
bytes := make([]byte, 0)
stringHeader := (*reflect.StringHeader)(unsafe.Pointer(&str))
sliceHeader := &reflect.SliceHeader{
Data: stringHeader.Data,
Cap: stringHeader.Len,
Len: stringHeader.Len,
}
return *(*[]byte)(unsafe.Pointer(sliceHeader))
sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&bytes))
sliceHeader.Data = stringHeader.Data
sliceHeader.Cap = stringHeader.Len
sliceHeader.Len = stringHeader.Len
runtime.KeepAlive(str)
return bytes
}

View file

@ -1,12 +0,0 @@
#!/usr/bin/env bash
set -e
echo "" > coverage.txt
for d in $(go list github.com/modern-go/reflect2-tests/... | grep -v vendor); do
go test -coverprofile=profile.out -coverpkg=github.com/modern-go/reflect2 $d
if [ -f profile.out ]; then
cat profile.out >> coverage.txt
rm profile.out
fi
done

View file

@ -1,17 +1,13 @@
// +build !gccgo
package reflect2
import (
"reflect"
"runtime"
"strings"
"sync"
"unsafe"
)
// typelinks1 for 1.5 ~ 1.6
//go:linkname typelinks1 reflect.typelinks
func typelinks1() [][]unsafe.Pointer
// typelinks2 for 1.7 ~
//go:linkname typelinks2 reflect.typelinks
func typelinks2() (sections []unsafe.Pointer, offset [][]int32)
@ -27,49 +23,10 @@ func discoverTypes() {
types = make(map[string]reflect.Type)
packages = make(map[string]map[string]reflect.Type)
ver := runtime.Version()
if ver == "go1.5" || strings.HasPrefix(ver, "go1.5.") {
loadGo15Types()
} else if ver == "go1.6" || strings.HasPrefix(ver, "go1.6.") {
loadGo15Types()
} else {
loadGo17Types()
}
loadGoTypes()
}
func loadGo15Types() {
var obj interface{} = reflect.TypeOf(0)
typePtrss := typelinks1()
for _, typePtrs := range typePtrss {
for _, typePtr := range typePtrs {
(*emptyInterface)(unsafe.Pointer(&obj)).word = typePtr
typ := obj.(reflect.Type)
if typ.Kind() == reflect.Ptr && typ.Elem().Kind() == reflect.Struct {
loadedType := typ.Elem()
pkgTypes := packages[loadedType.PkgPath()]
if pkgTypes == nil {
pkgTypes = map[string]reflect.Type{}
packages[loadedType.PkgPath()] = pkgTypes
}
types[loadedType.String()] = loadedType
pkgTypes[loadedType.Name()] = loadedType
}
if typ.Kind() == reflect.Slice && typ.Elem().Kind() == reflect.Ptr &&
typ.Elem().Elem().Kind() == reflect.Struct {
loadedType := typ.Elem().Elem()
pkgTypes := packages[loadedType.PkgPath()]
if pkgTypes == nil {
pkgTypes = map[string]reflect.Type{}
packages[loadedType.PkgPath()] = pkgTypes
}
types[loadedType.String()] = loadedType
pkgTypes[loadedType.Name()] = loadedType
}
}
}
}
func loadGo17Types() {
func loadGoTypes() {
var obj interface{} = reflect.TypeOf(0)
sections, offset := typelinks2()
for i, offs := range offset {

View file

@ -19,18 +19,12 @@ func typedslicecopy(elemType unsafe.Pointer, dst, src sliceHeader) int
//go:linkname mapassign reflect.mapassign
//go:noescape
func mapassign(rtype unsafe.Pointer, m unsafe.Pointer, key, val unsafe.Pointer)
func mapassign(rtype unsafe.Pointer, m unsafe.Pointer, key unsafe.Pointer, val unsafe.Pointer)
//go:linkname mapaccess reflect.mapaccess
//go:noescape
func mapaccess(rtype unsafe.Pointer, m unsafe.Pointer, key unsafe.Pointer) (val unsafe.Pointer)
// m escapes into the return value, but the caller of mapiterinit
// doesn't let the return value escape.
//go:noescape
//go:linkname mapiterinit reflect.mapiterinit
func mapiterinit(rtype unsafe.Pointer, m unsafe.Pointer) *hiter
//go:noescape
//go:linkname mapiternext reflect.mapiternext
func mapiternext(it *hiter)
@ -42,9 +36,21 @@ func ifaceE2I(rtype unsafe.Pointer, src interface{}, dst unsafe.Pointer)
// If you modify hiter, also change cmd/internal/gc/reflect.go to indicate
// the layout of this structure.
type hiter struct {
key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/internal/gc/range.go).
value unsafe.Pointer // Must be in second position (see cmd/internal/gc/range.go).
// rest fields are ignored
key unsafe.Pointer
value unsafe.Pointer
t unsafe.Pointer
h unsafe.Pointer
buckets unsafe.Pointer
bptr unsafe.Pointer
overflow *[]unsafe.Pointer
oldoverflow *[]unsafe.Pointer
startBucket uintptr
offset uint8
wrapped bool
B uint8
i uint8
bucket uintptr
checkBucket uintptr
}
// add returns p+x.

View file

@ -107,14 +107,6 @@ func (type2 *UnsafeMapType) Iterate(obj interface{}) MapIterator {
return type2.UnsafeIterate(objEFace.data)
}
func (type2 *UnsafeMapType) UnsafeIterate(obj unsafe.Pointer) MapIterator {
return &UnsafeMapIterator{
hiter: mapiterinit(type2.rtype, *(*unsafe.Pointer)(obj)),
pKeyRType: type2.pKeyRType,
pElemRType: type2.pElemRType,
}
}
type UnsafeMapIterator struct {
*hiter
pKeyRType unsafe.Pointer

View file

@ -1,8 +1,8 @@
language: go
arch:
- ppc64le
- amd64
go:
- 1.1
- 1.2
- 1.3
- 1.4
- 1.5
@ -12,3 +12,11 @@ go:
- 1.9
- "1.10"
- tip
jobs:
exclude :
- arch : ppc64le
go :
- 1.3
- arch : ppc64le
go :
- 1.4

View file

@ -25,7 +25,7 @@ Generate ASCII table on the fly ... Installation is simple as
- Set custom footer support
- Optional identical cells merging
- Set custom caption
- Optional reflowing of paragrpahs in multi-line cells.
- Optional reflowing of paragraphs in multi-line cells.
#### Example 1 - Basic
```go
@ -197,6 +197,41 @@ table.Render()
+----------+--------------------------+-------+---------+
```
#### Example 7 - Identical cells merging (specify the column index to merge)
```go
data := [][]string{
[]string{"1/1/2014", "Domain name", "1234", "$10.98"},
[]string{"1/1/2014", "January Hosting", "1234", "$10.98"},
[]string{"1/4/2014", "February Hosting", "3456", "$51.00"},
[]string{"1/4/2014", "February Extra Bandwidth", "4567", "$30.00"},
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"Date", "Description", "CV2", "Amount"})
table.SetFooter([]string{"", "", "Total", "$146.93"})
table.SetAutoMergeCellsByColumnIndex([]int{2, 3})
table.SetRowLine(true)
table.AppendBulk(data)
table.Render()
```
##### Output 7
```
+----------+--------------------------+-------+---------+
| DATE | DESCRIPTION | CV2 | AMOUNT |
+----------+--------------------------+-------+---------+
| 1/1/2014 | Domain name | 1234 | $10.98 |
+----------+--------------------------+ + +
| 1/1/2014 | January Hosting | | |
+----------+--------------------------+-------+---------+
| 1/4/2014 | February Hosting | 3456 | $51.00 |
+----------+--------------------------+-------+---------+
| 1/4/2014 | February Extra Bandwidth | 4567 | $30.00 |
+----------+--------------------------+-------+---------+
| TOTAL | $146.93 |
+----------+--------------------------+-------+---------+
```
#### Table with color
```go
@ -233,7 +268,7 @@ table.Render()
#### Table with color Output
![Table with Color](https://cloud.githubusercontent.com/assets/6460392/21101956/bbc7b356-c0a1-11e6-9f36-dba694746efc.png)
#### Example - 7 Table Cells with Color
#### Example - 8 Table Cells with Color
Individual Cell Colors from `func Rich` take precedence over Column Colors
@ -289,7 +324,7 @@ table.Render()
##### Table cells with color Output
![Table cells with Color](https://user-images.githubusercontent.com/9064687/63969376-bcd88d80-ca6f-11e9-9466-c3d954700b25.png)
#### Example 8 - Set table caption
#### Example 9 - Set table caption
```go
data := [][]string{
[]string{"A", "The Good", "500"},
@ -310,7 +345,7 @@ table.Render() // Send output
Note: Caption text will wrap with total width of rendered table.
##### Output 7
##### Output 9
```
+------+-----------------------+--------+
| NAME | SIGN | RATING |
@ -323,7 +358,7 @@ Note: Caption text will wrap with total width of rendered table.
Movie ratings.
```
#### Example 8 - Set NoWhiteSpace and TablePadding option
#### Example 10 - Set NoWhiteSpace and TablePadding option
```go
data := [][]string{
{"node1.example.com", "Ready", "compute", "1.11"},
@ -349,7 +384,7 @@ table.AppendBulk(data) // Add Bulk Data
table.Render()
```
##### Output 8
##### Output 10
```
NAME STATUS ROLE VERSION
node1.example.com Ready compute 1.11

View file

@ -2,4 +2,4 @@ module github.com/olekukonko/tablewriter
go 1.12
require github.com/mattn/go-runewidth v0.0.7
require github.com/mattn/go-runewidth v0.0.9

View file

@ -1,2 +1,2 @@
github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54=
github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=

View file

@ -72,6 +72,7 @@ type Table struct {
newLine string
rowLine bool
autoMergeCells bool
columnsToAutoMergeCells map[int]bool
noWhiteSpace bool
tablePadding string
hdrLine bool
@ -276,6 +277,21 @@ func (t *Table) SetAutoMergeCells(auto bool) {
t.autoMergeCells = auto
}
// Set Auto Merge Cells By Column Index
// This would enable / disable the merge of cells with identical values for specific columns
// If cols is empty, it is the same as `SetAutoMergeCells(true)`.
func (t *Table) SetAutoMergeCellsByColumnIndex(cols []int) {
t.autoMergeCells = true
if len(cols) > 0 {
m := make(map[int]bool)
for _, col := range cols {
m[col] = true
}
t.columnsToAutoMergeCells = m
}
}
// Set Table Border
// This would enable / disable line around the table
func (t *Table) SetBorder(border bool) {
@ -830,9 +846,19 @@ func (t *Table) printRowMergeCells(writer io.Writer, columns [][]string, rowIdx
}
if t.autoMergeCells {
var mergeCell bool
if t.columnsToAutoMergeCells != nil {
// Check to see if the column index is in columnsToAutoMergeCells.
if t.columnsToAutoMergeCells[y] {
mergeCell = true
}
} else {
// columnsToAutoMergeCells was not set.
mergeCell = true
}
//Store the full line to merge mutli-lines cells
fullLine := strings.TrimRight(strings.Join(columns[y], " "), " ")
if len(previousLine) > y && fullLine == previousLine[y] && fullLine != "" {
if len(previousLine) > y && fullLine == previousLine[y] && fullLine != "" && mergeCell {
// If this cell is identical to the one above but not empty, we don't display the border and keep the cell empty.
displayCellBorder = append(displayCellBorder, false)
str = ""

21
vendor/github.com/rivo/uniseg/LICENSE.txt generated vendored Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 Oliver Kuederle
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

62
vendor/github.com/rivo/uniseg/README.md generated vendored Normal file
View file

@ -0,0 +1,62 @@
# Unicode Text Segmentation for Go
[![Godoc Reference](https://img.shields.io/badge/godoc-reference-blue.svg)](https://godoc.org/github.com/rivo/uniseg)
[![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg)
This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](http://unicode.org/reports/tr29/) (Unicode version 12.0.0).
At this point, only the determination of grapheme cluster boundaries is implemented.
## Background
In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples:
|String|Bytes (UTF-8)|Code points (runes)|Grapheme clusters|
|-|-|-|-|
|Käse|6 bytes: `4b 61 cc 88 73 65`|5 code points: `4b 61 308 73 65`|4 clusters: `[4b],[61 308],[73],[65]`|
|🏳️‍🌈|14 bytes: `f0 9f 8f b3 ef b8 8f e2 80 8d f0 9f 8c 88`|4 code points: `1f3f3 fe0f 200d 1f308`|1 cluster: `[1f3f3 fe0f 200d 1f308]`|
|🇩🇪|8 bytes: `f0 9f 87 a9 f0 9f 87 aa`|2 code points: `1f1e9 1f1ea`|1 cluster: `[1f1e9 1f1ea]`|
This package provides a tool to iterate over these grapheme clusters. This may be used to determine the number of user-perceived characters, to split strings in their intended places, or to extract individual characters which form a unit.
## Installation
```bash
go get github.com/rivo/uniseg
```
## Basic Example
```go
package uniseg
import (
"fmt"
"github.com/rivo/uniseg"
)
func main() {
gr := uniseg.NewGraphemes("👍🏼!")
for gr.Next() {
fmt.Printf("%x ", gr.Runes())
}
// Output: [1f44d 1f3fc] [21]
}
```
## Documentation
Refer to https://godoc.org/github.com/rivo/uniseg for the package's documentation.
## Dependencies
This package does not depend on any packages outside the standard library.
## Your Feedback
Add your issue here on GitHub. Feel free to get in touch if you have any questions.
## Version
Version tags will be introduced once Golang modules are official. Consider this version 0.1.

8
vendor/github.com/rivo/uniseg/doc.go generated vendored Normal file
View file

@ -0,0 +1,8 @@
/*
Package uniseg implements Unicode Text Segmentation according to Unicode
Standard Annex #29 (http://unicode.org/reports/tr29/).
At this point, only the determination of grapheme cluster boundaries is
implemented.
*/
package uniseg

3
vendor/github.com/rivo/uniseg/go.mod generated vendored Normal file
View file

@ -0,0 +1,3 @@
module github.com/rivo/uniseg
go 1.12

268
vendor/github.com/rivo/uniseg/grapheme.go generated vendored Normal file
View file

@ -0,0 +1,268 @@
package uniseg
import "unicode/utf8"
// The states of the grapheme cluster parser.
const (
grAny = iota
grCR
grControlLF
grL
grLVV
grLVTT
grPrepend
grExtendedPictographic
grExtendedPictographicZWJ
grRIOdd
grRIEven
)
// The grapheme cluster parser's breaking instructions.
const (
grNoBoundary = iota
grBoundary
)
// The grapheme cluster parser's state transitions. Maps (state, property) to
// (new state, breaking instruction, rule number). The breaking instruction
// always refers to the boundary between the last and next code point.
//
// This map is queried as follows:
//
// 1. Find specific state + specific property. Stop if found.
// 2. Find specific state + any property.
// 3. Find any state + specific property.
// 4. If only (2) or (3) (but not both) was found, stop.
// 5. If both (2) and (3) were found, use state and breaking instruction from
// the transition with the lower rule number, prefer (3) if rule numbers
// are equal. Stop.
// 6. Assume grAny and grBoundary.
var grTransitions = map[[2]int][3]int{
// GB5
{grAny, prCR}: {grCR, grBoundary, 50},
{grAny, prLF}: {grControlLF, grBoundary, 50},
{grAny, prControl}: {grControlLF, grBoundary, 50},
// GB4
{grCR, prAny}: {grAny, grBoundary, 40},
{grControlLF, prAny}: {grAny, grBoundary, 40},
// GB3.
{grCR, prLF}: {grAny, grNoBoundary, 30},
// GB6.
{grAny, prL}: {grL, grBoundary, 9990},
{grL, prL}: {grL, grNoBoundary, 60},
{grL, prV}: {grLVV, grNoBoundary, 60},
{grL, prLV}: {grLVV, grNoBoundary, 60},
{grL, prLVT}: {grLVTT, grNoBoundary, 60},
// GB7.
{grAny, prLV}: {grLVV, grBoundary, 9990},
{grAny, prV}: {grLVV, grBoundary, 9990},
{grLVV, prV}: {grLVV, grNoBoundary, 70},
{grLVV, prT}: {grLVTT, grNoBoundary, 70},
// GB8.
{grAny, prLVT}: {grLVTT, grBoundary, 9990},
{grAny, prT}: {grLVTT, grBoundary, 9990},
{grLVTT, prT}: {grLVTT, grNoBoundary, 80},
// GB9.
{grAny, prExtend}: {grAny, grNoBoundary, 90},
{grAny, prZWJ}: {grAny, grNoBoundary, 90},
// GB9a.
{grAny, prSpacingMark}: {grAny, grNoBoundary, 91},
// GB9b.
{grAny, prPreprend}: {grPrepend, grBoundary, 9990},
{grPrepend, prAny}: {grAny, grNoBoundary, 92},
// GB11.
{grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990},
{grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110},
{grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110},
{grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110},
// GB12 / GB13.
{grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990},
{grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120},
{grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120},
}
// Graphemes implements an iterator over Unicode extended grapheme clusters,
// specified in the Unicode Standard Annex #29. Grapheme clusters correspond to
// "user-perceived characters". These characters often consist of multiple
// code points (e.g. the "woman kissing woman" emoji consists of 8 code points:
// woman + ZWJ + heavy black heart (2 code points) + ZWJ + kiss mark + ZWJ +
// woman) and the rules described in Annex #29 must be applied to group those
// code points into clusters perceived by the user as one character.
type Graphemes struct {
// The code points over which this class iterates.
codePoints []rune
// The (byte-based) indices of the code points into the original string plus
// len(original string). Thus, len(indices) = len(codePoints) + 1.
indices []int
// The current grapheme cluster to be returned. These are indices into
// codePoints/indices. If start == end, we either haven't started iterating
// yet (0) or the iteration has already completed (1).
start, end int
// The index of the next code point to be parsed.
pos int
// The current state of the code point parser.
state int
}
// NewGraphemes returns a new grapheme cluster iterator.
func NewGraphemes(s string) *Graphemes {
l := utf8.RuneCountInString(s)
codePoints := make([]rune, l)
indices := make([]int, l+1)
i := 0
for pos, r := range s {
codePoints[i] = r
indices[i] = pos
i++
}
indices[l] = len(s)
g := &Graphemes{
codePoints: codePoints,
indices: indices,
}
g.Next() // Parse ahead.
return g
}
// Next advances the iterator by one grapheme cluster and returns false if no
// clusters are left. This function must be called before the first cluster is
// accessed.
func (g *Graphemes) Next() bool {
g.start = g.end
// The state transition gives us a boundary instruction BEFORE the next code
// point so we always need to stay ahead by one code point.
// Parse the next code point.
for g.pos <= len(g.codePoints) {
// GB2.
if g.pos == len(g.codePoints) {
g.end = g.pos
g.pos++
break
}
// Determine the property of the next character.
nextProperty := property(g.codePoints[g.pos])
g.pos++
// Find the applicable transition.
var boundary bool
transition, ok := grTransitions[[2]int{g.state, nextProperty}]
if ok {
// We have a specific transition. We'll use it.
g.state = transition[0]
boundary = transition[1] == grBoundary
} else {
// No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := grTransitions[[2]int{g.state, prAny}]
transAnyState, okAnyState := grTransitions[[2]int{grAny, nextProperty}]
if okAnyProp && okAnyState {
// Both apply. We'll use a mix (see comments for grTransitions).
g.state = transAnyState[0]
boundary = transAnyState[1] == grBoundary
if transAnyProp[2] < transAnyState[2] {
g.state = transAnyProp[0]
boundary = transAnyProp[1] == grBoundary
}
} else if okAnyProp {
// We only have a specific state.
g.state = transAnyProp[0]
boundary = transAnyProp[1] == grBoundary
// This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be
// true anymore.
} else if okAnyState {
// We only have a specific property.
g.state = transAnyState[0]
boundary = transAnyState[1] == grBoundary
} else {
// No known transition. GB999: Any x Any.
g.state = grAny
boundary = true
}
}
// If we found a cluster boundary, let's stop here. The current cluster will
// be the one that just ended.
if g.pos-1 == 0 /* GB1 */ || boundary {
g.end = g.pos - 1
break
}
}
return g.start != g.end
}
// Runes returns a slice of runes (code points) which corresponds to the current
// grapheme cluster. If the iterator is already past the end or Next() has not
// yet been called, nil is returned.
func (g *Graphemes) Runes() []rune {
if g.start == g.end {
return nil
}
return g.codePoints[g.start:g.end]
}
// Str returns a substring of the original string which corresponds to the
// current grapheme cluster. If the iterator is already past the end or Next()
// has not yet been called, an empty string is returned.
func (g *Graphemes) Str() string {
if g.start == g.end {
return ""
}
return string(g.codePoints[g.start:g.end])
}
// Bytes returns a byte slice which corresponds to the current grapheme cluster.
// If the iterator is already past the end or Next() has not yet been called,
// nil is returned.
func (g *Graphemes) Bytes() []byte {
if g.start == g.end {
return nil
}
return []byte(string(g.codePoints[g.start:g.end]))
}
// Positions returns the interval of the current grapheme cluster as byte
// positions into the original string. The first returned value "from" indexes
// the first byte and the second returned value "to" indexes the first byte that
// is not included anymore, i.e. str[from:to] is the current grapheme cluster of
// the original string "str". If Next() has not yet been called, both values are
// 0. If the iterator is already past the end, both values are 1.
func (g *Graphemes) Positions() (int, int) {
return g.indices[g.start], g.indices[g.end]
}
// Reset puts the iterator into its initial state such that the next call to
// Next() sets it to the first grapheme cluster again.
func (g *Graphemes) Reset() {
g.start, g.end, g.pos, g.state = 0, 0, 0, grAny
g.Next() // Parse ahead again.
}
// GraphemeClusterCount returns the number of user-perceived characters
// (grapheme clusters) for the given string. To calculate this number, it
// iterates through the string using the Graphemes iterator.
func GraphemeClusterCount(s string) (n int) {
g := NewGraphemes(s)
for g.Next() {
n++
}
return
}

1658
vendor/github.com/rivo/uniseg/properties.go generated vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -663,6 +663,24 @@ func inHeadIM(p *parser) bool {
// Ignore the token.
return true
case a.Template:
// TODO: remove this divergence from the HTML5 spec.
//
// We don't handle all of the corner cases when mixing foreign
// content (i.e. <math> or <svg>) with <template>. Without this
// early return, we can get into an infinite loop, possibly because
// of the "TODO... further divergence" a little below.
//
// As a workaround, if we are mixing foreign content and templates,
// just ignore the rest of the HTML. Foreign content is rare and a
// relatively old HTML feature. Templates are also rare and a
// relatively new HTML feature. Their combination is very rare.
for _, e := range p.oe {
if e.Namespace != "" {
p.im = ignoreTheRemainingTokens
return true
}
}
p.addElement()
p.afe = append(p.afe, &scopeMarker)
p.framesetOK = false
@ -683,7 +701,7 @@ func inHeadIM(p *parser) bool {
if !p.oe.contains(a.Template) {
return true
}
// TODO: remove this divergence from the HTML5 spec.
// TODO: remove this further divergence from the HTML5 spec.
//
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
p.generateImpliedEndTags()
@ -728,7 +746,13 @@ func inHeadNoscriptIM(p *parser) bool {
return inBodyIM(p)
case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
return inHeadIM(p)
case a.Head, a.Noscript:
case a.Head:
// Ignore the token.
return true
case a.Noscript:
// Don't let the tokenizer go into raw text mode even when a <noscript>
// tag is in "in head noscript" insertion mode.
p.tokenizer.NextIsNotRawText()
// Ignore the token.
return true
}
@ -1790,6 +1814,13 @@ func inSelectIM(p *parser) bool {
return true
case a.Script, a.Template:
return inHeadIM(p)
case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
// Don't let the tokenizer go into raw text mode when there are raw tags
// to be ignored. These tags should be ignored from the tokenizer
// properly.
p.tokenizer.NextIsNotRawText()
// Ignore the token.
return true
}
case EndTagToken:
switch p.tok.DataAtom {
@ -2114,6 +2145,10 @@ func afterAfterFramesetIM(p *parser) bool {
return true
}
func ignoreTheRemainingTokens(p *parser) bool {
return true
}
const whitespaceOrNUL = whitespace + "\x00"
// Section 12.2.6.5

View file

@ -57,7 +57,7 @@ loop:
err = transform.ErrShortSrc
break loop
}
r = utf8.RuneError
r, size = utf8.RuneError, 1
goto write
}
size = 2

View file

@ -802,16 +802,16 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
0x03a0010b, 0x03a00115, 0x03a00117, 0x03a0011c,
0x03a00120, 0x03a00128, 0x03a0015e, 0x04000000,
0x04300000, 0x04300099, 0x04400000, 0x0440012f,
0x04800000, 0x0480006e, 0x05800000, 0x0581f000,
0x0581f032, 0x05857000, 0x05857032, 0x05e00000,
0x04800000, 0x0480006e, 0x05800000, 0x05820000,
0x05820032, 0x0585a000, 0x0585a032, 0x05e00000,
0x05e00052, 0x07100000, 0x07100047, 0x07500000,
0x07500162, 0x07900000, 0x0790012f, 0x07e00000,
0x07e00038, 0x08200000, 0x0a000000, 0x0a0000c3,
// Entry 40 - 5F
0x0a500000, 0x0a500035, 0x0a500099, 0x0a900000,
0x0a900053, 0x0a900099, 0x0b200000, 0x0b200078,
0x0b500000, 0x0b500099, 0x0b700000, 0x0b71f000,
0x0b71f033, 0x0b757000, 0x0b757033, 0x0d700000,
0x0b500000, 0x0b500099, 0x0b700000, 0x0b720000,
0x0b720033, 0x0b75a000, 0x0b75a033, 0x0d700000,
0x0d700022, 0x0d70006e, 0x0d700078, 0x0d70009e,
0x0db00000, 0x0db00035, 0x0db00099, 0x0dc00000,
0x0dc00106, 0x0df00000, 0x0df00131, 0x0e500000,
@ -947,7 +947,7 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
0x38900000, 0x38900131, 0x39000000, 0x3900006f,
0x390000a4, 0x39500000, 0x39500099, 0x39800000,
0x3980007d, 0x39800106, 0x39d00000, 0x39d05000,
0x39d050e8, 0x39d33000, 0x39d33099, 0x3a100000,
0x39d050e8, 0x39d36000, 0x39d36099, 0x3a100000,
0x3b300000, 0x3b3000e9, 0x3bd00000, 0x3bd00001,
0x3be00000, 0x3be00024, 0x3c000000, 0x3c00002a,
0x3c000041, 0x3c00004e, 0x3c00005a, 0x3c000086,
@ -966,7 +966,7 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
0x3fd00000, 0x3fd00072, 0x3fd000da, 0x3fd0010c,
0x3ff00000, 0x3ff000d1, 0x40100000, 0x401000c3,
0x40200000, 0x4020004c, 0x40700000, 0x40800000,
0x40857000, 0x408570ba, 0x408dc000, 0x408dc0ba,
0x4085a000, 0x4085a0ba, 0x408e3000, 0x408e30ba,
0x40c00000, 0x40c000b3, 0x41200000, 0x41200111,
0x41600000, 0x4160010f, 0x41c00000, 0x41d00000,
// Entry 280 - 29F
@ -974,9 +974,9 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
0x42300000, 0x42300164, 0x42900000, 0x42900062,
0x4290006f, 0x429000a4, 0x42900115, 0x43100000,
0x43100027, 0x431000c2, 0x4310014d, 0x43200000,
0x4321f000, 0x4321f033, 0x4321f0bd, 0x4321f105,
0x4321f14d, 0x43257000, 0x43257033, 0x432570bd,
0x43257105, 0x4325714d, 0x43700000, 0x43a00000,
0x43220000, 0x43220033, 0x432200bd, 0x43220105,
0x4322014d, 0x4325a000, 0x4325a033, 0x4325a0bd,
0x4325a105, 0x4325a14d, 0x43700000, 0x43a00000,
0x43b00000, 0x44400000, 0x44400031, 0x44400072,
// Entry 2A0 - 2BF
0x4440010c, 0x44500000, 0x4450004b, 0x445000a4,
@ -992,24 +992,24 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
0x49400106, 0x4a400000, 0x4a4000d4, 0x4a900000,
0x4a9000ba, 0x4ac00000, 0x4ac00053, 0x4ae00000,
0x4ae00130, 0x4b400000, 0x4b400099, 0x4b4000e8,
0x4bc00000, 0x4bc05000, 0x4bc05024, 0x4bc1f000,
0x4bc1f137, 0x4bc57000, 0x4bc57137, 0x4be00000,
0x4be57000, 0x4be570b4, 0x4bee3000, 0x4bee30b4,
0x4bc00000, 0x4bc05000, 0x4bc05024, 0x4bc20000,
0x4bc20137, 0x4bc5a000, 0x4bc5a137, 0x4be00000,
0x4be5a000, 0x4be5a0b4, 0x4beeb000, 0x4beeb0b4,
0x4c000000, 0x4c300000, 0x4c30013e, 0x4c900000,
// Entry 2E0 - 2FF
0x4c900001, 0x4cc00000, 0x4cc0012f, 0x4ce00000,
0x4cf00000, 0x4cf0004e, 0x4e500000, 0x4e500114,
0x4f200000, 0x4fb00000, 0x4fb00131, 0x50900000,
0x50900052, 0x51200000, 0x51200001, 0x51800000,
0x5180003b, 0x518000d6, 0x51f00000, 0x51f38000,
0x51f38053, 0x51f39000, 0x51f3908d, 0x52800000,
0x528000ba, 0x52900000, 0x52938000, 0x52938053,
0x5293808d, 0x529380c6, 0x5293810d, 0x52939000,
0x5180003b, 0x518000d6, 0x51f00000, 0x51f3b000,
0x51f3b053, 0x51f3c000, 0x51f3c08d, 0x52800000,
0x528000ba, 0x52900000, 0x5293b000, 0x5293b053,
0x5293b08d, 0x5293b0c6, 0x5293b10d, 0x5293c000,
// Entry 300 - 31F
0x5293908d, 0x529390c6, 0x5293912e, 0x52f00000,
0x5293c08d, 0x5293c0c6, 0x5293c12e, 0x52f00000,
0x52f00161,
} // Size: 3116 bytes
const specialTagsStr string = "ca-ES-valencia en-US-u-va-posix"
// Total table size 3147 bytes (3KiB); checksum: F4E57D15
// Total table size 3147 bytes (3KiB); checksum: BE816D44

View file

@ -251,6 +251,13 @@ func (t Tag) Parent() Tag {
// ParseExtension parses s as an extension and returns it on success.
func ParseExtension(s string) (ext string, err error) {
defer func() {
if recover() != nil {
ext = ""
err = ErrSyntax
}
}()
scan := makeScannerString(s)
var end int
if n := len(scan.token); n != 1 {
@ -303,9 +310,17 @@ func (t Tag) Extensions() []string {
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
//
// If there are multiple types associated with a key, only the first will be
// returned. If there is no type associated with a key, it returns the empty
// string.
func (t Tag) TypeForKey(key string) string {
if start, end, _ := t.findTypeForKey(key); end != start {
return t.str[start:end]
if _, start, end, _ := t.findTypeForKey(key); end != start {
s := t.str[start:end]
if p := strings.IndexByte(s, '-'); p >= 0 {
s = s[:p]
}
return s
}
return ""
}
@ -329,13 +344,13 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
// Remove the setting if value is "".
if value == "" {
start, end, _ := t.findTypeForKey(key)
if start != end {
// Remove key tag and leading '-'.
start -= 4
start, sep, end, _ := t.findTypeForKey(key)
if start != sep {
// Remove a possible empty extension.
if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
switch {
case t.str[start-2] != '-': // has previous elements.
case end == len(t.str), // end of string
end+2 < len(t.str) && t.str[end+2] == '-': // end of extension
start -= 2
}
if start == int(t.pVariant) && end == len(t.str) {
@ -381,14 +396,14 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
t.str = string(buf[:uStart+len(b)])
} else {
s := t.str
start, end, hasExt := t.findTypeForKey(key)
if start == end {
start, sep, end, hasExt := t.findTypeForKey(key)
if start == sep {
if hasExt {
b = b[2:]
}
t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
t.str = fmt.Sprintf("%s-%s%s", s[:sep], b, s[end:])
} else {
t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
t.str = fmt.Sprintf("%s-%s%s", s[:start+3], value, s[end:])
}
}
return t, nil
@ -399,10 +414,10 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
// wasn't found. The hasExt return value reports whether an -u extension was present.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) {
p := int(t.pExt)
if len(key) != 2 || p == len(t.str) || p == 0 {
return p, p, false
return p, p, p, false
}
s := t.str
@ -410,10 +425,10 @@ func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
for p++; s[p] != 'u'; p++ {
if s[p] > 'u' {
p--
return p, p, false
return p, p, p, false
}
if p = nextExtension(s, p); p == len(s) {
return len(s), len(s), false
return len(s), len(s), len(s), false
}
}
// Proceed to the hyphen following the extension name.
@ -424,40 +439,28 @@ func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
// Iterate over keys until we get the end of a section.
for {
// p points to the hyphen preceding the current token.
if p3 := p + 3; s[p3] == '-' {
// Found a key.
// Check whether we just processed the key that was requested.
if curKey == key {
return start, p, true
end = p
for p++; p < len(s) && s[p] != '-'; p++ {
}
// Set to the next key and continue scanning type tokens.
curKey = s[p+1 : p3]
n := p - end - 1
if n <= 2 && curKey == key {
if sep < end {
sep++
}
return start, sep, end, true
}
switch n {
case 0, // invalid string
1: // next extension
return end, end, end, true
case 2:
// next key
curKey = s[end+1 : p]
if curKey > key {
return p, p, true
return end, end, end, true
}
// Start of the type token sequence.
start = p + 4
// A type is at least 3 characters long.
p += 7 // 4 + 3
} else {
// Attribute or type, which is at least 3 characters long.
p += 4
}
// p points past the third character of a type or attribute.
max := p + 5 // maximum length of token plus hyphen.
if len(s) < max {
max = len(s)
}
for ; p < max && s[p] != '-'; p++ {
}
// Bail if we have exhausted all tokens or if the next token starts
// a new extension.
if p == len(s) || s[p+2] == '-' {
if curKey == key {
return start, p, true
}
return p, p, true
start = end
sep = p
}
}
}
@ -465,7 +468,14 @@ func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred.
func ParseBase(s string) (Language, error) {
func ParseBase(s string) (l Language, err error) {
defer func() {
if recover() != nil {
l = 0
err = ErrSyntax
}
}()
if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
@ -476,7 +486,14 @@ func ParseBase(s string) (Language, error) {
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred.
func ParseScript(s string) (Script, error) {
func ParseScript(s string) (scr Script, err error) {
defer func() {
if recover() != nil {
scr = 0
err = ErrSyntax
}
}()
if len(s) != 4 {
return 0, ErrSyntax
}
@ -493,7 +510,14 @@ func EncodeM49(r int) (Region, error) {
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred.
func ParseRegion(s string) (Region, error) {
func ParseRegion(s string) (r Region, err error) {
defer func() {
if recover() != nil {
r = 0
err = ErrSyntax
}
}()
if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
@ -582,7 +606,14 @@ type Variant struct {
// ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant.
func ParseVariant(s string) (Variant, error) {
func ParseVariant(s string) (v Variant, err error) {
defer func() {
if recover() != nil {
v = Variant{}
err = ErrSyntax
}
}()
s = strings.ToLower(s)
if id, ok := variantIndex[s]; ok {
return Variant{id, s}, nil

View file

@ -133,14 +133,15 @@ func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
s.start = oldStart
if end := oldStart + newSize; end != oldEnd {
diff := end - oldEnd
if end < cap(s.b) {
b := make([]byte, len(s.b)+diff)
var b []byte
if n := len(s.b) + diff; n > cap(s.b) {
b = make([]byte, n)
copy(b, s.b[:oldStart])
} else {
b = s.b[:n]
}
copy(b[end:], s.b[oldEnd:])
s.b = b
} else {
s.b = append(s.b[end:], s.b[oldEnd:]...)
}
s.next = end + (s.next - s.end)
s.end = end
}
@ -231,6 +232,13 @@ func Parse(s string) (t Tag, err error) {
if s == "" {
return Und, ErrSyntax
}
defer func() {
if recover() != nil {
t = Und
err = ErrSyntax
return
}
}()
if len(s) <= maxAltTaglen {
b := [maxAltTaglen]byte{}
for i, c := range s {
@ -482,7 +490,7 @@ func parseExtensions(scan *scanner) int {
func parseExtension(scan *scanner) int {
start, end := scan.start, scan.end
switch scan.token[0] {
case 'u':
case 'u': // https://www.ietf.org/rfc/rfc6067.txt
attrStart := end
scan.scan()
for last := []byte{}; len(scan.token) > 2; scan.scan() {
@ -502,27 +510,29 @@ func parseExtension(scan *scanner) int {
last = scan.token
end = scan.end
}
// Scan key-type sequences. A key is of length 2 and may be followed
// by 0 or more "type" subtags from 3 to the maximum of 8 letters.
var last, key []byte
for attrEnd := end; len(scan.token) == 2; last = key {
key = scan.token
keyEnd := scan.end
end = scan.acceptMinSize(3)
end = scan.end
for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
end = scan.end
}
// TODO: check key value validity
if keyEnd == end || bytes.Compare(key, last) != 1 {
if bytes.Compare(key, last) != 1 || scan.err != nil {
// We have an invalid key or the keys are not sorted.
// Start scanning keys from scratch and reorder.
p := attrEnd + 1
scan.next = p
keys := [][]byte{}
for scan.scan(); len(scan.token) == 2; {
keyStart, keyEnd := scan.start, scan.end
end = scan.acceptMinSize(3)
if keyEnd != end {
keys = append(keys, scan.b[keyStart:end])
} else {
scan.setError(ErrSyntax)
end = keyStart
keyStart := scan.start
end = scan.end
for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
end = scan.end
}
keys = append(keys, scan.b[keyStart:end])
}
sort.Stable(bytesSort{keys, 2})
if n := len(keys); n > 0 {
@ -546,7 +556,7 @@ func parseExtension(scan *scanner) int {
break
}
}
case 't':
case 't': // https://www.ietf.org/rfc/rfc6497.txt
scan.scan()
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan)

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.2
// +build !go1.2
package language

View file

@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.2
// +build go1.2
package language

View file

@ -412,6 +412,10 @@ func (t Tag) Extensions() []Extension {
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
//
// If there are multiple types associated with a key, only the first will be
// returned. If there is no type associated with a key, it returns the empty
// string.
func (t Tag) TypeForKey(key string) string {
if !compact.Tag(t).MayHaveExtensions() {
if key != "rg" && key != "va" {

View file

@ -43,6 +43,13 @@ func Parse(s string) (t Tag, err error) {
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the canonicalization type c.
func (c CanonType) Parse(s string) (t Tag, err error) {
defer func() {
if recover() != nil {
t = Tag{}
err = language.ErrSyntax
}
}()
tt, err := language.Parse(s)
if err != nil {
return makeTag(tt), err
@ -79,6 +86,13 @@ func Compose(part ...interface{}) (t Tag, err error) {
// tag is returned after canonicalizing using CanonType c. If one or more errors
// are encountered, one of the errors is returned.
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
defer func() {
if recover() != nil {
t = Tag{}
err = language.ErrSyntax
}
}()
var b language.Builder
if err = update(&b, part...); err != nil {
return und, err
@ -142,6 +156,14 @@ var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
// Tags with a weight of zero will be dropped. An error will be returned if the
// input could not be parsed.
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
defer func() {
if recover() != nil {
tag = nil
q = nil
err = language.ErrSyntax
}
}()
var entry string
for s != "" {
if entry, s = split(s, ','); entry == "" {

View file

@ -35,19 +35,19 @@ const (
_XK = 333
)
const (
_Latn = 87
_Hani = 54
_Hans = 56
_Hant = 57
_Qaaa = 139
_Qaai = 147
_Qabx = 188
_Zinh = 236
_Zyyy = 241
_Zzzz = 242
_Latn = 90
_Hani = 57
_Hans = 59
_Hant = 60
_Qaaa = 143
_Qaai = 151
_Qabx = 192
_Zinh = 245
_Zyyy = 250
_Zzzz = 251
)
var regionToGroups = []uint8{ // 357 elements
var regionToGroups = []uint8{ // 358 elements
// Entry 0 - 3F
0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
@ -98,8 +98,8 @@ var regionToGroups = []uint8{ // 357 elements
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00,
} // Size: 381 bytes
0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
} // Size: 382 bytes
var paradigmLocales = [][3]uint16{ // 3 elements
0: [3]uint16{0x139, 0x0, 0x7b},
@ -249,32 +249,32 @@ var matchLang = []mutualIntelligibility{ // 113 elements
// matchScript holds pairs of scriptIDs where readers of one script
// can typically also read the other. Each is associated with a confidence.
var matchScript = []scriptIntelligibility{ // 26 elements
0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x57, haveScript: 0x1f, distance: 0x5},
1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x1f, haveScript: 0x57, distance: 0x5},
2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
3: {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x57, distance: 0xa},
4: {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x1f, distance: 0xa},
5: {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2b, haveScript: 0x57, distance: 0xa},
6: {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4b, haveScript: 0x57, distance: 0xa},
7: {wantLang: 0x251, haveLang: 0x139, wantScript: 0x4f, haveScript: 0x57, distance: 0xa},
8: {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x54, haveScript: 0x57, distance: 0xa},
9: {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6b, haveScript: 0x57, distance: 0xa},
10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x72, haveScript: 0x57, distance: 0xa},
11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x21, haveScript: 0x57, distance: 0xa},
12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x7d, haveScript: 0x57, distance: 0xa},
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x33, haveScript: 0x57, distance: 0xa},
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xca, haveScript: 0x57, distance: 0xa},
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xd7, haveScript: 0x57, distance: 0xa},
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xda, haveScript: 0x57, distance: 0xa},
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x29, haveScript: 0x57, distance: 0xa},
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3b, haveScript: 0x57, distance: 0xa},
24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x38, haveScript: 0x39, distance: 0xf},
25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x39, haveScript: 0x38, distance: 0x13},
0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x5a, haveScript: 0x20, distance: 0x5},
1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x20, haveScript: 0x5a, distance: 0x5},
2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x5a, haveScript: 0x20, distance: 0xa},
3: {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x5a, distance: 0xa},
4: {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x20, distance: 0xa},
5: {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2e, haveScript: 0x5a, distance: 0xa},
6: {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4e, haveScript: 0x5a, distance: 0xa},
7: {wantLang: 0x251, haveLang: 0x139, wantScript: 0x52, haveScript: 0x5a, distance: 0xa},
8: {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x57, haveScript: 0x5a, distance: 0xa},
9: {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6e, haveScript: 0x5a, distance: 0xa},
10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x75, haveScript: 0x5a, distance: 0xa},
11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x22, haveScript: 0x5a, distance: 0xa},
12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x81, haveScript: 0x5a, distance: 0xa},
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5a, distance: 0xa},
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xcf, haveScript: 0x5a, distance: 0xa},
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xde, haveScript: 0x5a, distance: 0xa},
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe1, haveScript: 0x5a, distance: 0xa},
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5a, distance: 0xa},
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5a, haveScript: 0x20, distance: 0xa},
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x5a, haveScript: 0x20, distance: 0xa},
23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3e, haveScript: 0x5a, distance: 0xa},
24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x3b, haveScript: 0x3c, distance: 0xf},
25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x3c, haveScript: 0x3b, distance: 0x13},
} // Size: 232 bytes
var matchRegion = []regionIntelligibility{ // 15 elements
@ -286,13 +286,13 @@ var matchRegion = []regionIntelligibility{ // 15 elements
5: {lang: 0x13e, script: 0x0, group: 0x83, distance: 0x4},
6: {lang: 0x3c0, script: 0x0, group: 0x3, distance: 0x4},
7: {lang: 0x3c0, script: 0x0, group: 0x83, distance: 0x4},
8: {lang: 0x529, script: 0x39, group: 0x2, distance: 0x4},
9: {lang: 0x529, script: 0x39, group: 0x82, distance: 0x4},
8: {lang: 0x529, script: 0x3c, group: 0x2, distance: 0x4},
9: {lang: 0x529, script: 0x3c, group: 0x82, distance: 0x4},
10: {lang: 0x3a, script: 0x0, group: 0x80, distance: 0x5},
11: {lang: 0x139, script: 0x0, group: 0x80, distance: 0x5},
12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
14: {lang: 0x529, script: 0x39, group: 0x80, distance: 0x5},
14: {lang: 0x529, script: 0x3c, group: 0x80, distance: 0x5},
} // Size: 114 bytes
// Total table size 1471 bytes (1KiB); checksum: 4CB1CD46
// Total table size 1472 bytes (1KiB); checksum: F86C669

30
vendor/modules.txt vendored
View file

@ -1,24 +1,24 @@
# github.com/PuerkitoBio/goquery v1.6.0
# github.com/PuerkitoBio/goquery v1.8.0
## explicit
github.com/PuerkitoBio/goquery
# github.com/andybalholm/cascadia v1.2.0
## explicit
# github.com/andybalholm/cascadia v1.3.1
github.com/andybalholm/cascadia
# github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9
## explicit
github.com/chilts/sid
# github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7
# github.com/jaytaylor/html2text v0.0.0-20211105163654-bc68cce691ba
## explicit
github.com/jaytaylor/html2text
# github.com/json-iterator/go v1.1.10
# github.com/json-iterator/go v1.1.12
## explicit
github.com/json-iterator/go
# github.com/mattn/go-runewidth v0.0.9
# github.com/mattn/go-runewidth v0.0.13
## explicit
github.com/mattn/go-runewidth
# github.com/mattn/go-xmpp v0.0.0-20200309091041-899ef71e80d2
# github.com/mattn/go-xmpp v0.0.0-20211029151415-912ba614897a
## explicit
github.com/mattn/go-xmpp
# github.com/mmcdole/gofeed v1.1.0
# github.com/mmcdole/gofeed v1.1.3
## explicit
github.com/mmcdole/gofeed
github.com/mmcdole/gofeed/atom
@ -32,21 +32,25 @@ github.com/mmcdole/goxpp
# github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd
## explicit
github.com/modern-go/concurrent
# github.com/modern-go/reflect2 v1.0.1
## explicit
# github.com/modern-go/reflect2 v1.0.2
github.com/modern-go/reflect2
# github.com/olekukonko/tablewriter v0.0.4
# github.com/olekukonko/tablewriter v0.0.5
## explicit
github.com/olekukonko/tablewriter
# github.com/rivo/uniseg v0.2.0
github.com/rivo/uniseg
# github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf
## explicit
github.com/ssor/bom
# golang.org/x/net v0.0.0-20201024042810-be3efd7ff127
# golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9
## explicit
# golang.org/x/net v0.0.0-20211209124913-491a49abca63
## explicit
golang.org/x/net/html
golang.org/x/net/html/atom
golang.org/x/net/html/charset
# golang.org/x/text v0.3.3
# golang.org/x/text v0.3.7
## explicit
golang.org/x/text/encoding
golang.org/x/text/encoding/charmap
golang.org/x/text/encoding/htmlindex