Update vendored dependencies.

This commit is contained in:
Martin Dosch 2021-12-13 10:35:45 +01:00
parent 57eac03993
commit 92ffc395d8
66 changed files with 1439 additions and 840 deletions

18
go.mod
View file

@ -3,20 +3,18 @@ module salsa.debian.org/mdosch/feed-to-muc
go 1.14
require (
github.com/PuerkitoBio/goquery v1.6.1 // indirect
github.com/andybalholm/cascadia v1.2.0 // indirect
github.com/PuerkitoBio/goquery v1.8.0 // indirect
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9
github.com/mattn/go-runewidth v0.0.10 // indirect
github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d
github.com/mmcdole/gofeed v1.1.0
github.com/json-iterator/go v1.1.12 // indirect
github.com/mattn/go-runewidth v0.0.13 // indirect
github.com/mattn/go-xmpp v0.0.0-20211029151415-912ba614897a
github.com/mmcdole/gofeed v1.1.3
github.com/mmcdole/goxpp v0.0.0-20200921145534-2f3784f67354 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.1 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/stretchr/testify v1.5.1 // indirect
golang.org/x/net v0.0.0-20210119194325-5f4716e94777 // indirect
golang.org/x/text v0.3.5 // indirect
jaytaylor.com/html2text v0.0.0-20200412013138-3577fbdbcff7
golang.org/x/net v0.0.0-20211209124913-491a49abca63 // indirect
golang.org/x/text v0.3.7 // indirect
jaytaylor.com/html2text v0.0.0-20211105163654-bc68cce691ba
)

52
go.sum
View file

@ -1,47 +1,40 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFGCWpk=
github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 h1:z0uK8UQqjMVYzvk4tiiu3obv2B44+XBsvgEJREQfnO8=
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9/go.mod h1:Jl2neWsQaDanWORdqZ4emBl50J4/aRBBS4FyyG9/PFo=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.10 h1:CoZ3S2P7pvtP45xOtBw+/mDL2z0RKI576gSkzRRpdGg=
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d h1:LrXbX6iVhQ3Z50hnhTdyP4K60jevMzk/x2TpMYtOJqg=
github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d/go.mod h1:Cs5mF0OsrRRmhkyOod//ldNPOwJsrBvJ+1WRspv0xoc=
github.com/mmcdole/gofeed v1.1.0 h1:T2WrGLVJRV04PY2qwhEJLHCt9JiCtBhb6SmC8ZvJH08=
github.com/mmcdole/gofeed v1.1.0/go.mod h1:PPiVwgDXLlz2N83KB4TrIim2lyYM5Zn7ZWH9Pi4oHUk=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-xmpp v0.0.0-20211029151415-912ba614897a h1:BRuMO9LUDuGp6viOhrEbmuXNlvC78X5QdsnY9Wc+cqM=
github.com/mattn/go-xmpp v0.0.0-20211029151415-912ba614897a/go.mod h1:Cs5mF0OsrRRmhkyOod//ldNPOwJsrBvJ+1WRspv0xoc=
github.com/mmcdole/gofeed v1.1.3 h1:pdrvMb18jMSLidGp8j0pLvc9IGziX4vbmvVqmLH6z8o=
github.com/mmcdole/gofeed v1.1.3/go.mod h1:QQO3maftbOu+hiVOGOZDRLymqGQCos4zxbA4j89gMrE=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/mmcdole/goxpp v0.0.0-20200921145534-2f3784f67354 h1:Z6i7ND25ixRtXFBylIUggqpvLMV1I15yprcqMVB7WZA=
github.com/mmcdole/goxpp v0.0.0-20200921145534-2f3784f67354/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
@ -57,21 +50,22 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210119194325-5f4716e94777 h1:003p0dJM77cxMSyCPFphvZf/Y5/NXf5fzg6ufd1/Oew=
golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211209124913-491a49abca63 h1:iocB37TsdFuN6IBRZ+ry36wrkoV51/tl5vOWqkcPGvY=
golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5 h1:i6eZZ+zk0SOf0xgBpEpPD18qWcJda6q1sxt3S0kzyUQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
jaytaylor.com/html2text v0.0.0-20200412013138-3577fbdbcff7 h1:mub0MmFLOn8XLikZOAhgLD1kXJq8jgftSrrv7m00xFo=
jaytaylor.com/html2text v0.0.0-20200412013138-3577fbdbcff7/go.mod h1:OxvTsCwKosqQ1q7B+8FwXqg4rKZ/UG9dUW+g/VL2xH4=
jaytaylor.com/html2text v0.0.0-20211105163654-bc68cce691ba h1:3xhBI8FZepFq4YtdqlW6Z8YzdKM3nAV9xpOvgzWX+us=
jaytaylor.com/html2text v0.0.0-20211105163654-bc68cce691ba/go.mod h1:OxvTsCwKosqQ1q7B+8FwXqg4rKZ/UG9dUW+g/VL2xH4=

View file

@ -1,44 +0,0 @@
arch:
- amd64
- ppc64le
language: go
go:
- 1.2.x
- 1.3.x
- 1.4.x
- 1.5.x
- 1.6.x
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- 1.11.x
- 1.12.x
- 1.13.x
- tip
jobs:
exclude:
- arch: ppc64le
go: 1.2.x
- arch: ppc64le
go: 1.3.x
- arch: ppc64le
go: 1.4.x
- arch: ppc64le
go: 1.5.x
- arch: ppc64le
go: 1.6.x
- arch: ppc64le
go: 1.7.x
- arch: ppc64le
go: 1.8.x
- arch: ppc64le
go: 1.9.x
- arch: ppc64le
go: 1.10.x
- arch: ppc64le
go: 1.11.x
- arch: ppc64le
go: 1.12.x

View file

@ -1,4 +1,4 @@
Copyright (c) 2012-2016, Martin Angers & Contributors
Copyright (c) 2012-2021, Martin Angers & Contributors
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

View file

@ -1,5 +1,8 @@
# goquery - a little like that j-thing, only in Go
[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
[![Build Status](https://github.com/PuerkitoBio/goquery/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/PuerkitoBio/goquery/actions)
[![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery)
[![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off.
@ -19,7 +22,7 @@ Syntax-wise, it is as close as possible to jQuery, with the same function names
## Installation
Please note that because of the net/html dependency, goquery requires Go1.1+.
Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+.
$ go get github.com/PuerkitoBio/goquery
@ -37,6 +40,9 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.**
* **2021-10-25 (v1.8.0)** : Add `Render` function to render a `Selection` to an `io.Writer` (thanks [@anthonygedeon](https://github.com/anthonygedeon)).
* **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)).
* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)).
* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes.
* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this.
* **2020-02-04 (v1.5.1)** : Update module dependencies.
@ -50,7 +56,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
* **2016-08-28 (v1.0.1)** : Optimize performance for large documents.
* **2016-07-27 (v1.0.0)** : Tag version 1.0.0.
* **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object.
* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
@ -79,7 +85,7 @@ jQuery often has many variants for the same function (no argument, a selector st
Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
The complete [godoc reference documentation can be found here][doc].
The complete [package reference documentation can be found here][doc].
Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string):
@ -123,11 +129,10 @@ func ExampleScrape() {
}
// Find the review items
doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) {
// For each item found, get the band and title
band := s.Find("a").Text()
title := s.Find("i").Text()
fmt.Printf("Review %d: %s - %s\n", i, band, title)
doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) {
// For each item found, get the title
title := s.Find("a").Text()
fmt.Printf("Review %d: %s\n", i, title)
})
}
@ -161,8 +166,9 @@ There are a number of ways you can support the project:
* Pull requests: please discuss new code in an issue first, unless the fix is really trivial.
- Make sure new code is tested.
- Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue.
If you desperately want to send money my way, I have a BuyMeACoffee.com page:
* Sponsor the developer
- See the GitHub Sponsor button at the top of the repo on GitHub
- or via BuyMeACoffee.com, below
<a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a>
@ -177,10 +183,10 @@ The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia'
[bsd]: http://opensource.org/licenses/BSD-3-Clause
[golic]: http://golang.org/LICENSE
[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE
[doc]: http://godoc.org/github.com/PuerkitoBio/goquery
[doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery
[index]: http://api.jquery.com/index/
[gonet]: https://github.com/golang/net/
[html]: http://godoc.org/golang.org/x/net/html
[html]: https://pkg.go.dev/golang.org/x/net/html
[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks
[thatguystone]: https://github.com/thatguystone
[piotr]: https://github.com/piotrkowalczuk

View file

@ -1,8 +1,8 @@
module github.com/PuerkitoBio/goquery
require (
github.com/andybalholm/cascadia v1.1.0
golang.org/x/net v0.0.0-20200202094626-16171245cfb2
github.com/andybalholm/cascadia v1.3.1
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8
)
go 1.13

View file

@ -1,8 +1,9 @@
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View file

@ -7,7 +7,6 @@ import (
"net/url"
"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)
@ -122,6 +121,45 @@ type Matcher interface {
Filter([]*html.Node) []*html.Node
}
// Single compiles the selector string into a Matcher that stops as soon as
// the first matching node has been found.
//
// Selection.Find and the other functions that take a selector string to
// select nodes normally keep every node matching the selector. With the
// Matcher returned by Single, at most the first match is selected.
//
// The following two statements are semantically equivalent:
//
//	sel1 := doc.Find("a").First()
//	sel2 := doc.FindMatcher(goquery.Single("a"))
//
// but the one based on Single is optimized and can be much faster on large
// documents.
//
// Compared to a standard Matcher, only the MatchAll method behaves
// differently. The single-selection property therefore applies only to
// Selection methods that use the Matcher to select nodes; when the Matcher
// is used to filter nodes or to test whether a node matches, its behaviour
// is unchanged (e.g. FilterMatcher(Single("div")) still yields a Selection
// holding several "div"s if the Selection contained several to begin with).
func Single(selector string) Matcher {
	compiled := compileMatcher(selector)
	return singleMatcher{Matcher: compiled}
}
// SingleMatcher returns a Matcher that matches the same nodes as m, but that
// stops after the first match is found. If m is already a singleMatcher it
// is returned unchanged rather than being wrapped a second time.
//
// See the documentation of function Single for more details.
func SingleMatcher(m Matcher) Matcher {
	switch m.(type) {
	case singleMatcher:
		// Already wrapped: hand it back as-is.
		return m
	default:
		return singleMatcher{Matcher: m}
	}
}
// compileMatcher compiles the selector string s and returns
// the corresponding Matcher. If s is an invalid selector string,
// it returns a Matcher that fails all matches.
@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher {
return cs
}
// singleMatcher wraps a Matcher and overrides MatchAll so that it yields at
// most one node. All other Matcher methods are promoted unchanged from the
// embedded Matcher.
type singleMatcher struct {
	Matcher
}

// MatchAll returns a slice holding only the first node matched by the
// wrapped Matcher under n, or nil when nothing matches.
func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
	// firstMatcher is the optional fast-path capability: matchers that
	// provide it can stop searching at the first hit (cascadia-compiled
	// matchers all do).
	type firstMatcher interface {
		MatchFirst(*html.Node) *html.Node
	}

	fm, ok := m.Matcher.(firstMatcher)
	if !ok {
		// Fallback for matchers without MatchFirst (e.g. test mocks):
		// collect every match and keep only the first one. The capacity is
		// capped so that appending to the result cannot overwrite the rest
		// of the underlying slice.
		all := m.Matcher.MatchAll(n)
		if len(all) == 0 {
			return nil
		}
		return all[:1:1]
	}

	if first := fm.MatchFirst(n); first != nil {
		return []*html.Node{first}
	}
	return nil
}
// invalidMatcher is a Matcher that always fails to match.
type invalidMatcher struct{}

View file

@ -2,6 +2,7 @@ package goquery
import (
"bytes"
"io"
"golang.org/x/net/html"
)
@ -50,13 +51,24 @@ func nodeName(node *html.Node) string {
case html.ElementNode, html.DoctypeNode:
return node.Data
default:
if node.Type >= 0 && int(node.Type) < len(nodeNames) {
if int(node.Type) < len(nodeNames) {
return nodeNames[node.Type]
}
return ""
}
}
// Render writes the HTML rendering of the first node of the selection s to
// w. It behaves like OuterHtml, except that the output goes to the writer
// instead of being returned as a string. An empty selection writes nothing
// and returns a nil error; otherwise the error from html.Render is returned.
func Render(w io.Writer, s *Selection) error {
	if s.Length() > 0 {
		return html.Render(w, s.Get(0))
	}
	return nil
}
// OuterHtml returns the outer HTML rendering of the first item in
// the selection - that is, the HTML including the first element's
// tag and attributes.
@ -66,12 +78,7 @@ func nodeName(node *html.Node) string {
// a property provided by the DOM).
func OuterHtml(s *Selection) (string, error) {
var buf bytes.Buffer
if s.Length() == 0 {
return "", nil
}
n := s.Get(0)
if err := html.Render(&buf, n); err != nil {
if err := Render(&buf, s); err != nil {
return "", err
}
return buf.String(), nil

View file

@ -1,5 +1,5 @@
module github.com/andybalholm/cascadia
require golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01
go 1.16
go 1.13
require golang.org/x/net v0.0.0-20210916014120-12bc252f5db8

7
vendor/github.com/andybalholm/cascadia/go.sum generated vendored Normal file
View file

@ -0,0 +1,7 @@
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View file

@ -36,7 +36,7 @@ func (p *parser) parseEscape() (result string, err error) {
for i = start; i < start+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
// empty
}
v, _ := strconv.ParseUint(p.s[start:i], 16, 21)
v, _ := strconv.ParseUint(p.s[start:i], 16, 64)
if len(p.s) > i {
switch p.s[i] {
case '\r':
@ -409,6 +409,19 @@ func (p *parser) parseAttributeSelector() (attrSelector, error) {
if p.i >= len(p.s) {
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
// check if the attribute contains an ignore case flag
ignoreCase := false
if p.s[p.i] == 'i' || p.s[p.i] == 'I' {
ignoreCase = true
p.i++
}
p.skipWhitespace()
if p.i >= len(p.s) {
return attrSelector{}, errors.New("unexpected EOF in attribute selector")
}
if p.s[p.i] != ']' {
return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
}
@ -416,15 +429,17 @@ func (p *parser) parseAttributeSelector() (attrSelector, error) {
switch op {
case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=":
return attrSelector{key: key, val: val, operation: op, regexp: rx}, nil
return attrSelector{key: key, val: val, operation: op, regexp: rx, insensitive: ignoreCase}, nil
default:
return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op)
}
}
var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
var errUnmatchedParenthesis = errors.New("unmatched '('")
var (
errExpectedParenthesis = errors.New("expected '(' but didn't find it")
errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
errUnmatchedParenthesis = errors.New("unmatched '('")
)
// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element
// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements.
@ -552,6 +567,37 @@ func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err
out = emptyElementPseudoClassSelector{}
case "root":
out = rootPseudoClassSelector{}
case "link":
out = linkPseudoClassSelector{}
case "lang":
if !p.consumeParenthesis() {
return out, "", errExpectedParenthesis
}
if p.i == len(p.s) {
return out, "", errUnmatchedParenthesis
}
val, err := p.parseIdentifier()
if err != nil {
return out, "", err
}
val = strings.ToLower(val)
p.skipWhitespace()
if p.i >= len(p.s) {
return out, "", errors.New("unexpected EOF in pseudo selector")
}
if !p.consumeClosingParenthesis() {
return out, "", errExpectedClosingParenthesis
}
out = langPseudoClassSelector{lang: val}
case "enabled":
out = enabledPseudoClassSelector{}
case "disabled":
out = disabledPseudoClassSelector{}
case "checked":
out = checkedPseudoClassSelector{}
case "visited", "hover", "active", "focus", "target":
// Not applicable in a static context: never match.
out = neverMatchSelector{value: ":" + name}
case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error":
return nil, name, nil
default:
@ -714,6 +760,9 @@ func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
case '*':
// It's the universal selector. Just skip over it, since it doesn't affect the meaning.
p.i++
if p.i+2 < len(p.s) && p.s[p.i:p.i+2] == "|*" { // other version of universal selector
p.i += 2
}
case '#', '.', '[', ':':
// There's no type selector. Wait to process the other till the main loop.
default:

View file

@ -0,0 +1,474 @@
package cascadia
import (
"bytes"
"fmt"
"regexp"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// This file implements the pseudo-class selectors, which share the
// implementation of PseudoElement() and Specificity().

// abstractPseudoClass is embedded by the pseudo-class selector types in this
// file so that they share the Specificity and PseudoElement methods below.
type abstractPseudoClass struct{}

// Specificity returns the fixed value Specificity{0, 1, 0} used by every
// selector that embeds abstractPseudoClass.
func (abstractPseudoClass) Specificity() Specificity {
	return Specificity{0, 1, 0}
}

// PseudoElement always returns the empty string.
func (abstractPseudoClass) PseudoElement() string {
	return ""
}
// relativePseudoClassSelector is a pseudo-class whose result depends on how
// the node, its children or its descendants fare against an inner selector
// group.
type relativePseudoClassSelector struct {
	name  string // one of "not", "has", "haschild"
	match SelectorGroup
}

// Match reports whether the element n satisfies the relative pseudo-class.
// Non-element nodes never match. It panics if s.name is not one of the
// supported names.
func (s relativePseudoClassSelector) Match(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}

	var matched bool
	switch s.name {
	case "not":
		// The element itself must NOT match the inner selector group.
		matched = !s.match.Match(n)
	case "has":
		// Some descendant must match the inner selector group.
		matched = hasDescendantMatch(n, s.match)
	case "haschild":
		// Some direct child must match the inner selector group.
		matched = hasChildMatch(n, s.match)
	default:
		panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name))
	}
	return matched
}
// hasChildMatch reports whether at least one direct child of n is matched
// by a. Children are tested in document order and the walk stops at the
// first match.
func hasChildMatch(n *html.Node, a Matcher) bool {
	child := n.FirstChild
	for child != nil {
		if a.Match(child) {
			return true
		}
		child = child.NextSibling
	}
	return false
}
// hasDescendantMatch reports whether any descendant of n is matched by a.
// It walks the subtree depth-first and returns true at the first match, or
// false once every descendant has been tested without success. Only element
// children are descended into.
func hasDescendantMatch(n *html.Node, a Matcher) bool {
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if a.Match(child) {
			return true
		}
		if child.Type == html.ElementNode && hasDescendantMatch(child, a) {
			return true
		}
	}
	return false
}
// Specificity returns the highest specificity found among the selectors
// given as arguments to the pseudo-class. With no arguments the zero
// Specificity is returned.
// See https://www.w3.org/TR/selectors/#specificity-rules
func (s relativePseudoClassSelector) Specificity() Specificity {
	var highest Specificity
	for _, inner := range s.match {
		if candidate := inner.Specificity(); highest.Less(candidate) {
			highest = candidate
		}
	}
	return highest
}
// PseudoElement always returns the empty string for a relative pseudo-class
// selector.
func (relativePseudoClassSelector) PseudoElement() string {
	return ""
}
// containsPseudoClassSelector matches nodes whose text contains value. When
// own is true only the text of the node's direct text children is
// considered; otherwise the text of all descendants is used.
type containsPseudoClassSelector struct {
	abstractPseudoClass
	value string
	own   bool
}

// Match reports whether the lower-cased text of n contains s.value.
// NOTE(review): s.value itself is not lower-cased here, so it is presumably
// normalised by whoever builds the selector - confirm against the parser.
func (s containsPseudoClassSelector) Match(n *html.Node) bool {
	// Pick the text extractor: whole subtree by default, direct text
	// children only when own is set.
	extract := nodeText
	if s.own {
		extract = nodeOwnText
	}
	haystack := strings.ToLower(extract(n))
	return strings.Contains(haystack, s.value)
}
// regexpPseudoClassSelector matches nodes whose text matches regexp. When
// own is true only the text of the node's direct text children is
// considered; otherwise the text of all descendants is used.
type regexpPseudoClassSelector struct {
	abstractPseudoClass
	regexp *regexp.Regexp
	own    bool
}

// Match reports whether the text of n matches the selector's regular
// expression. The text is used as-is, without any case folding.
func (s regexpPseudoClassSelector) Match(n *html.Node) bool {
	// Pick the text extractor: whole subtree by default, direct text
	// children only when own is set.
	extract := nodeText
	if s.own {
		extract = nodeOwnText
	}
	return s.regexp.MatchString(extract(n))
}
// writeNodeText appends to b the text held by n and, recursively, by its
// descendants. Text nodes contribute their data, element nodes contribute
// the text of their children, and every other node type is ignored.
func writeNodeText(n *html.Node, b *bytes.Buffer) {
	if n.Type == html.TextNode {
		b.WriteString(n.Data)
		return
	}
	if n.Type != html.ElementNode {
		return
	}
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		writeNodeText(child, b)
	}
}
// nodeText collects, via writeNodeText, the text contained in n and its
// descendants and returns it as a single string.
func nodeText(n *html.Node) string {
	buf := new(bytes.Buffer)
	writeNodeText(n, buf)
	return buf.String()
}
// nodeOwnText concatenates the data of the text nodes that are direct
// children of n. Text nested inside child elements is not included.
func nodeOwnText(n *html.Node) string {
	buf := new(bytes.Buffer)
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if child.Type != html.TextNode {
			continue
		}
		buf.WriteString(child.Data)
	}
	return buf.String()
}
// nthPseudoClassSelector holds the parameters of an an+b positional
// pseudo-class. last selects counting from the last sibling and ofType
// restricts counting to siblings with the same tag name.
type nthPseudoClassSelector struct {
	abstractPseudoClass
	a, b         int
	last, ofType bool
}

// Match dispatches to the matching helper: the general an+b matcher when
// s.a is non-zero, otherwise one of the simple fixed-position matchers
// depending on s.last.
func (s nthPseudoClassSelector) Match(n *html.Node) bool {
	switch {
	case s.a != 0:
		return nthChildMatch(s.a, s.b, s.last, s.ofType, n)
	case s.last:
		return simpleNthLastChildMatch(s.b, s.ofType, n)
	default:
		return simpleNthChildMatch(s.b, s.ofType, n)
	}
}
// nthChildMatch implements :nth-child(an+b).
// If last is true, positions are counted from the last sibling
// (:nth-last-child). If ofType is true, only siblings with the same tag
// name as n are counted (:nth-of-type).
// It returns false for non-element nodes, for nodes without a parent and
// for nodes whose parent is the document node.
func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	parent := n.Parent
	if parent == nil || parent.Type == html.DocumentNode {
		return false
	}

	pos := -1  // 1-based position of n among the counted siblings
	total := 0 // number of counted siblings seen so far
	for sib := parent.FirstChild; sib != nil; sib = sib.NextSibling {
		if sib.Type != html.ElementNode {
			continue
		}
		if ofType && sib.Data != n.Data {
			continue
		}
		total++
		if sib != n {
			continue
		}
		pos = total
		// Counting from the end needs the full total, so keep going.
		if !last {
			break
		}
	}
	if pos == -1 {
		// This shouldn't happen, since n should always be one of its parent's children.
		return false
	}

	if last {
		pos = total - pos + 1
	}

	offset := pos - b
	if a == 0 {
		return offset == 0
	}
	// n matches when offset is a non-negative multiple of a.
	return offset%a == 0 && offset/a >= 0
}
// simpleNthChildMatch implements :nth-child(b).
// If ofType is true, only siblings with the same tag name as n are
// counted (:nth-of-type).
// It returns false for non-element nodes, for nodes without a parent and
// for nodes whose parent is the document node.
func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	parent := n.Parent
	if parent == nil || parent.Type == html.DocumentNode {
		return false
	}

	seen := 0
	for sib := parent.FirstChild; sib != nil; sib = sib.NextSibling {
		if sib.Type != html.ElementNode || (ofType && sib.Data != n.Data) {
			continue
		}
		seen++
		switch {
		case sib == n:
			return seen == b
		case seen >= b:
			// Position b has been passed without reaching n.
			return false
		}
	}
	return false
}
// simpleNthLastChildMatch implements :nth-last-child(b).
// If ofType is true, only siblings with the same tag name as n are
// counted (:nth-last-of-type).
// Siblings are walked from the last child backwards. It returns false for
// non-element nodes, for nodes without a parent and for nodes whose
// parent is the document node.
func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	parent := n.Parent
	if parent == nil || parent.Type == html.DocumentNode {
		return false
	}

	fromEnd := 0
	for sib := parent.LastChild; sib != nil; sib = sib.PrevSibling {
		counted := sib.Type == html.ElementNode && (!ofType || sib.Data == n.Data)
		if !counted {
			continue
		}
		fromEnd++
		if sib == n {
			return fromEnd == b
		}
		// Position b has been passed without reaching n.
		if fromEnd >= b {
			return false
		}
	}
	return false
}
// onlyChildPseudoClassSelector implements :only-child, or :only-of-type
// when ofType is set.
type onlyChildPseudoClassSelector struct {
	abstractPseudoClass
	ofType bool
}

// Match implements :only-child.
// If `ofType` is true, it implements :only-of-type instead.
// It counts the element children of n's parent (restricted to n's tag
// name when ofType is set) and reports whether exactly one was found.
// Non-element nodes, parentless nodes and children of the document node
// never match.
func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	parent := n.Parent
	if parent == nil || parent.Type == html.DocumentNode {
		return false
	}

	siblings := 0
	for sib := parent.FirstChild; sib != nil; sib = sib.NextSibling {
		if sib.Type != html.ElementNode {
			continue
		}
		if s.ofType && sib.Data != n.Data {
			continue
		}
		siblings++
		// A second counted sibling settles the answer; stop early.
		if siblings > 1 {
			return false
		}
	}
	return siblings == 1
}
// inputPseudoClassSelector implements the :input pseudo-class.
type inputPseudoClassSelector struct {
	abstractPseudoClass
}

// Match reports whether n is an input, select, textarea or button element.
func (s inputPseudoClassSelector) Match(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	switch n.Data {
	case "input", "select", "textarea", "button":
		return true
	}
	return false
}
// emptyElementPseudoClassSelector implements the :empty pseudo-class.
type emptyElementPseudoClassSelector struct {
	abstractPseudoClass
}

// Match reports whether n is an element with no element children and no
// text children other than whitespace-only text. Children of other node
// types are ignored.
func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}

	for child := n.FirstChild; child != nil; child = child.NextSibling {
		switch child.Type {
		case html.ElementNode:
			return false
		case html.TextNode:
			// Whitespace-only text does not make the element non-empty.
			if strings.TrimSpace(nodeText(child)) != "" {
				return false
			}
		}
	}

	return true
}
// rootPseudoClassSelector implements the :root pseudo-class.
type rootPseudoClassSelector struct {
	abstractPseudoClass
}

// Match implements :root. It reports whether n is an element whose parent
// exists and is the document node.
func (s rootPseudoClassSelector) Match(n *html.Node) bool {
	return n.Type == html.ElementNode &&
		n.Parent != nil &&
		n.Parent.Type == html.DocumentNode
}
// hasAttr reports whether n carries the attribute attr, whatever its
// value. It delegates to matchAttribute with a predicate that accepts
// every value.
func hasAttr(n *html.Node, attr string) bool {
	anyValue := func(string) bool { return true }
	return matchAttribute(n, attr, anyValue)
}
// linkPseudoClassSelector implements the :link pseudo-class.
type linkPseudoClassSelector struct {
	abstractPseudoClass
}

// Match implements :link. It reports whether n is an a, area or link
// element that has an href attribute.
func (s linkPseudoClassSelector) Match(n *html.Node) bool {
	switch n.DataAtom {
	case atom.A, atom.Area, atom.Link:
		return hasAttr(n, "href")
	}
	return false
}
// langPseudoClassSelector implements the :lang(...) pseudo-class; lang is
// the language code to look for.
type langPseudoClassSelector struct {
	abstractPseudoClass
	lang string
}

// Match reports whether n, or any of its ancestors, has a lang attribute
// that equals s.lang or starts with s.lang followed by a hyphen. The
// comparison is case-sensitive.
func (s langPseudoClassSelector) Match(n *html.Node) bool {
	matchesLang := func(val string) bool {
		return val == s.lang || strings.HasPrefix(val, s.lang+"-")
	}
	// Walk from n up through its ancestors until one declares the language.
	for cur := n; cur != nil; cur = cur.Parent {
		if matchAttribute(cur, "lang", matchesLang) {
			return true
		}
	}
	return false
}
// enabledPseudoClassSelector implements the :enabled pseudo-class.
type enabledPseudoClassSelector struct {
	abstractPseudoClass
}

// Match reports whether n is an enabled element:
//   - a, area and link match when they have an href attribute;
//   - optgroup, menuitem and fieldset match when they lack a disabled
//     attribute;
//   - button, input, select, textarea and option match when they lack a
//     disabled attribute and are not inside a disabled fieldset.
//
// Every other node does not match.
func (s enabledPseudoClassSelector) Match(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}

	switch n.DataAtom {
	case atom.A, atom.Area, atom.Link:
		return hasAttr(n, "href")
	case atom.Optgroup, atom.Menuitem, atom.Fieldset:
		return !hasAttr(n, "disabled")
	case atom.Button, atom.Input, atom.Select, atom.Textarea, atom.Option:
		if hasAttr(n, "disabled") {
			return false
		}
		return !inDisabledFieldset(n)
	default:
		return false
	}
}
// disabledPseudoClassSelector implements the :disabled pseudo-class.
type disabledPseudoClassSelector struct {
	abstractPseudoClass
}

// Match reports whether n is a disabled element:
//   - optgroup, menuitem and fieldset match when they have a disabled
//     attribute;
//   - button, input, select, textarea and option match when they have a
//     disabled attribute or are inside a disabled fieldset.
//
// Every other node does not match.
func (s disabledPseudoClassSelector) Match(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}

	switch n.DataAtom {
	case atom.Optgroup, atom.Menuitem, atom.Fieldset:
		return hasAttr(n, "disabled")
	case atom.Button, atom.Input, atom.Select, atom.Textarea, atom.Option:
		if hasAttr(n, "disabled") {
			return true
		}
		return inDisabledFieldset(n)
	default:
		return false
	}
}
// hasLegendInPreviousSiblings reports whether any sibling that precedes n
// is a legend element.
func hasLegendInPreviousSiblings(n *html.Node) bool {
	for prev := n.PrevSibling; prev != nil; prev = prev.PrevSibling {
		if prev.DataAtom != atom.Legend {
			continue
		}
		return true
	}
	return false
}
// inDisabledFieldset reports whether n is disabled by an enclosing
// fieldset. It walks up from n; at each level, if the parent is a fieldset
// with a disabled attribute, the current node counts as disabled unless it
// is a legend with no legend among its previous siblings.
func inDisabledFieldset(n *html.Node) bool {
	for cur := n; cur.Parent != nil; cur = cur.Parent {
		parent := cur.Parent
		if parent.DataAtom != atom.Fieldset || !hasAttr(parent, "disabled") {
			continue
		}
		// A legend preceded by no other legend is exempt at this level;
		// the search then continues with the outer ancestors.
		if cur.DataAtom != atom.Legend || hasLegendInPreviousSiblings(cur) {
			return true
		}
	}
	return false
}
// checkedPseudoClassSelector implements the :checked pseudo-class.
type checkedPseudoClassSelector struct {
	abstractPseudoClass
}

// Match reports whether n is a checked element:
//   - input and menuitem match when they have a checked attribute and a
//     type attribute equal to "checkbox" or "radio" after toLowerASCII;
//   - option matches when it has a selected attribute.
//
// Every other node does not match.
func (s checkedPseudoClassSelector) Match(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}

	switch n.DataAtom {
	case atom.Input, atom.Menuitem:
		if !hasAttr(n, "checked") {
			return false
		}
		return matchAttribute(n, "type", func(val string) bool {
			switch toLowerASCII(val) {
			case "checkbox", "radio":
				return true
			}
			return false
		})
	case atom.Option:
		return hasAttr(n, "selected")
	}
	return false
}

View file

@ -1,7 +1,6 @@
package cascadia
import (
"bytes"
"fmt"
"regexp"
"strings"
@ -232,7 +231,7 @@ type classSelector struct {
// Matches elements by class attribute.
func (t classSelector) Match(n *html.Node) bool {
return matchAttribute(n, "class", func(s string) bool {
return matchInclude(t.class, s)
return matchInclude(t.class, s, false)
})
}
@ -266,6 +265,7 @@ func (c idSelector) PseudoElement() string {
type attrSelector struct {
key, val, operation string
regexp *regexp.Regexp
insensitive bool
}
// Matches elements by attribute value.
@ -274,20 +274,20 @@ func (t attrSelector) Match(n *html.Node) bool {
case "":
return matchAttribute(n, t.key, func(string) bool { return true })
case "=":
return matchAttribute(n, t.key, func(s string) bool { return s == t.val })
return matchAttribute(n, t.key, func(s string) bool { return matchInsensitiveValue(s, t.val, t.insensitive) })
case "!=":
return attributeNotEqualMatch(t.key, t.val, n)
return attributeNotEqualMatch(t.key, t.val, n, t.insensitive)
case "~=":
// matches elements where the attribute named key is a whitespace-separated list that includes val.
return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s) })
return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s, t.insensitive) })
case "|=":
return attributeDashMatch(t.key, t.val, n)
return attributeDashMatch(t.key, t.val, n, t.insensitive)
case "^=":
return attributePrefixMatch(t.key, t.val, n)
return attributePrefixMatch(t.key, t.val, n, t.insensitive)
case "$=":
return attributeSuffixMatch(t.key, t.val, n)
return attributeSuffixMatch(t.key, t.val, n, t.insensitive)
case "*=":
return attributeSubstringMatch(t.key, t.val, n)
return attributeSubstringMatch(t.key, t.val, n, t.insensitive)
case "#=":
return attributeRegexMatch(t.key, t.regexp, n)
default:
@ -295,6 +295,17 @@ func (t attrSelector) Match(n *html.Node) bool {
}
}
// matchInsensitiveValue compares two attribute values.
// userAttr is the value supplied by the user in the selector and realAttr
// is the value found in the parsed document. When ignoreCase is true the
// comparison uses strings.EqualFold; otherwise the two strings must be
// identical.
func matchInsensitiveValue(userAttr string, realAttr string, ignoreCase bool) bool {
	if !ignoreCase {
		return userAttr == realAttr
	}
	return strings.EqualFold(userAttr, realAttr)
}
// matches elements where the attribute named key satisfies the function f.
func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
if n.Type != html.ElementNode {
@ -310,12 +321,12 @@ func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
// attributeNotEqualMatch matches elements where
// the attribute named key does not have the value val.
func attributeNotEqualMatch(key, val string, n *html.Node) bool {
func attributeNotEqualMatch(key, val string, n *html.Node, ignoreCase bool) bool {
if n.Type != html.ElementNode {
return false
}
for _, a := range n.Attr {
if a.Key == key && a.Val == val {
if a.Key == key && matchInsensitiveValue(a.Val, val, ignoreCase) {
return false
}
}
@ -323,13 +334,13 @@ func attributeNotEqualMatch(key, val string, n *html.Node) bool {
}
// returns true if s is a whitespace-separated list that includes val.
func matchInclude(val, s string) bool {
func matchInclude(val string, s string, ignoreCase bool) bool {
for s != "" {
i := strings.IndexAny(s, " \t\r\n\f")
if i == -1 {
return s == val
return matchInsensitiveValue(s, val, ignoreCase)
}
if s[:i] == val {
if matchInsensitiveValue(s[:i], val, ignoreCase) {
return true
}
s = s[i+1:]
@ -338,16 +349,16 @@ func matchInclude(val, s string) bool {
}
// matches elements where the attribute named key equals val or starts with val plus a hyphen.
func attributeDashMatch(key, val string, n *html.Node) bool {
func attributeDashMatch(key, val string, n *html.Node, ignoreCase bool) bool {
return matchAttribute(n, key,
func(s string) bool {
if s == val {
if matchInsensitiveValue(s, val, ignoreCase) {
return true
}
if len(s) <= len(val) {
return false
}
if s[:len(val)] == val && s[len(val)] == '-' {
if matchInsensitiveValue(s[:len(val)], val, ignoreCase) && s[len(val)] == '-' {
return true
}
return false
@ -356,36 +367,45 @@ func attributeDashMatch(key, val string, n *html.Node) bool {
// attributePrefixMatch returns a Selector that matches elements where
// the attribute named key starts with val.
func attributePrefixMatch(key, val string, n *html.Node) bool {
func attributePrefixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
return matchAttribute(n, key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
if ignoreCase {
return strings.HasPrefix(strings.ToLower(s), strings.ToLower(val))
}
return strings.HasPrefix(s, val)
})
}
// attributeSuffixMatch matches elements where
// the attribute named key ends with val.
func attributeSuffixMatch(key, val string, n *html.Node) bool {
func attributeSuffixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
return matchAttribute(n, key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
if ignoreCase {
return strings.HasSuffix(strings.ToLower(s), strings.ToLower(val))
}
return strings.HasSuffix(s, val)
})
}
// attributeSubstringMatch matches nodes where
// the attribute named key contains val.
func attributeSubstringMatch(key, val string, n *html.Node) bool {
func attributeSubstringMatch(key, val string, n *html.Node, ignoreCase bool) bool {
return matchAttribute(n, key,
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
if ignoreCase {
return strings.Contains(strings.ToLower(s), strings.ToLower(val))
}
return strings.Contains(s, val)
})
}
@ -407,394 +427,22 @@ func (c attrSelector) PseudoElement() string {
return ""
}
// ---------------- Pseudo class selectors ----------------
// we use severals concrete types of pseudo-class selectors
// see pseudo_classes.go for pseudo classes selectors
type relativePseudoClassSelector struct {
name string // one of "not", "has", "haschild"
match SelectorGroup
}
func (s relativePseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
switch s.name {
case "not":
// matches elements that do not match a.
return !s.match.Match(n)
case "has":
// matches elements with any descendant that matches a.
return hasDescendantMatch(n, s.match)
case "haschild":
// matches elements with a child that matches a.
return hasChildMatch(n, s.match)
default:
panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name))
}
}
// hasChildMatch returns whether n has any child that matches a.
func hasChildMatch(n *html.Node, a Matcher) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a.Match(c) {
return true
}
}
return false
}
// hasDescendantMatch performs a depth-first search of n's descendants,
// testing whether any of them match a. It returns true as soon as a match is
// found, or false if no match is found.
func hasDescendantMatch(n *html.Node, a Matcher) bool {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
return true
}
}
return false
}
// Specificity returns the specificity of the most specific selectors
// in the pseudo-class arguments.
// See https://www.w3.org/TR/selectors/#specificity-rules
func (s relativePseudoClassSelector) Specificity() Specificity {
var max Specificity
for _, sel := range s.match {
newSpe := sel.Specificity()
if max.Less(newSpe) {
max = newSpe
}
}
return max
}
func (c relativePseudoClassSelector) PseudoElement() string {
return ""
}
type containsPseudoClassSelector struct {
own bool
// on a static context, some selectors can't match anything
type neverMatchSelector struct {
value string
}
func (s containsPseudoClassSelector) Match(n *html.Node) bool {
var text string
if s.own {
// matches nodes that directly contain the given text
text = strings.ToLower(nodeOwnText(n))
} else {
// matches nodes that contain the given text.
text = strings.ToLower(nodeText(n))
}
return strings.Contains(text, s.value)
}
func (s containsPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c containsPseudoClassSelector) PseudoElement() string {
return ""
}
type regexpPseudoClassSelector struct {
own bool
regexp *regexp.Regexp
}
func (s regexpPseudoClassSelector) Match(n *html.Node) bool {
var text string
if s.own {
// matches nodes whose text directly matches the specified regular expression
text = nodeOwnText(n)
} else {
// matches nodes whose text matches the specified regular expression
text = nodeText(n)
}
return s.regexp.MatchString(text)
}
// writeNodeText writes the text contained in n and its descendants to b.
func writeNodeText(n *html.Node, b *bytes.Buffer) {
switch n.Type {
case html.TextNode:
b.WriteString(n.Data)
case html.ElementNode:
for c := n.FirstChild; c != nil; c = c.NextSibling {
writeNodeText(c, b)
}
}
}
// nodeText returns the text contained in n and its descendants.
func nodeText(n *html.Node) string {
var b bytes.Buffer
writeNodeText(n, &b)
return b.String()
}
// nodeOwnText returns the contents of the text nodes that are direct
// children of n.
func nodeOwnText(n *html.Node) string {
var b bytes.Buffer
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == html.TextNode {
b.WriteString(c.Data)
}
}
return b.String()
}
func (s regexpPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c regexpPseudoClassSelector) PseudoElement() string {
return ""
}
type nthPseudoClassSelector struct {
a, b int
last, ofType bool
}
func (s nthPseudoClassSelector) Match(n *html.Node) bool {
if s.a == 0 {
if s.last {
return simpleNthLastChildMatch(s.b, s.ofType, n)
} else {
return simpleNthChildMatch(s.b, s.ofType, n)
}
}
return nthChildMatch(s.a, s.b, s.last, s.ofType, n)
}
// nthChildMatch implements :nth-child(an+b).
// If last is true, implements :nth-last-child instead.
// If ofType is true, implements :nth-of-type instead.
func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
func (s neverMatchSelector) Match(n *html.Node) bool {
return false
}
parent := n.Parent
if parent == nil {
return false
func (s neverMatchSelector) Specificity() Specificity {
return Specificity{0, 0, 0}
}
if parent.Type == html.DocumentNode {
return false
}
i := -1
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
i = count
if !last {
break
}
}
}
if i == -1 {
// This shouldn't happen, since n should always be one of its parent's children.
return false
}
if last {
i = count - i + 1
}
i -= b
if a == 0 {
return i == 0
}
return i%a == 0 && i/a >= 0
}
// simpleNthChildMatch implements :nth-child(b).
// If ofType is true, implements :nth-of-type instead.
func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
return count == b
}
if count >= b {
return false
}
}
return false
}
// simpleNthLastChildMatch implements :nth-last-child(b).
// If ofType is true, implements :nth-last-of-type instead.
func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.LastChild; c != nil; c = c.PrevSibling {
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
continue
}
count++
if c == n {
return count == b
}
if count >= b {
return false
}
}
return false
}
// Specificity for nth-child pseudo-class.
// Does not support a list of selectors
func (s nthPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c nthPseudoClassSelector) PseudoElement() string {
return ""
}
type onlyChildPseudoClassSelector struct {
ofType bool
}
// Match implements :only-child.
// If `ofType` is true, it implements :only-of-type instead.
func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
parent := n.Parent
if parent == nil {
return false
}
if parent.Type == html.DocumentNode {
return false
}
count := 0
for c := parent.FirstChild; c != nil; c = c.NextSibling {
if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) {
continue
}
count++
if count > 1 {
return false
}
}
return count == 1
}
func (s onlyChildPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c onlyChildPseudoClassSelector) PseudoElement() string {
return ""
}
type inputPseudoClassSelector struct{}
// Matches input, select, textarea and button elements.
func (s inputPseudoClassSelector) Match(n *html.Node) bool {
return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button")
}
func (s inputPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c inputPseudoClassSelector) PseudoElement() string {
return ""
}
type emptyElementPseudoClassSelector struct{}
// Matches empty elements.
func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
switch c.Type {
case html.ElementNode, html.TextNode:
return false
}
}
return true
}
func (s emptyElementPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c emptyElementPseudoClassSelector) PseudoElement() string {
return ""
}
type rootPseudoClassSelector struct{}
// Match implements :root
func (s rootPseudoClassSelector) Match(n *html.Node) bool {
if n.Type != html.ElementNode {
return false
}
if n.Parent == nil {
return false
}
return n.Parent.Type == html.DocumentNode
}
func (s rootPseudoClassSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c rootPseudoClassSelector) PseudoElement() string {
func (c neverMatchSelector) PseudoElement() string {
return ""
}

View file

@ -2,21 +2,35 @@ package cascadia
import (
"fmt"
"strconv"
"strings"
)
// implements the reverse operation Sel -> string
// specialCharReplacer prefixes every special CSS character with a
// backslash. It is built once in init.
var specialCharReplacer *strings.Replacer

func init() {
	const special = ",!\"#$%&'()*+ -./:;<=>?@[\\]^`{|}~"
	// The replacer takes a flat list of old/new pairs: two entries per character.
	oldnew := make([]string, 0, 2*len(special))
	for _, r := range special {
		ch := string(r)
		oldnew = append(oldnew, ch, "\\"+ch)
	}
	specialCharReplacer = strings.NewReplacer(oldnew...)
}

// escape returns s with every special CSS character preceded by a backslash.
func escape(s string) string {
	return specialCharReplacer.Replace(s)
}
func (c tagSelector) String() string {
return c.tag
}
func (c idSelector) String() string {
return "#" + c.id
return "#" + escape(c.id)
}
func (c classSelector) String() string {
return "." + c.class
return "." + escape(c.class)
}
func (c attrSelector) String() string {
@ -26,12 +40,20 @@ func (c attrSelector) String() string {
} else if c.operation != "" {
val = fmt.Sprintf(`"%s"`, val)
}
return fmt.Sprintf(`[%s%s%s]`, c.key, c.operation, val)
ignoreCase := ""
if c.insensitive {
ignoreCase = " i"
}
return fmt.Sprintf(`[%s%s%s%s]`, c.key, c.operation, val, ignoreCase)
}
func (c relativePseudoClassSelector) String() string {
return fmt.Sprintf(":%s(%s)", c.name, c.match.String())
}
func (c containsPseudoClassSelector) String() string {
s := "contains"
if c.own {
@ -39,6 +61,7 @@ func (c containsPseudoClassSelector) String() string {
}
return fmt.Sprintf(`:%s("%s")`, s, c.value)
}
func (c regexpPseudoClassSelector) String() string {
s := "matches"
if c.own {
@ -46,6 +69,7 @@ func (c regexpPseudoClassSelector) String() string {
}
return fmt.Sprintf(":%s(%s)", s, c.regexp.String())
}
func (c nthPseudoClassSelector) String() string {
if c.a == 0 && c.b == 1 { // special cases
s := ":first-"
@ -70,24 +94,56 @@ func (c nthPseudoClassSelector) String() string {
case [2]bool{false, false}:
name = "nth-child"
}
return fmt.Sprintf(":%s(%dn+%d)", name, c.a, c.b)
s := fmt.Sprintf("+%d", c.b)
if c.b < 0 { // avoid +-8 invalid syntax
s = strconv.Itoa(c.b)
}
return fmt.Sprintf(":%s(%dn%s)", name, c.a, s)
}
func (c onlyChildPseudoClassSelector) String() string {
if c.ofType {
return ":only-of-type"
}
return ":only-child"
}
func (c inputPseudoClassSelector) String() string {
return ":input"
}
func (c emptyElementPseudoClassSelector) String() string {
return ":empty"
}
func (c rootPseudoClassSelector) String() string {
return ":root"
}
func (c linkPseudoClassSelector) String() string {
return ":link"
}
func (c langPseudoClassSelector) String() string {
return fmt.Sprintf(":lang(%s)", c.lang)
}
func (c neverMatchSelector) String() string {
return c.value
}
func (c enabledPseudoClassSelector) String() string {
return ":enabled"
}
func (c disabledPseudoClassSelector) String() string {
return ":disabled"
}
func (c checkedPseudoClassSelector) String() string {
return ":checked"
}
func (c compoundSelector) String() string {
if len(c.selectors) == 0 && c.pseudoElement == "" {
return "*"

View file

@ -8,8 +8,6 @@
A high-performance 100% compatible drop-in replacement of "encoding/json"
You can also use thrift like JSON using [thrift-iterator](https://github.com/thrift-iterator/go)
# Benchmark
![benchmark](http://jsoniter.com/benchmarks/go-benchmark.png)

View file

@ -6,6 +6,6 @@ require (
github.com/davecgh/go-spew v1.1.1
github.com/google/gofuzz v1.0.0
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742
github.com/modern-go/reflect2 v1.0.2
github.com/stretchr/testify v1.3.0
)

View file

@ -5,8 +5,8 @@ github.com/google/gofuzz v1.0.0 h1:A8PeW59pxE9IoFRqBp37U+mSNaQoZ46F1f0f863XSXw=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=

View file

@ -288,6 +288,9 @@ non_decimal_loop:
return iter.readFloat64SlowPath()
}
value = (value << 3) + (value << 1) + uint64(ind)
if value > maxFloat64 {
return iter.readFloat64SlowPath()
}
}
}
return iter.readFloat64SlowPath()

View file

@ -9,6 +9,7 @@ var intDigits []int8
const uint32SafeToMultiply10 = uint32(0xffffffff)/10 - 1
const uint64SafeToMultiple10 = uint64(0xffffffffffffffff)/10 - 1
const maxFloat64 = 1<<53 - 1
func init() {
intDigits = make([]int8, 256)
@ -339,7 +340,7 @@ func (iter *Iterator) readUint64(c byte) (ret uint64) {
}
func (iter *Iterator) assertInteger() {
if iter.head < len(iter.buf) && iter.buf[iter.head] == '.' {
if iter.head < iter.tail && iter.buf[iter.head] == '.' {
iter.ReportError("assertInteger", "can not decode float as int")
}
}

View file

@ -65,7 +65,7 @@ func (iter *Iterator) ReadVal(obj interface{}) {
decoder := iter.cfg.getDecoderFromCache(cacheKey)
if decoder == nil {
typ := reflect2.TypeOf(obj)
if typ.Kind() != reflect.Ptr {
if typ == nil || typ.Kind() != reflect.Ptr {
iter.ReportError("ReadVal", "can only unmarshal into pointer")
return
}

View file

@ -33,12 +33,20 @@ type jsonRawMessageCodec struct {
}
func (codec *jsonRawMessageCodec) Decode(ptr unsafe.Pointer, iter *Iterator) {
*((*json.RawMessage)(ptr)) = json.RawMessage(iter.SkipAndReturnBytes())
if iter.ReadNil() {
*((*json.RawMessage)(ptr)) = nil
} else {
*((*json.RawMessage)(ptr)) = iter.SkipAndReturnBytes()
}
}
func (codec *jsonRawMessageCodec) Encode(ptr unsafe.Pointer, stream *Stream) {
if *((*json.RawMessage)(ptr)) == nil {
stream.WriteNil()
} else {
stream.WriteRaw(string(*((*json.RawMessage)(ptr))))
}
}
func (codec *jsonRawMessageCodec) IsEmpty(ptr unsafe.Pointer) bool {
return len(*((*json.RawMessage)(ptr))) == 0
@ -48,12 +56,20 @@ type jsoniterRawMessageCodec struct {
}
func (codec *jsoniterRawMessageCodec) Decode(ptr unsafe.Pointer, iter *Iterator) {
*((*RawMessage)(ptr)) = RawMessage(iter.SkipAndReturnBytes())
if iter.ReadNil() {
*((*RawMessage)(ptr)) = nil
} else {
*((*RawMessage)(ptr)) = iter.SkipAndReturnBytes()
}
}
func (codec *jsoniterRawMessageCodec) Encode(ptr unsafe.Pointer, stream *Stream) {
if *((*RawMessage)(ptr)) == nil {
stream.WriteNil()
} else {
stream.WriteRaw(string(*((*RawMessage)(ptr))))
}
}
func (codec *jsoniterRawMessageCodec) IsEmpty(ptr unsafe.Pointer) bool {
return len(*((*RawMessage)(ptr))) == 0

View file

@ -1075,6 +1075,11 @@ type stringModeNumberDecoder struct {
}
func (decoder *stringModeNumberDecoder) Decode(ptr unsafe.Pointer, iter *Iterator) {
if iter.WhatIsNext() == NilValue {
decoder.elemDecoder.Decode(ptr, iter)
return
}
c := iter.nextToken()
if c != '"' {
iter.ReportError("stringModeNumberDecoder", `expect ", but found `+string([]byte{c}))

View file

@ -2,4 +2,4 @@ module github.com/mattn/go-runewidth
go 1.9
require github.com/rivo/uniseg v0.1.0
require github.com/rivo/uniseg v0.2.0

View file

@ -1,2 +1,2 @@
github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=

View file

@ -12,8 +12,14 @@ var (
// EastAsianWidth will be set true if the current locale is CJK
EastAsianWidth bool
// StrictEmojiNeutral should be set to false to handle broken fonts
StrictEmojiNeutral bool = true
// DefaultCondition is a condition in current locale
DefaultCondition = &Condition{}
DefaultCondition = &Condition{
EastAsianWidth: false,
StrictEmojiNeutral: true,
}
)
func init() {
@ -84,26 +90,52 @@ var nonprint = table{
// Condition has the flag EastAsianWidth, which tells whether the current locale is CJK or not.
type Condition struct {
EastAsianWidth bool
StrictEmojiNeutral bool
}
// NewCondition return new instance of Condition which is current locale.
func NewCondition() *Condition {
return &Condition{
EastAsianWidth: EastAsianWidth,
StrictEmojiNeutral: StrictEmojiNeutral,
}
}
// RuneWidth returns the number of cells in r.
// See http://www.unicode.org/reports/tr11/
func (c *Condition) RuneWidth(r rune) int {
// optimized version, verified by TestRuneWidthChecksums()
if !c.EastAsianWidth {
switch {
case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining, notassigned):
case r < 0x20 || r > 0x10FFFF:
return 0
case (c.EastAsianWidth && IsAmbiguousWidth(r)) || inTables(r, doublewidth):
case (r >= 0x7F && r <= 0x9F) || r == 0xAD: // nonprint
return 0
case r < 0x300:
return 1
case inTable(r, narrow):
return 1
case inTables(r, nonprint, combining):
return 0
case inTable(r, doublewidth):
return 2
default:
return 1
}
} else {
switch {
case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining):
return 0
case inTable(r, narrow):
return 1
case inTables(r, ambiguous, doublewidth):
return 2
case !c.StrictEmojiNeutral && inTables(r, ambiguous, emoji, narrow):
return 2
default:
return 1
}
}
}
// StringWidth return width as you can see

View file

@ -124,8 +124,10 @@ var ambiguous = table{
{0x1F18F, 0x1F190}, {0x1F19B, 0x1F1AC}, {0xE0100, 0xE01EF},
{0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD},
}
var notassigned = table{
{0x27E6, 0x27ED}, {0x2985, 0x2986},
var narrow = table{
{0x0020, 0x007E}, {0x00A2, 0x00A3}, {0x00A5, 0x00A6},
{0x00AC, 0x00AC}, {0x00AF, 0x00AF}, {0x27E6, 0x27ED},
{0x2985, 0x2986},
}
var neutral = table{

View file

@ -339,13 +339,19 @@ func (c *Client) init(o *Options) error {
var domain string
var user string
a := strings.SplitN(o.User, "@", 2)
if len(o.User) > 0 {
// Check if User is not empty. Otherwise, we'll be attempting ANONYMOUS with Host domain.
switch {
case len(o.User) > 0:
if len(a) != 2 {
return errors.New("xmpp: invalid username (want user@domain): " + o.User)
}
user = a[0]
domain = a[1]
} // Otherwise, we'll be attempting ANONYMOUS
case strings.Contains(o.Host, ":"):
domain = strings.SplitN(o.Host, ":", 2)[0]
default:
domain = o.Host
}
// Declare intent to be a jabber client and gather stream features.
f, err := c.startStream(o, domain)
@ -647,6 +653,10 @@ func (c *Client) Recv() (stanza interface{}, err error) {
// Handle Pubsub notifications
switch v.Event.Items.Node {
case XMPPNS_AVATAR_PEP_METADATA:
if len(v.Event.Items.Items) == 0 {
return AvatarMetadata{}, errors.New("No avatar metadata items available")
}
return handleAvatarMetadata(v.Event.Items.Items[0].Body,
v.From)
// I am not sure whether this can even happen.
@ -759,10 +769,18 @@ func (c *Client) Recv() (stanza interface{}, err error) {
switch p.Node {
case XMPPNS_AVATAR_PEP_DATA:
if len(p.Items) == 0 {
return AvatarData{}, errors.New("No avatar data items available")
}
return handleAvatarData(p.Items[0].Body,
v.From,
p.Items[0].ID)
case XMPPNS_AVATAR_PEP_METADATA:
if len(p.Items) == 0 {
return AvatarMetadata{}, errors.New("No avatar metadata items available")
}
return handleAvatarMetadata(p.Items[0].Body,
v.From)
default:

View file

@ -23,7 +23,7 @@ func (c *Client) RawInformationQuery(from, to, id, iqType, requestNamespace, bod
return id, err
}
// rawInformation send a IQ request with the the payload body to the server
// RawInformation sends an IQ request with the payload body to the server
func (c *Client) RawInformation(from, to, id, iqType, body string) (string, error) {
const xmlIQ = "<iq from='%s' to='%s' id='%s' type='%s'>%s</iq>"
_, err := fmt.Fprintf(c.conn, xmlIQ, xmlEscape(from), xmlEscape(to), id, iqType, body)

View file

@ -32,6 +32,7 @@ The `gofeed` library is a robust feed parser that supports parsing both [RSS](ht
- Atom 0.3
- Atom 1.0
- JSON 1.0
- JSON 1.1
#### Extension Support
@ -113,6 +114,15 @@ feed, _ := fp.ParseURLWithContext("http://feeds.twit.tv/twit.xml", ctx)
fmt.Println(feed.Title)
```
##### Parse a feed from a URL with a custom User-Agent:
```go
fp := gofeed.NewParser()
fp.UserAgent = "MyCustomAgent 1.0"
feed, _ := fp.ParseURL("http://feeds.twit.tv/twit.xml")
fmt.Println(feed.Title)
```
#### Feed Specific Parsers
You can easily use the `rss.Parser`, `atom.Parser` or `json.Parser` directly if you have a usage scenario that requires it:
@ -228,36 +238,36 @@ In addition to the generic handling of extensions, `gofeed` also has built in su
The `DefaultRSSTranslator`, the `DefaultAtomTranslator` and the `DefaultJSONTranslator` map the following `rss.Feed`, `atom.Feed` and `json.Feed` fields to their respective `gofeed.Feed` fields. They are listed in order of precedence (highest to lowest):
`gofeed.Feed` | RSS | Atom | JSON
--- | --- | --- | --
Title | /rss/channel/title<br>/rdf:RDF/channel/title<br>/rss/channel/dc:title<br>/rdf:RDF/channel/dc:title | /feed/title | /title
Description | /rss/channel/description<br>/rdf:RDF/channel/description<br>/rss/channel/itunes:subtitle | /feed/subtitle<br>/feed/tagline | /description
Link | /rss/channel/link<br>/rdf:RDF/channel/link | /feed/link[@rel=”alternate”]/@href<br>/feed/link[not(@rel)]/@href | /home_page_url
FeedLink | /rss/channel/atom:link[@rel="self"]/@href<br>/rdf:RDF/channel/atom:link[@rel="self"]/@href | /feed/link[@rel="self"]/@href | /feed_url
Updated | /rss/channel/lastBuildDate<br>/rss/channel/dc:date<br>/rdf:RDF/channel/dc:date | /feed/updated<br>/feed/modified | /items[0]/date_modified
Published | /rss/channel/pubDate | | /items[0]/date_published
Author | /rss/channel/managingEditor<br>/rss/channel/webMaster<br>/rss/channel/dc:author<br>/rdf:RDF/channel/dc:author<br>/rss/channel/dc:creator<br>/rdf:RDF/channel/dc:creator<br>/rss/channel/itunes:author | /feed/author | /author/name
Language | /rss/channel/language<br>/rss/channel/dc:language<br>/rdf:RDF/channel/dc:language | /feed/@xml:lang |
Image | /rss/channel/image<br>/rdf:RDF/image<br>/rss/channel/itunes:image | /feed/logo | /icon
Copyright | /rss/channel/copyright<br>/rss/channel/dc:rights<br>/rdf:RDF/channel/dc:rights | /feed/rights<br>/feed/copyright |
Generator | /rss/channel/generator | /feed/generator |
Categories | /rss/channel/category<br>/rss/channel/itunes:category<br>/rss/channel/itunes:keywords<br>/rss/channel/dc:subject<br>/rdf:RDF/channel/dc:subject | /feed/category |
`gofeed.Item` | RSS | Atom | JSON
--- | --- | --- | ---
Title | /rss/channel/item/title<br>/rdf:RDF/item/title<br>/rdf:RDF/item/dc:title<br>/rss/channel/item/dc:title | /feed/entry/title | /items/title
Description | /rss/channel/item/description<br>/rdf:RDF/item/description<br>/rss/channel/item/dc:description<br>/rdf:RDF/item/dc:description | /feed/entry/summary | /items/summary
Content | /rss/channel/item/content:encoded | /feed/entry/content | /items/content_html
Link | /rss/channel/item/link<br>/rdf:RDF/item/link | /feed/entry/link[@rel=”alternate”]/@href<br>/feed/entry/link[not(@rel)]/@href | /items/url
Updated | /rss/channel/item/dc:date<br>/rdf:RDF/rdf:item/dc:date | /feed/entry/modified<br>/feed/entry/updated | /items/date_modified
Published | /rss/channel/item/pubDate<br>/rss/channel/item/dc:date | /feed/entry/published<br>/feed/entry/issued | /items/date_published
Author | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/author | /items/author/name
GUID | /rss/channel/item/guid | /feed/entry/id | /items/id
Image | /rss/channel/item/itunes:image<br>/rss/channel/item/media:image | | /items/image<br>/items/banner_image
Categories | /rss/channel/item/category<br>/rss/channel/item/dc:subject<br>/rss/channel/item/itunes:keywords<br>/rdf:RDF/channel/item/dc:subject | /feed/entry/category | /items/tags
Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel=”enclosure”] | /items/attachments
| `gofeed.Feed` | RSS | Atom | JSON |
| ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------- | ------------------------ |
| Title | /rss/channel/title<br>/rdf:RDF/channel/title<br>/rss/channel/dc:title<br>/rdf:RDF/channel/dc:title | /feed/title | /title |
| Description | /rss/channel/description<br>/rdf:RDF/channel/description<br>/rss/channel/itunes:subtitle | /feed/subtitle<br>/feed/tagline | /description |
| Link | /rss/channel/link<br>/rdf:RDF/channel/link | /feed/link[@rel="alternate"]/@href<br>/feed/link[not(@rel)]/@href | /home_page_url |
| FeedLink | /rss/channel/atom:link[@rel="self"]/@href<br>/rdf:RDF/channel/atom:link[@rel="self"]/@href | /feed/link[@rel="self"]/@href | /feed_url |
| Updated | /rss/channel/lastBuildDate<br>/rss/channel/dc:date<br>/rdf:RDF/channel/dc:date | /feed/updated<br>/feed/modified | /items[0]/date_modified |
| Published | /rss/channel/pubDate | | /items[0]/date_published |
| Author | /rss/channel/managingEditor<br>/rss/channel/webMaster<br>/rss/channel/dc:author<br>/rdf:RDF/channel/dc:author<br>/rss/channel/dc:creator<br>/rdf:RDF/channel/dc:creator<br>/rss/channel/itunes:author | /feed/authors[0] | /author |
| Authors | /rss/channel/managingEditor<br>/rss/channel/webMaster<br>/rss/channel/dc:author<br>/rdf:RDF/channel/dc:author<br>/rss/channel/dc:creator<br>/rdf:RDF/channel/dc:creator<br>/rss/channel/itunes:author | /feed/authors | /authors<br>/author |
| Language | /rss/channel/language<br>/rss/channel/dc:language<br>/rdf:RDF/channel/dc:language | /feed/@xml:lang | /language |
| Image | /rss/channel/image<br>/rdf:RDF/image<br>/rss/channel/itunes:image | /feed/logo | /icon |
| Copyright | /rss/channel/copyright<br>/rss/channel/dc:rights<br>/rdf:RDF/channel/dc:rights | /feed/rights<br>/feed/copyright | |
| Generator | /rss/channel/generator | /feed/generator | |
| Categories | /rss/channel/category<br>/rss/channel/itunes:category<br>/rss/channel/itunes:keywords<br>/rss/channel/dc:subject<br>/rdf:RDF/channel/dc:subject | /feed/category | |
| `gofeed.Item` | RSS | Atom | JSON |
| ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------- |
| Title | /rss/channel/item/title<br>/rdf:RDF/item/title<br>/rdf:RDF/item/dc:title<br>/rss/channel/item/dc:title | /feed/entry/title | /items/title |
| Description | /rss/channel/item/description<br>/rdf:RDF/item/description<br>/rss/channel/item/dc:description<br>/rdf:RDF/item/dc:description | /feed/entry/summary | /items/summary |
| Content | /rss/channel/item/content:encoded | /feed/entry/content | /items/content_html |
| Link | /rss/channel/item/link<br>/rdf:RDF/item/link | /feed/entry/link[@rel="alternate"]/@href<br>/feed/entry/link[not(@rel)]/@href | /items/url |
| Updated | /rss/channel/item/dc:date<br>/rdf:RDF/rdf:item/dc:date | /feed/entry/modified<br>/feed/entry/updated | /items/date_modified |
| Published | /rss/channel/item/pubDate<br>/rss/channel/item/dc:date | /feed/entry/published<br>/feed/entry/issued | /items/date_published |
| Author | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/author | /items/author/name |
| Authors | /rss/channel/item/author<br>/rss/channel/item/dc:author<br>/rdf:RDF/item/dc:author<br>/rss/channel/item/dc:creator<br>/rdf:RDF/item/dc:creator<br>/rss/channel/item/itunes:author | /feed/entry/authors | /items/authors<br>/items/author/name |
| GUID | /rss/channel/item/guid | /feed/entry/id | /items/id |
| Image | /rss/channel/item/itunes:image<br>/rss/channel/item/media:image | | /items/image<br>/items/banner_image |
| Categories | /rss/channel/item/category<br>/rss/channel/item/dc:subject<br>/rss/channel/item/itunes:keywords<br>/rdf:RDF/channel/item/dc:subject | /feed/entry/category | /items/tags |
| Enclosures | /rss/channel/item/enclosure | /feed/entry/link[@rel="enclosure"] | /items/attachments |
## Dependencies

View file

@ -33,17 +33,20 @@ func DetectFeedType(feed io.Reader) FeedType {
buffer := new(bytes.Buffer)
buffer.ReadFrom(feed)
// remove leading whitespace (if exists)
var firstChar byte
for {
loop: for {
ch, err := buffer.ReadByte()
if err != nil {
return FeedTypeUnknown
}
if ch != ' ' && ch != '\t' {
// ignore leading whitespace & byte order marks
switch ch {
case ' ', '\r', '\n', '\t':
case 0xFE, 0xFF, 0x00, 0xEF, 0xBB, 0xBF: // utf 8-16-32 bom
default:
firstChar = ch
buffer.UnreadByte()
break
break loop
}
}

View file

@ -4,7 +4,7 @@ import (
"encoding/json"
"time"
"github.com/mmcdole/gofeed/extensions"
ext "github.com/mmcdole/gofeed/extensions"
)
// Feed is the universal Feed type that atom.Feed
@ -17,11 +17,13 @@ type Feed struct {
Description string `json:"description,omitempty"`
Link string `json:"link,omitempty"`
FeedLink string `json:"feedLink,omitempty"`
Links []string `json:"links,omitempty"`
Updated string `json:"updated,omitempty"`
UpdatedParsed *time.Time `json:"updatedParsed,omitempty"`
Published string `json:"published,omitempty"`
PublishedParsed *time.Time `json:"publishedParsed,omitempty"`
Author *Person `json:"author,omitempty"`
Author *Person `json:"author,omitempty"` // Deprecated: Use feed.Authors instead
Authors []*Person `json:"authors,omitempty"`
Language string `json:"language,omitempty"`
Image *Image `json:"image,omitempty"`
Copyright string `json:"copyright,omitempty"`
@ -49,11 +51,13 @@ type Item struct {
Description string `json:"description,omitempty"`
Content string `json:"content,omitempty"`
Link string `json:"link,omitempty"`
Links []string `json:"links,omitempty"`
Updated string `json:"updated,omitempty"`
UpdatedParsed *time.Time `json:"updatedParsed,omitempty"`
Published string `json:"published,omitempty"`
PublishedParsed *time.Time `json:"publishedParsed,omitempty"`
Author *Person `json:"author,omitempty"`
Author *Person `json:"author,omitempty"` // Deprecated: Use item.Authors instead
Authors []*Person `json:"authors,omitempty"`
GUID string `json:"guid,omitempty"`
Image *Image `json:"image,omitempty"`
Categories []string `json:"categories,omitempty"`

View file

@ -4,7 +4,6 @@ go 1.14
require (
github.com/PuerkitoBio/goquery v1.5.1
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/json-iterator/go v1.1.10
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf
github.com/stretchr/testify v1.3.0

View file

@ -24,8 +24,6 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/urfave/cli v1.22.3 h1:FpNT6zq26xNpHZy08emi755QwzLPs6Pukqjlc7RfOMU=
@ -36,7 +34,6 @@ golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=

View file

@ -23,6 +23,7 @@ var dateFormats = []string{
"Mon Jan 2 15:04 2006",
"Mon Jan 02, 2006 3:04 pm",
"Mon Jan 02 2006 15:04:05 -0700",
"Mon Jan 02 2006 15:04:05 GMT-0700 (MST)",
"Monday, January 2, 2006 03:04 PM",
"Monday, January 2, 2006",
"Monday, January 02, 2006",
@ -64,6 +65,7 @@ var dateFormats = []string{
"Mon, 02 Jan 2006 15:04:05 -07",
"Mon, 02 Jan 2006 15:04:05 00",
"Mon, 02 Jan 2006 15:04:05",
"Mon, 02 Jan 2006 15:4:5 Z",
"Mon, 02 Jan 2006",
"January 2, 2006 3:04 PM",
"January 2, 2006, 3:04 p.m.",

View file

@ -19,6 +19,10 @@ type Feed struct {
Items []*Item `json:"items"` // items is an array, and is required
// TODO Hubs // hubs (very optional, array of objects) describes endpoints that can be used to subscribe to real-time notifications from the publisher of this feed. Each object has a type and url, both of which are required. See the section “Subscribing to Real-time Notifications” below for details.
// TODO Extensions
// Version 1.1
Authors []*Author `json:"authors,omitempty"`
Language string `json:"language,omitempty"`
}
func (f Feed) String() string {
@ -40,9 +44,14 @@ type Item struct {
DatePublished string `json:"date_published,omitempty"` // date_published (optional, string) specifies the date in RFC 3339 format. (Example: 2010-02-07T14:04:00-05:00.)
DateModified string `json:"date_modified,omitempty"` // date_modified (optional, string) specifies the modification date in RFC 3339 format.
Author *Author `json:"author,omitempty"` // author (optional, object) has the same structure as the top-level author. If not specified in an item, then the top-level author, if present, is the author of the item.
Tags []string `json:"tags,omitempty"` // tags (optional, array of strings) can have any plain text values you want. Tags tend to be just one word, but they may be anything.
Attachments *[]Attachments `json:"attachments,omitempty"` // attachments (optional, array) lists related resources. Podcasts, for instance, would include an attachment thats an audio or video file. An individual item may have one or more attachments.
// TODO Extensions
// Version 1.1
Authors []*Author `json:"authors,omitempty"`
Language string `json:"language,omitempty"`
}
// Author defines the feed author structure. The author object has several members. These are all optional — but if you provide an author object, then at least one is required:

View file

@ -35,6 +35,7 @@ type Parser struct {
AtomTranslator Translator
RSSTranslator Translator
JSONTranslator Translator
UserAgent string
Client *http.Client
rp *rss.Parser
ap *atom.Parser
@ -47,6 +48,7 @@ func NewParser() *Parser {
rp: &rss.Parser{},
ap: &atom.Parser{},
jp: &json.Parser{},
UserAgent: "Gofeed/1.0",
}
return &fp
}
@ -97,7 +99,7 @@ func (f *Parser) ParseURLWithContext(feedURL string, ctx context.Context) (feed
return nil, err
}
req = req.WithContext(ctx)
req.Header.Set("User-Agent", "Gofeed/1.0")
req.Header.Set("User-Agent", f.UserAgent)
resp, err := client.Do(req)
if err != nil {

View file

@ -4,7 +4,7 @@ import (
"encoding/json"
"time"
"github.com/mmcdole/gofeed/extensions"
ext "github.com/mmcdole/gofeed/extensions"
)
// Feed is an RSS Feed
@ -59,6 +59,7 @@ type Item struct {
DublinCoreExt *ext.DublinCoreExtension `json:"dcExt,omitempty"`
ITunesExt *ext.ITunesItemExtension `json:"itunesExt,omitempty"`
Extensions ext.Extensions `json:"extensions,omitempty"`
Custom map[string]string `json:"custom,omitempty"`
}
// Image is an image that represents the feed

View file

@ -415,8 +415,14 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
}
categories = append(categories, result)
} else {
// Skip any elements not part of the item spec
p.Skip()
result, err := shared.ParseText(p)
if err != nil {
continue
}
if item.Custom == nil {
item.Custom = make(map[string]string, 0)
}
item.Custom[name] = result
}
}
}

View file

@ -39,11 +39,13 @@ func (t *DefaultRSSTranslator) Translate(feed interface{}) (*Feed, error) {
result.Description = t.translateFeedDescription(rss)
result.Link = t.translateFeedLink(rss)
result.FeedLink = t.translateFeedFeedLink(rss)
result.Links = t.translateFeedLinks(rss)
result.Updated = t.translateFeedUpdated(rss)
result.UpdatedParsed = t.translateFeedUpdatedParsed(rss)
result.Published = t.translateFeedPublished(rss)
result.PublishedParsed = t.translateFeedPublishedParsed(rss)
result.Author = t.translateFeedAuthor(rss)
result.Authors = t.translateFeedAuthors(rss)
result.Language = t.translateFeedLanguage(rss)
result.Image = t.translateFeedImage(rss)
result.Copyright = t.translateFeedCopyright(rss)
@ -64,9 +66,11 @@ func (t *DefaultRSSTranslator) translateFeedItem(rssItem *rss.Item) (item *Item)
item.Description = t.translateItemDescription(rssItem)
item.Content = t.translateItemContent(rssItem)
item.Link = t.translateItemLink(rssItem)
item.Links = t.translateItemLinks(rssItem)
item.Published = t.translateItemPublished(rssItem)
item.PublishedParsed = t.translateItemPublishedParsed(rssItem)
item.Author = t.translateItemAuthor(rssItem)
item.Authors = t.translateItemAuthors(rssItem)
item.GUID = t.translateItemGUID(rssItem)
item.Image = t.translateItemImage(rssItem)
item.Categories = t.translateItemCategories(rssItem)
@ -74,6 +78,7 @@ func (t *DefaultRSSTranslator) translateFeedItem(rssItem *rss.Item) (item *Item)
item.DublinCoreExt = rssItem.DublinCoreExt
item.ITunesExt = rssItem.ITunesExt
item.Extensions = rssItem.Extensions
item.Custom = rssItem.Custom
return
}
@ -113,6 +118,26 @@ func (t *DefaultRSSTranslator) translateFeedFeedLink(rss *rss.Feed) (link string
return
}
// translateFeedLinks collects the links known for an RSS feed, in this order:
// the channel Link (when non-empty), the iTunes subtitle (when non-empty), and
// the href attribute of every "link" element found in the atom/atom10/atom03
// extensions whose rel attribute is empty, "alternate" or "self".
// The result is nil when nothing was collected.
func (t *DefaultRSSTranslator) translateFeedLinks(rss *rss.Feed) (links []string) {
	if rss.Link != "" {
		links = append(links, rss.Link)
	}
	// NOTE(review): the iTunes subtitle is appended to the link list here;
	// a subtitle does not look like a URL, so confirm this is intended.
	if itunes := rss.ITunesExt; itunes != nil && itunes.Subtitle != "" {
		links = append(links, itunes.Subtitle)
	}
	for _, atomExt := range t.extensionsForKeys([]string{"atom", "atom10", "atom03"}, rss.Extensions) {
		// Indexing a missing key yields an empty value, so the inner loop is simply skipped.
		for _, el := range atomExt["link"] {
			switch el.Attrs["rel"] {
			case "", "alternate", "self":
				links = append(links, el.Attrs["href"])
			}
		}
	}
	return
}
func (t *DefaultRSSTranslator) translateFeedUpdated(rss *rss.Feed) (updated string) {
if rss.LastBuildDate != "" {
updated = rss.LastBuildDate
@ -175,6 +200,13 @@ func (t *DefaultRSSTranslator) translateFeedAuthor(rss *rss.Feed) (author *Perso
return
}
// translateFeedAuthors wraps the single author produced by translateFeedAuthor
// in a one-element slice. It returns nil when translateFeedAuthor yields no author.
func (t *DefaultRSSTranslator) translateFeedAuthors(rss *rss.Feed) (authors []*Person) {
	single := t.translateFeedAuthor(rss)
	if single == nil {
		return nil
	}
	return []*Person{single}
}
func (t *DefaultRSSTranslator) translateFeedLanguage(rss *rss.Feed) (language string) {
if rss.Language != "" {
language = rss.Language
@ -279,6 +311,12 @@ func (t *DefaultRSSTranslator) translateItemContent(rssItem *rss.Item) (content
// translateItemLink returns the Link field of the RSS item unchanged.
func (t *DefaultRSSTranslator) translateItemLink(rssItem *rss.Item) (link string) {
return rssItem.Link
}
// translateItemLinks returns the item's Link as a one-element slice, or nil
// when the item has no link.
func (t *DefaultRSSTranslator) translateItemLinks(rssItem *rss.Item) (links []string) {
	if rssItem.Link != "" {
		links = []string{rssItem.Link}
	}
	return links
}
func (t *DefaultRSSTranslator) translateItemUpdated(rssItem *rss.Item) (updated string) {
if rssItem.DublinCoreExt != nil && rssItem.DublinCoreExt.Date != nil {
@ -347,6 +385,14 @@ func (t *DefaultRSSTranslator) translateItemAuthor(rssItem *rss.Item) (author *P
return
}
// translateItemAuthors wraps the single author produced by translateItemAuthor
// in a one-element slice. It returns nil when translateItemAuthor yields no author.
func (t *DefaultRSSTranslator) translateItemAuthors(rssItem *rss.Item) (authors []*Person) {
	single := t.translateItemAuthor(rssItem)
	if single == nil {
		return nil
	}
	return []*Person{single}
}
func (t *DefaultRSSTranslator) translateItemGUID(rssItem *rss.Item) (guid string) {
if rssItem.GUID != nil {
guid = rssItem.GUID.Value
@ -449,9 +495,11 @@ func (t *DefaultAtomTranslator) Translate(feed interface{}) (*Feed, error) {
result.Description = t.translateFeedDescription(atom)
result.Link = t.translateFeedLink(atom)
result.FeedLink = t.translateFeedFeedLink(atom)
result.Links = t.translateFeedLinks(atom)
result.Updated = t.translateFeedUpdated(atom)
result.UpdatedParsed = t.translateFeedUpdatedParsed(atom)
result.Author = t.translateFeedAuthor(atom)
result.Authors = t.translateFeedAuthors(atom)
result.Language = t.translateFeedLanguage(atom)
result.Image = t.translateFeedImage(atom)
result.Copyright = t.translateFeedCopyright(atom)
@ -470,11 +518,13 @@ func (t *DefaultAtomTranslator) translateFeedItem(entry *atom.Entry) (item *Item
item.Description = t.translateItemDescription(entry)
item.Content = t.translateItemContent(entry)
item.Link = t.translateItemLink(entry)
item.Links = t.translateItemLinks(entry)
item.Updated = t.translateItemUpdated(entry)
item.UpdatedParsed = t.translateItemUpdatedParsed(entry)
item.Published = t.translateItemPublished(entry)
item.PublishedParsed = t.translateItemPublishedParsed(entry)
item.Author = t.translateItemAuthor(entry)
item.Authors = t.translateItemAuthors(entry)
item.GUID = t.translateItemGUID(entry)
item.Image = t.translateItemImage(entry)
item.Categories = t.translateItemCategories(entry)
@ -507,6 +557,15 @@ func (t *DefaultAtomTranslator) translateFeedFeedLink(atom *atom.Feed) (link str
return
}
// translateFeedLinks gathers the Href of every feed-level Atom link whose Rel
// is empty, "alternate" or "self", in the order the links appear in atom.Links.
// Links with any other Rel are ignored; the result is nil when none match.
func (t *DefaultAtomTranslator) translateFeedLinks(atom *atom.Feed) (links []string) {
	for _, link := range atom.Links {
		switch link.Rel {
		case "", "alternate", "self":
			links = append(links, link.Href)
		}
	}
	return
}
// translateFeedUpdated returns the Updated field of the Atom feed unchanged.
func (t *DefaultAtomTranslator) translateFeedUpdated(atom *atom.Feed) (updated string) {
return atom.Updated
}
@ -526,6 +585,22 @@ func (t *DefaultAtomTranslator) translateFeedAuthor(atom *atom.Feed) (author *Pe
return
}
// translateFeedAuthors converts every entry of atom.Authors into a Person,
// copying only Name and Email. A nil atom.Authors yields nil; a non-nil but
// empty list yields a non-nil empty slice.
func (t *DefaultAtomTranslator) translateFeedAuthors(atom *atom.Feed) (authors []*Person) {
	if atom.Authors == nil {
		return nil
	}
	authors = make([]*Person, 0, len(atom.Authors))
	for _, src := range atom.Authors {
		person := &Person{Name: src.Name, Email: src.Email}
		authors = append(authors, person)
	}
	return authors
}
// translateFeedLanguage returns the Language field of the Atom feed unchanged.
func (t *DefaultAtomTranslator) translateFeedLanguage(atom *atom.Feed) (language string) {
return atom.Language
}
@ -600,6 +675,15 @@ func (t *DefaultAtomTranslator) translateItemLink(entry *atom.Entry) (link strin
return
}
// translateItemLinks gathers the Href of every link on the Atom entry whose
// Rel is empty, "alternate" or "self", in the order the links appear in
// entry.Links. Links with any other Rel are ignored; the result is nil when
// none match.
func (t *DefaultAtomTranslator) translateItemLinks(entry *atom.Entry) (links []string) {
	for _, link := range entry.Links {
		switch link.Rel {
		case "", "alternate", "self":
			links = append(links, link.Href)
		}
	}
	return
}
// translateItemUpdated returns the Updated field of the Atom entry unchanged.
func (t *DefaultAtomTranslator) translateItemUpdated(entry *atom.Entry) (updated string) {
return entry.Updated
}
@ -608,12 +692,20 @@ func (t *DefaultAtomTranslator) translateItemUpdatedParsed(entry *atom.Entry) (u
return entry.UpdatedParsed
}
func (t *DefaultAtomTranslator) translateItemPublished(entry *atom.Entry) (updated string) {
return entry.Published
// translateItemPublished returns the entry's Published string, falling back to
// its Updated string when Published is empty.
func (t *DefaultAtomTranslator) translateItemPublished(entry *atom.Entry) (published string) {
	if entry.Published != "" {
		return entry.Published
	}
	return entry.Updated
}
func (t *DefaultAtomTranslator) translateItemPublishedParsed(entry *atom.Entry) (updated *time.Time) {
return entry.PublishedParsed
// translateItemPublishedParsed returns the entry's PublishedParsed time,
// falling back to UpdatedParsed when PublishedParsed is nil.
func (t *DefaultAtomTranslator) translateItemPublishedParsed(entry *atom.Entry) (published *time.Time) {
	if entry.PublishedParsed != nil {
		return entry.PublishedParsed
	}
	return entry.UpdatedParsed
}
func (t *DefaultAtomTranslator) translateItemAuthor(entry *atom.Entry) (author *Person) {
@ -626,6 +718,19 @@ func (t *DefaultAtomTranslator) translateItemAuthor(entry *atom.Entry) (author *
return
}
// translateItemAuthors converts every entry of entry.Authors into a Person,
// copying only Name and Email. A nil entry.Authors yields nil; a non-nil but
// empty list yields a non-nil empty slice.
func (t *DefaultAtomTranslator) translateItemAuthors(entry *atom.Entry) (authors []*Person) {
	if entry.Authors == nil {
		return nil
	}
	authors = make([]*Person, 0, len(entry.Authors))
	for _, src := range entry.Authors {
		person := &Person{Name: src.Name, Email: src.Email}
		authors = append(authors, person)
	}
	return authors
}
// translateItemGUID returns the ID field of the Atom entry as the item GUID.
func (t *DefaultAtomTranslator) translateItemGUID(entry *atom.Entry) (guid string) {
return entry.ID
}
@ -707,9 +812,12 @@ func (t *DefaultJSONTranslator) Translate(feed interface{}) (*Feed, error) {
result.Title = t.translateFeedTitle(json)
result.Link = t.translateFeedLink(json)
result.FeedLink = t.translateFeedFeedLink(json)
result.Links = t.translateFeedLinks(json)
result.Description = t.translateFeedDescription(json)
result.Image = t.translateFeedImage(json)
result.Author = t.translateFeedAuthor(json)
result.Authors = t.translateFeedAuthors(json)
result.Language = t.translateFeedLanguage(json)
result.Items = t.translateFeedItems(json)
result.Updated = t.translateFeedUpdated(json)
result.UpdatedParsed = t.translateFeedUpdatedParsed(json)
@ -729,6 +837,7 @@ func (t *DefaultJSONTranslator) translateFeedItem(jsonItem *json.Item) (item *It
item = &Item{}
item.GUID = t.translateItemGUID(jsonItem)
item.Link = t.translateItemLink(jsonItem)
item.Links = t.translateItemLinks(jsonItem)
item.Title = t.translateItemTitle(jsonItem)
item.Content = t.translateItemContent(jsonItem)
item.Description = t.translateItemDescription(jsonItem)
@ -738,6 +847,7 @@ func (t *DefaultJSONTranslator) translateFeedItem(jsonItem *json.Item) (item *It
item.Updated = t.translateItemUpdated(jsonItem)
item.UpdatedParsed = t.translateItemUpdatedParsed(jsonItem)
item.Author = t.translateItemAuthor(jsonItem)
item.Authors = t.translateItemAuthors(jsonItem)
item.Categories = t.translateItemCategories(jsonItem)
item.Enclosures = t.translateItemEnclosures(jsonItem)
// TODO ExternalURL is missing in global Feed
@ -770,6 +880,16 @@ func (t *DefaultJSONTranslator) translateFeedFeedLink(json *json.Feed) (link str
return
}
// translateFeedLinks returns the non-empty URLs of the JSON feed, HomePageURL
// first and FeedURL second. The result is nil when both are empty.
func (t *DefaultJSONTranslator) translateFeedLinks(json *json.Feed) (links []string) {
	for _, candidate := range []string{json.HomePageURL, json.FeedURL} {
		if candidate == "" {
			continue
		}
		links = append(links, candidate)
	}
	return links
}
func (t *DefaultJSONTranslator) translateFeedUpdated(json *json.Feed) (updated string) {
if len(json.Items) > 0 {
updated = json.Items[0].DateModified
@ -816,6 +936,31 @@ func (t *DefaultJSONTranslator) translateFeedAuthor(json *json.Feed) (author *Pe
return
}
// translateFeedAuthors builds the author list of a JSON feed.
//
// Each entry of json.Authors is converted to a Person by splitting its Name
// with shared.ParseNameAddress into a name and an e-mail address. Nil entries
// are skipped: Authors is a slice of pointers, so a `null` array element in
// the feed would presumably decode to a nil pointer and must not be
// dereferenced. When no author could be taken from json.Authors (the list is
// absent, empty, or holds only nil entries) the single author from
// translateFeedAuthor is used instead, if there is one.
//
// The result is nil when the feed provides no author at all.
func (t *DefaultJSONTranslator) translateFeedAuthors(json *json.Feed) (authors []*Person) {
	for _, a := range json.Authors {
		if a == nil {
			continue
		}
		name, address := shared.ParseNameAddress(a.Name)
		authors = append(authors, &Person{Name: name, Email: address})
	}
	if len(authors) == 0 {
		// Fall back to the single author so an empty "authors" array does
		// not hide it.
		if author := t.translateFeedAuthor(json); author != nil {
			authors = []*Person{author}
		}
	}
	// Author.URL is missing in global feed
	// Author.Avatar is missing in global feed
	return
}
// translateFeedLanguage returns the Language field of the JSON feed unchanged.
func (t *DefaultJSONTranslator) translateFeedLanguage(json *json.Feed) (language string) {
	return json.Language
}
func (t *DefaultJSONTranslator) translateFeedImage(json *json.Feed) (image *Image) {
// Using the Icon rather than the image
// icon (optional, string) is the URL of an image for the feed suitable to be used in a timeline. It should be square and relatively large — such as 512 x 512
@ -861,6 +1006,16 @@ func (t *DefaultJSONTranslator) translateItemLink(jsonItem *json.Item) (link str
return jsonItem.URL
}
// translateItemLinks returns the non-empty URLs of the JSON item, URL first
// and ExternalURL second. The result is nil when both are empty.
func (t *DefaultJSONTranslator) translateItemLinks(jsonItem *json.Item) (links []string) {
	for _, candidate := range []string{jsonItem.URL, jsonItem.ExternalURL} {
		if candidate == "" {
			continue
		}
		links = append(links, candidate)
	}
	return links
}
func (t *DefaultJSONTranslator) translateItemUpdated(jsonItem *json.Item) (updated string) {
if jsonItem.DateModified != "" {
updated = jsonItem.DateModified
@ -907,6 +1062,26 @@ func (t *DefaultJSONTranslator) translateItemAuthor(jsonItem *json.Item) (author
return
}
// translateItemAuthors builds the author list of a JSON feed item.
//
// Each entry of jsonItem.Authors is converted to a Person by splitting its
// Name with shared.ParseNameAddress into a name and an e-mail address. Nil
// entries are skipped: Authors is a slice of pointers, so a `null` array
// element in the feed would presumably decode to a nil pointer and must not
// be dereferenced. When no author could be taken from jsonItem.Authors (the
// list is absent, empty, or holds only nil entries) the single author from
// translateItemAuthor is used instead, if there is one.
//
// The result is nil when the item provides no author at all.
func (t *DefaultJSONTranslator) translateItemAuthors(jsonItem *json.Item) (authors []*Person) {
	for _, a := range jsonItem.Authors {
		if a == nil {
			continue
		}
		name, address := shared.ParseNameAddress(a.Name)
		authors = append(authors, &Person{Name: name, Email: address})
	}
	if len(authors) == 0 {
		// Fall back to the single author so an empty "authors" array does
		// not hide it.
		if author := t.translateItemAuthor(jsonItem); author != nil {
			authors = []*Person{author}
		}
	}
	// Author.URL is missing in global feed
	// Author.Avatar is missing in global feed
	return
}
func (t *DefaultJSONTranslator) translateItemGUID(jsonItem *json.Item) (guid string) {
if jsonItem.ID != "" {
guid = jsonItem.ID

View file

@ -1,7 +1,7 @@
language: go
go:
- 1.8.x
- 1.9.x
- 1.x
before_install:

View file

@ -1,15 +1,9 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
[[projects]]
name = "github.com/modern-go/concurrent"
packages = ["."]
revision = "e0a39a4cb4216ea8db28e22a69f4ec25610d513a"
version = "1.0.0"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "daee8a88b3498b61c5640056665b8b9eea062006f5e596bbb6a3ed9119a11ec7"
input-imports = []
solver-name = "gps-cdcl"
solver-version = 1

View file

@ -26,10 +26,6 @@
ignored = []
[[constraint]]
name = "github.com/modern-go/concurrent"
version = "1.0.0"
[prune]
go-tests = true
unused-packages = true

3
vendor/github.com/modern-go/reflect2/go.mod generated vendored Normal file
View file

@ -0,0 +1,3 @@
module github.com/modern-go/reflect2
go 1.12

23
vendor/github.com/modern-go/reflect2/go_above_118.go generated vendored Normal file
View file

@ -0,0 +1,23 @@
//+build go1.18
package reflect2
import (
"unsafe"
)
// m escapes into the return value, but the caller of mapiterinit
// doesn't let the return value escape.
//go:noescape
//go:linkname mapiterinit reflect.mapiterinit
func mapiterinit(rtype unsafe.Pointer, m unsafe.Pointer, it *hiter)
// UnsafeIterate returns a MapIterator for the map reached through obj.
// obj is read as a pointer to the map pointer: it is dereferenced once
// (*(*unsafe.Pointer)(obj)) before being handed to mapiterinit.
func (type2 *UnsafeMapType) UnsafeIterate(obj unsafe.Pointer) MapIterator {
// The iterator state is declared locally and filled in by mapiterinit,
// which takes it as an argument in this variant.
var it hiter
mapiterinit(type2.rtype, *(*unsafe.Pointer)(obj), &it)
// The returned iterator keeps a pointer to it together with the key and
// element type information of this map type.
return &UnsafeMapIterator{
hiter: &it,
pKeyRType: type2.pKeyRType,
pElemRType: type2.pElemRType,
}
}

View file

@ -1,8 +0,0 @@
//+build go1.7
package reflect2
import "unsafe"
//go:linkname resolveTypeOff reflect.resolveTypeOff
func resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer

View file

@ -6,6 +6,9 @@ import (
"unsafe"
)
// resolveTypeOff is declared without a body; the go:linkname directive binds
// it to reflect.resolveTypeOff.
//go:linkname resolveTypeOff reflect.resolveTypeOff
func resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer
// makemap is declared without a body; the go:linkname directive binds it to
// reflect.makemap.
//go:linkname makemap reflect.makemap
func makemap(rtype unsafe.Pointer, cap int) (m unsafe.Pointer)

21
vendor/github.com/modern-go/reflect2/go_below_118.go generated vendored Normal file
View file

@ -0,0 +1,21 @@
//+build !go1.18
package reflect2
import (
"unsafe"
)
// m escapes into the return value, but the caller of mapiterinit
// doesn't let the return value escape.
//go:noescape
//go:linkname mapiterinit reflect.mapiterinit
func mapiterinit(rtype unsafe.Pointer, m unsafe.Pointer) (val *hiter)
// UnsafeIterate builds a MapIterator for the map reached through obj.
// obj is dereferenced as a pointer to the map's own pointer value before it
// is passed to mapiterinit together with the map type's rtype.
// This variant is compiled under the !go1.18 build constraint at the top of
// this file, where mapiterinit returns the *hiter directly.
func (type2 *UnsafeMapType) UnsafeIterate(obj unsafe.Pointer) MapIterator {
return &UnsafeMapIterator{
// mapiterinit (linked to reflect.mapiterinit) hands back the iterator state.
hiter: mapiterinit(type2.rtype, *(*unsafe.Pointer)(obj)),
// Key and element rtype pointers are copied from the map type.
pKeyRType: type2.pKeyRType,
pElemRType: type2.pElemRType,
}
}

View file

@ -1,9 +0,0 @@
//+build !go1.7
package reflect2
import "unsafe"
func resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer {
return nil
}

View file

@ -1,14 +0,0 @@
//+build !go1.9
package reflect2
import (
"unsafe"
)
//go:linkname makemap reflect.makemap
func makemap(rtype unsafe.Pointer) (m unsafe.Pointer)
func makeMapWithSize(rtype unsafe.Pointer, cap int) unsafe.Pointer {
return makemap(rtype)
}

View file

@ -1,8 +1,9 @@
package reflect2
import (
"github.com/modern-go/concurrent"
"reflect"
"runtime"
"sync"
"unsafe"
)
@ -130,13 +131,13 @@ var ConfigSafe = Config{UseSafeImplementation: true}.Froze()
type frozenConfig struct {
useSafeImplementation bool
cache *concurrent.Map
cache *sync.Map
}
func (cfg Config) Froze() *frozenConfig {
return &frozenConfig{
useSafeImplementation: cfg.UseSafeImplementation,
cache: concurrent.NewMap(),
cache: new(sync.Map),
}
}
@ -288,11 +289,12 @@ func NoEscape(p unsafe.Pointer) unsafe.Pointer {
}
func UnsafeCastString(str string) []byte {
bytes := make([]byte, 0)
stringHeader := (*reflect.StringHeader)(unsafe.Pointer(&str))
sliceHeader := &reflect.SliceHeader{
Data: stringHeader.Data,
Cap: stringHeader.Len,
Len: stringHeader.Len,
}
return *(*[]byte)(unsafe.Pointer(sliceHeader))
sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&bytes))
sliceHeader.Data = stringHeader.Data
sliceHeader.Cap = stringHeader.Len
sliceHeader.Len = stringHeader.Len
runtime.KeepAlive(str)
return bytes
}

View file

@ -1,12 +0,0 @@
#!/usr/bin/env bash
set -e
echo "" > coverage.txt
for d in $(go list github.com/modern-go/reflect2-tests/... | grep -v vendor); do
go test -coverprofile=profile.out -coverpkg=github.com/modern-go/reflect2 $d
if [ -f profile.out ]; then
cat profile.out >> coverage.txt
rm profile.out
fi
done

View file

@ -1,17 +1,13 @@
// +build !gccgo
package reflect2
import (
"reflect"
"runtime"
"strings"
"sync"
"unsafe"
)
// typelinks1 for 1.5 ~ 1.6
//go:linkname typelinks1 reflect.typelinks
func typelinks1() [][]unsafe.Pointer
// typelinks2 for 1.7 ~
//go:linkname typelinks2 reflect.typelinks
func typelinks2() (sections []unsafe.Pointer, offset [][]int32)
@ -27,49 +23,10 @@ func discoverTypes() {
types = make(map[string]reflect.Type)
packages = make(map[string]map[string]reflect.Type)
ver := runtime.Version()
if ver == "go1.5" || strings.HasPrefix(ver, "go1.5.") {
loadGo15Types()
} else if ver == "go1.6" || strings.HasPrefix(ver, "go1.6.") {
loadGo15Types()
} else {
loadGo17Types()
}
loadGoTypes()
}
func loadGo15Types() {
var obj interface{} = reflect.TypeOf(0)
typePtrss := typelinks1()
for _, typePtrs := range typePtrss {
for _, typePtr := range typePtrs {
(*emptyInterface)(unsafe.Pointer(&obj)).word = typePtr
typ := obj.(reflect.Type)
if typ.Kind() == reflect.Ptr && typ.Elem().Kind() == reflect.Struct {
loadedType := typ.Elem()
pkgTypes := packages[loadedType.PkgPath()]
if pkgTypes == nil {
pkgTypes = map[string]reflect.Type{}
packages[loadedType.PkgPath()] = pkgTypes
}
types[loadedType.String()] = loadedType
pkgTypes[loadedType.Name()] = loadedType
}
if typ.Kind() == reflect.Slice && typ.Elem().Kind() == reflect.Ptr &&
typ.Elem().Elem().Kind() == reflect.Struct {
loadedType := typ.Elem().Elem()
pkgTypes := packages[loadedType.PkgPath()]
if pkgTypes == nil {
pkgTypes = map[string]reflect.Type{}
packages[loadedType.PkgPath()] = pkgTypes
}
types[loadedType.String()] = loadedType
pkgTypes[loadedType.Name()] = loadedType
}
}
}
}
func loadGo17Types() {
func loadGoTypes() {
var obj interface{} = reflect.TypeOf(0)
sections, offset := typelinks2()
for i, offs := range offset {

View file

@ -19,18 +19,12 @@ func typedslicecopy(elemType unsafe.Pointer, dst, src sliceHeader) int
//go:linkname mapassign reflect.mapassign
//go:noescape
func mapassign(rtype unsafe.Pointer, m unsafe.Pointer, key, val unsafe.Pointer)
func mapassign(rtype unsafe.Pointer, m unsafe.Pointer, key unsafe.Pointer, val unsafe.Pointer)
//go:linkname mapaccess reflect.mapaccess
//go:noescape
func mapaccess(rtype unsafe.Pointer, m unsafe.Pointer, key unsafe.Pointer) (val unsafe.Pointer)
// m escapes into the return value, but the caller of mapiterinit
// doesn't let the return value escape.
//go:noescape
//go:linkname mapiterinit reflect.mapiterinit
func mapiterinit(rtype unsafe.Pointer, m unsafe.Pointer) *hiter
//go:noescape
//go:linkname mapiternext reflect.mapiternext
func mapiternext(it *hiter)
@ -42,9 +36,21 @@ func ifaceE2I(rtype unsafe.Pointer, src interface{}, dst unsafe.Pointer)
// If you modify hiter, also change cmd/internal/gc/reflect.go to indicate
// the layout of this structure.
type hiter struct {
key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/internal/gc/range.go).
value unsafe.Pointer // Must be in second position (see cmd/internal/gc/range.go).
// rest fields are ignored
key unsafe.Pointer
value unsafe.Pointer
t unsafe.Pointer
h unsafe.Pointer
buckets unsafe.Pointer
bptr unsafe.Pointer
overflow *[]unsafe.Pointer
oldoverflow *[]unsafe.Pointer
startBucket uintptr
offset uint8
wrapped bool
B uint8
i uint8
bucket uintptr
checkBucket uintptr
}
// add returns p+x.

View file

@ -107,14 +107,6 @@ func (type2 *UnsafeMapType) Iterate(obj interface{}) MapIterator {
return type2.UnsafeIterate(objEFace.data)
}
func (type2 *UnsafeMapType) UnsafeIterate(obj unsafe.Pointer) MapIterator {
return &UnsafeMapIterator{
hiter: mapiterinit(type2.rtype, *(*unsafe.Pointer)(obj)),
pKeyRType: type2.pKeyRType,
pElemRType: type2.pElemRType,
}
}
type UnsafeMapIterator struct {
*hiter
pKeyRType unsafe.Pointer

View file

@ -663,6 +663,24 @@ func inHeadIM(p *parser) bool {
// Ignore the token.
return true
case a.Template:
// TODO: remove this divergence from the HTML5 spec.
//
// We don't handle all of the corner cases when mixing foreign
// content (i.e. <math> or <svg>) with <template>. Without this
// early return, we can get into an infinite loop, possibly because
// of the "TODO... further divergence" a little below.
//
// As a workaround, if we are mixing foreign content and templates,
// just ignore the rest of the HTML. Foreign content is rare and a
// relatively old HTML feature. Templates are also rare and a
// relatively new HTML feature. Their combination is very rare.
for _, e := range p.oe {
if e.Namespace != "" {
p.im = ignoreTheRemainingTokens
return true
}
}
p.addElement()
p.afe = append(p.afe, &scopeMarker)
p.framesetOK = false
@ -683,7 +701,7 @@ func inHeadIM(p *parser) bool {
if !p.oe.contains(a.Template) {
return true
}
// TODO: remove this divergence from the HTML5 spec.
// TODO: remove this further divergence from the HTML5 spec.
//
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
p.generateImpliedEndTags()
@ -2127,6 +2145,10 @@ func afterAfterFramesetIM(p *parser) bool {
return true
}
// ignoreTheRemainingTokens does no work and returns true on every call.
// inHeadIM assigns it to p.im from its a.Template case when some element in
// p.oe has a non-empty Namespace, as the workaround for mixing foreign
// content (<math> or <svg>) with <template>.
// NOTE(review): returning true presumably marks the token as consumed, in
// line with the "Ignore the token" returns in inHeadIM; confirm against the
// parser's main loop.
func ignoreTheRemainingTokens(p *parser) bool {
return true
}
const whitespaceOrNUL = whitespace + "\x00"
// Section 12.2.6.5

View file

@ -251,6 +251,13 @@ func (t Tag) Parent() Tag {
// ParseExtension parses s as an extension and returns it on success.
func ParseExtension(s string) (ext string, err error) {
defer func() {
if recover() != nil {
ext = ""
err = ErrSyntax
}
}()
scan := makeScannerString(s)
var end int
if n := len(scan.token); n != 1 {
@ -303,9 +310,17 @@ func (t Tag) Extensions() []string {
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
//
// If there are multiple types associated with a key, only the first will be
// returned. If there is no type associated with a key, it returns the empty
// string.
func (t Tag) TypeForKey(key string) string {
if start, end, _ := t.findTypeForKey(key); end != start {
return t.str[start:end]
if _, start, end, _ := t.findTypeForKey(key); end != start {
s := t.str[start:end]
if p := strings.IndexByte(s, '-'); p >= 0 {
s = s[:p]
}
return s
}
return ""
}
@ -329,13 +344,13 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
// Remove the setting if value is "".
if value == "" {
start, end, _ := t.findTypeForKey(key)
if start != end {
// Remove key tag and leading '-'.
start -= 4
start, sep, end, _ := t.findTypeForKey(key)
if start != sep {
// Remove a possible empty extension.
if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
switch {
case t.str[start-2] != '-': // has previous elements.
case end == len(t.str), // end of string
end+2 < len(t.str) && t.str[end+2] == '-': // end of extension
start -= 2
}
if start == int(t.pVariant) && end == len(t.str) {
@ -381,14 +396,14 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
t.str = string(buf[:uStart+len(b)])
} else {
s := t.str
start, end, hasExt := t.findTypeForKey(key)
if start == end {
start, sep, end, hasExt := t.findTypeForKey(key)
if start == sep {
if hasExt {
b = b[2:]
}
t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
t.str = fmt.Sprintf("%s-%s%s", s[:sep], b, s[end:])
} else {
t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
t.str = fmt.Sprintf("%s-%s%s", s[:start+3], value, s[end:])
}
}
return t, nil
@ -399,10 +414,10 @@ func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
// wasn't found. The hasExt return value reports whether an -u extension was present.
// Note: the extensions are typically very small and are likely to contain
// only one key-type pair.
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
func (t Tag) findTypeForKey(key string) (start, sep, end int, hasExt bool) {
p := int(t.pExt)
if len(key) != 2 || p == len(t.str) || p == 0 {
return p, p, false
return p, p, p, false
}
s := t.str
@ -410,10 +425,10 @@ func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
for p++; s[p] != 'u'; p++ {
if s[p] > 'u' {
p--
return p, p, false
return p, p, p, false
}
if p = nextExtension(s, p); p == len(s) {
return len(s), len(s), false
return len(s), len(s), len(s), false
}
}
// Proceed to the hyphen following the extension name.
@ -424,40 +439,28 @@ func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
// Iterate over keys until we get the end of a section.
for {
// p points to the hyphen preceding the current token.
if p3 := p + 3; s[p3] == '-' {
// Found a key.
// Check whether we just processed the key that was requested.
if curKey == key {
return start, p, true
end = p
for p++; p < len(s) && s[p] != '-'; p++ {
}
// Set to the next key and continue scanning type tokens.
curKey = s[p+1 : p3]
n := p - end - 1
if n <= 2 && curKey == key {
if sep < end {
sep++
}
return start, sep, end, true
}
switch n {
case 0, // invalid string
1: // next extension
return end, end, end, true
case 2:
// next key
curKey = s[end+1 : p]
if curKey > key {
return p, p, true
return end, end, end, true
}
// Start of the type token sequence.
start = p + 4
// A type is at least 3 characters long.
p += 7 // 4 + 3
} else {
// Attribute or type, which is at least 3 characters long.
p += 4
}
// p points past the third character of a type or attribute.
max := p + 5 // maximum length of token plus hyphen.
if len(s) < max {
max = len(s)
}
for ; p < max && s[p] != '-'; p++ {
}
// Bail if we have exhausted all tokens or if the next token starts
// a new extension.
if p == len(s) || s[p+2] == '-' {
if curKey == key {
return start, p, true
}
return p, p, true
start = end
sep = p
}
}
}
@ -465,7 +468,14 @@ func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
// ParseBase parses a 2- or 3-letter ISO 639 code.
// It returns a ValueError if s is a well-formed but unknown language identifier
// or another error if another error occurred.
func ParseBase(s string) (Language, error) {
func ParseBase(s string) (l Language, err error) {
defer func() {
if recover() != nil {
l = 0
err = ErrSyntax
}
}()
if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
@ -476,7 +486,14 @@ func ParseBase(s string) (Language, error) {
// ParseScript parses a 4-letter ISO 15924 code.
// It returns a ValueError if s is a well-formed but unknown script identifier
// or another error if another error occurred.
func ParseScript(s string) (Script, error) {
func ParseScript(s string) (scr Script, err error) {
defer func() {
if recover() != nil {
scr = 0
err = ErrSyntax
}
}()
if len(s) != 4 {
return 0, ErrSyntax
}
@ -493,7 +510,14 @@ func EncodeM49(r int) (Region, error) {
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
// It returns a ValueError if s is a well-formed but unknown region identifier
// or another error if another error occurred.
func ParseRegion(s string) (Region, error) {
func ParseRegion(s string) (r Region, err error) {
defer func() {
if recover() != nil {
r = 0
err = ErrSyntax
}
}()
if n := len(s); n < 2 || 3 < n {
return 0, ErrSyntax
}
@ -582,7 +606,14 @@ type Variant struct {
// ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant.
func ParseVariant(s string) (Variant, error) {
func ParseVariant(s string) (v Variant, err error) {
defer func() {
if recover() != nil {
v = Variant{}
err = ErrSyntax
}
}()
s = strings.ToLower(s)
if id, ok := variantIndex[s]; ok {
return Variant{id, s}, nil

View file

@ -138,7 +138,7 @@ func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
b = make([]byte, n)
copy(b, s.b[:oldStart])
} else {
b = s.b[:n:n]
b = s.b[:n]
}
copy(b[end:], s.b[oldEnd:])
s.b = b
@ -232,6 +232,13 @@ func Parse(s string) (t Tag, err error) {
if s == "" {
return Und, ErrSyntax
}
defer func() {
if recover() != nil {
t = Und
err = ErrSyntax
return
}
}()
if len(s) <= maxAltTaglen {
b := [maxAltTaglen]byte{}
for i, c := range s {
@ -483,7 +490,7 @@ func parseExtensions(scan *scanner) int {
func parseExtension(scan *scanner) int {
start, end := scan.start, scan.end
switch scan.token[0] {
case 'u':
case 'u': // https://www.ietf.org/rfc/rfc6067.txt
attrStart := end
scan.scan()
for last := []byte{}; len(scan.token) > 2; scan.scan() {
@ -503,27 +510,29 @@ func parseExtension(scan *scanner) int {
last = scan.token
end = scan.end
}
// Scan key-type sequences. A key is of length 2 and may be followed
// by 0 or more "type" subtags from 3 to the maximum of 8 letters.
var last, key []byte
for attrEnd := end; len(scan.token) == 2; last = key {
key = scan.token
keyEnd := scan.end
end = scan.acceptMinSize(3)
end = scan.end
for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
end = scan.end
}
// TODO: check key value validity
if keyEnd == end || bytes.Compare(key, last) != 1 {
if bytes.Compare(key, last) != 1 || scan.err != nil {
// We have an invalid key or the keys are not sorted.
// Start scanning keys from scratch and reorder.
p := attrEnd + 1
scan.next = p
keys := [][]byte{}
for scan.scan(); len(scan.token) == 2; {
keyStart, keyEnd := scan.start, scan.end
end = scan.acceptMinSize(3)
if keyEnd != end {
keys = append(keys, scan.b[keyStart:end])
} else {
scan.setError(ErrSyntax)
end = keyStart
keyStart := scan.start
end = scan.end
for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
end = scan.end
}
keys = append(keys, scan.b[keyStart:end])
}
sort.Stable(bytesSort{keys, 2})
if n := len(keys); n > 0 {
@ -547,7 +556,7 @@ func parseExtension(scan *scanner) int {
break
}
}
case 't':
case 't': // https://www.ietf.org/rfc/rfc6497.txt
scan.scan()
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan)

View file

@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.2
// +build !go1.2
package language

View file

@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.2
// +build go1.2
package language

View file

@ -412,6 +412,10 @@ func (t Tag) Extensions() []Extension {
// are of the allowed values defined for the Unicode locale extension ('u') in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
//
// If there are multiple types associated with a key, only the first will be
// returned. If there is no type associated with a key, it returns the empty
// string.
func (t Tag) TypeForKey(key string) string {
if !compact.Tag(t).MayHaveExtensions() {
if key != "rg" && key != "va" {

View file

@ -43,6 +43,13 @@ func Parse(s string) (t Tag, err error) {
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the canonicalization type c.
func (c CanonType) Parse(s string) (t Tag, err error) {
defer func() {
if recover() != nil {
t = Tag{}
err = language.ErrSyntax
}
}()
tt, err := language.Parse(s)
if err != nil {
return makeTag(tt), err
@ -79,6 +86,13 @@ func Compose(part ...interface{}) (t Tag, err error) {
// tag is returned after canonicalizing using CanonType c. If one or more errors
// are encountered, one of the errors is returned.
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
defer func() {
if recover() != nil {
t = Tag{}
err = language.ErrSyntax
}
}()
var b language.Builder
if err = update(&b, part...); err != nil {
return und, err
@ -142,6 +156,14 @@ var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
// Tags with a weight of zero will be dropped. An error will be returned if the
// input could not be parsed.
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
defer func() {
if recover() != nil {
tag = nil
q = nil
err = language.ErrSyntax
}
}()
var entry string
for s != "" {
if entry, s = split(s, ','); entry == "" {

View file

@ -47,7 +47,7 @@ const (
_Zzzz = 251
)
var regionToGroups = []uint8{ // 357 elements
var regionToGroups = []uint8{ // 358 elements
// Entry 0 - 3F
0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
@ -98,8 +98,8 @@ var regionToGroups = []uint8{ // 357 elements
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00,
} // Size: 381 bytes
0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
} // Size: 382 bytes
var paradigmLocales = [][3]uint16{ // 3 elements
0: [3]uint16{0x139, 0x0, 0x7b},
@ -295,4 +295,4 @@ var matchRegion = []regionIntelligibility{ // 15 elements
14: {lang: 0x529, script: 0x3c, group: 0x80, distance: 0x5},
} // Size: 114 bytes
// Total table size 1471 bytes (1KiB); checksum: 4CB1CD46
// Total table size 1472 bytes (1KiB); checksum: F86C669

View file

@ -6,7 +6,6 @@
### Converts HTML into text of the markdown-flavored variety
## Introduction
Ensure your emails are readable by all!
@ -19,7 +18,6 @@ There are still lots of improvements to be had, but FWIW this has worked fine fo
It requires go 1.x or newer ;)
## Download the package
```bash
@ -28,6 +26,8 @@ go get jaytaylor.com/html2text
## Example usage
### Library
```go
package main
@ -110,6 +110,11 @@ Here is some more information:
+-------------+-------------+
```
### Command line
```
echo '<div>hi</div>' | html2text
```
## Unit-tests
@ -119,12 +124,10 @@ Running the unit-tests is straightforward and standard:
go test
```
# License
Permissive MIT license.
## Contact
You are more than welcome to open issues and send pull requests if you find a bug or want a new feature.

View file

@ -18,6 +18,7 @@ type Options struct {
PrettyTables bool // Turns on pretty ASCII rendering for table elements.
PrettyTablesOptions *PrettyTablesOptions // Configures pretty ASCII rendering for table elements.
OmitLinks bool // Turns on omitting links
TextOnly bool // Returns only plain text
}
// PrettyTablesOptions overrides tablewriter behaviors
@ -157,6 +158,9 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
}
str := subCtx.buf.String()
if ctx.options.TextOnly {
return ctx.emit(str + ".\n\n")
}
dividerLen := 0
for _, line := range strings.Split(str, "\n") {
if lineLen := len([]rune(line)); lineLen-1 > dividerLen {
@ -177,7 +181,9 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
case atom.Blockquote:
ctx.blockquoteLevel++
if !ctx.options.TextOnly {
ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) + " "
}
if err := ctx.emit("\n"); err != nil {
return err
}
@ -190,7 +196,9 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
return err
}
ctx.blockquoteLevel--
if !ctx.options.TextOnly {
ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel)
}
if ctx.blockquoteLevel > 0 {
ctx.prefix += " "
}
@ -213,9 +221,11 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
return err
case atom.Li:
if !ctx.options.TextOnly {
if err := ctx.emit("* "); err != nil {
return err
}
}
if err := ctx.traverseChildren(node); err != nil {
return err
@ -230,6 +240,9 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
return err
}
str := subCtx.buf.String()
if ctx.options.TextOnly {
return ctx.emit(str + ".")
}
return ctx.emit("*" + str + "*")
case atom.A:
@ -254,7 +267,7 @@ func (ctx *textifyTraverseContext) handleElement(node *html.Node) error {
if attrVal := getAttrVal(node, "href"); attrVal != "" {
attrVal = ctx.normalizeHrefLink(attrVal)
// Don't print link href if it matches link element content or if the link is empty.
if !ctx.options.OmitLinks && attrVal != "" && linkText != attrVal {
if (!ctx.options.OmitLinks && attrVal != "" && linkText != attrVal) || !ctx.options.TextOnly {
hrefLink = "( " + attrVal + " )"
}
}

24
vendor/modules.txt vendored
View file

@ -1,21 +1,21 @@
# github.com/PuerkitoBio/goquery v1.6.1
# github.com/PuerkitoBio/goquery v1.8.0
## explicit
github.com/PuerkitoBio/goquery
# github.com/andybalholm/cascadia v1.2.0
## explicit
# github.com/andybalholm/cascadia v1.3.1
github.com/andybalholm/cascadia
# github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9
## explicit
github.com/chilts/sid
# github.com/json-iterator/go v1.1.10
# github.com/json-iterator/go v1.1.12
## explicit
github.com/json-iterator/go
# github.com/mattn/go-runewidth v0.0.10
# github.com/mattn/go-runewidth v0.0.13
## explicit
github.com/mattn/go-runewidth
# github.com/mattn/go-xmpp v0.0.0-20210121082723-b40e1294994d
# github.com/mattn/go-xmpp v0.0.0-20211029151415-912ba614897a
## explicit
github.com/mattn/go-xmpp
# github.com/mmcdole/gofeed v1.1.0
# github.com/mmcdole/gofeed v1.1.3
## explicit
github.com/mmcdole/gofeed
github.com/mmcdole/gofeed/atom
@ -29,26 +29,24 @@ github.com/mmcdole/goxpp
# github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd
## explicit
github.com/modern-go/concurrent
# github.com/modern-go/reflect2 v1.0.1
## explicit
# github.com/modern-go/reflect2 v1.0.2
github.com/modern-go/reflect2
# github.com/olekukonko/tablewriter v0.0.5
## explicit
github.com/olekukonko/tablewriter
# github.com/rivo/uniseg v0.2.0
## explicit
github.com/rivo/uniseg
# github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf
## explicit
github.com/ssor/bom
# github.com/stretchr/testify v1.5.1
## explicit
# golang.org/x/net v0.0.0-20210119194325-5f4716e94777
# golang.org/x/net v0.0.0-20211209124913-491a49abca63
## explicit
golang.org/x/net/html
golang.org/x/net/html/atom
golang.org/x/net/html/charset
# golang.org/x/text v0.3.5
# golang.org/x/text v0.3.7
## explicit
golang.org/x/text/encoding
golang.org/x/text/encoding/charmap
@ -67,6 +65,6 @@ golang.org/x/text/internal/utf8internal
golang.org/x/text/language
golang.org/x/text/runes
golang.org/x/text/transform
# jaytaylor.com/html2text v0.0.0-20200412013138-3577fbdbcff7
# jaytaylor.com/html2text v0.0.0-20211105163654-bc68cce691ba
## explicit
jaytaylor.com/html2text