feed-to-muc/vendor/github.com/andybalholm/cascadia/selector.go

587 lines
14 KiB
Go
Raw Normal View History

2019-05-31 11:07:56 +02:00
package cascadia
import (
"fmt"
"regexp"
"strings"
"golang.org/x/net/html"
)
2020-10-24 12:40:39 +02:00
// Matcher is the interface for basic selector functionality: deciding
// whether a single node matches.
type Matcher interface {
// Match reports whether the selector matches n.
Match(n *html.Node) bool
}
2019-05-31 11:07:56 +02:00
2020-10-24 12:40:39 +02:00
// Sel is the interface for all the functionality provided by selectors.
type Sel interface {
Matcher
Specificity() Specificity
2019-05-31 11:07:56 +02:00
2020-10-24 12:40:39 +02:00
// Returns a CSS input compiling to this selector.
String() string
// Returns a pseudo-element, or an empty string.
PseudoElement() string
}
// Parse parses a selector. Use `ParseWithPseudoElement`
// if you need support for pseudo-elements.
func Parse(sel string) (Sel, error) {
p := &parser{s: sel}
compiled, err := p.parseSelector()
if err != nil {
return nil, err
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
if p.i < len(sel) {
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
}
return compiled, nil
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
// ParseWithPseudoElement parses a single selector,
// with support for pseudo-element.
func ParseWithPseudoElement(sel string) (Sel, error) {
p := &parser{s: sel, acceptPseudoElements: true}
compiled, err := p.parseSelector()
if err != nil {
return nil, err
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
if p.i < len(sel) {
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
}
return compiled, nil
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
// ParseGroup parses a selector, or a group of selectors separated by commas.
// Use `ParseGroupWithPseudoElements`
// if you need support for pseudo-elements.
func ParseGroup(sel string) (SelectorGroup, error) {
2019-05-31 11:07:56 +02:00
p := &parser{s: sel}
compiled, err := p.parseSelectorGroup()
if err != nil {
return nil, err
}
if p.i < len(sel) {
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
}
return compiled, nil
}
2020-10-24 12:40:39 +02:00
// ParseGroupWithPseudoElements parses a selector, or a group of selectors
// separated by commas, with support for pseudo-elements (the parser is
// created with acceptPseudoElements set).
//
// It returns the parser's error unchanged, or an error reporting the number
// of unconsumed bytes when the parser stops before the end of sel.
func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) {
	prs := &parser{s: sel, acceptPseudoElements: true}
	group, err := prs.parseSelectorGroup()
	switch {
	case err != nil:
		return nil, err
	case prs.i < len(sel):
		// The parser stopped early: trailing input is an error.
		leftover := len(sel) - prs.i
		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, leftover)
	}
	return group, nil
}
// A Selector is a function that reports whether a node matches.
//
// This type is maintained for compatibility; the Sel and Matcher interfaces
// are the newer, more idiomatic API and are recommended instead.
type Selector func(*html.Node) bool
// Compile parses sel with ParseGroup and, on success, returns the group's
// Match method as a Selector that can be applied to html.Node values.
// On failure it returns the parse error and a nil Selector.
func Compile(sel string) (Selector, error) {
	group, err := ParseGroup(sel)
	if err != nil {
		return nil, err
	}
	var match Selector = group.Match
	return match, nil
}
2019-05-31 11:07:56 +02:00
// MustCompile is like Compile, but panics with the parse error instead of
// returning it.
func MustCompile(sel string) Selector {
	s, err := Compile(sel)
	if err != nil {
		panic(err)
	}
	return s
}
// MatchAll returns a slice of the nodes that match the selector, taken from
// n itself and everything below it. The result is nil when nothing matches.
func (s Selector) MatchAll(n *html.Node) []*html.Node {
	var found []*html.Node
	return s.matchAllInto(n, found)
}
// matchAllInto appends to storage every node in the tree rooted at n that
// satisfies s, and returns the extended slice. The node itself is tested
// before its children, and children are visited in sibling order.
func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node {
	if s(n) {
		storage = append(storage, n)
	}
	child := n.FirstChild
	for child != nil {
		storage = s.matchAllInto(child, storage)
		child = child.NextSibling
	}
	return storage
}
2020-10-24 12:40:39 +02:00
// queryInto appends to storage every descendant of n that m matches, and
// returns the extended slice. Unlike Selector.matchAllInto, n itself is
// never tested: only its children and their subtrees are.
func queryInto(n *html.Node, m Matcher, storage []*html.Node) []*html.Node {
	child := n.FirstChild
	for child != nil {
		if m.Match(child) {
			storage = append(storage, child)
		}
		// Descend before moving on to the next sibling.
		storage = queryInto(child, m, storage)
		child = child.NextSibling
	}
	return storage
}
// QueryAll returns a slice of all the nodes that match m, from the
// descendants of n. The result is nil when nothing matches.
func QueryAll(n *html.Node, m Matcher) []*html.Node {
	var matches []*html.Node
	return queryInto(n, m, matches)
}
2019-05-31 11:07:56 +02:00
// Match reports whether the node matches the selector by calling the
// underlying function on n.
func (s Selector) Match(n *html.Node) bool {
	matched := s(n)
	return matched
}
// MatchFirst returns the first node that matches s, from n and its children.
// n is tested first, then each child subtree in sibling order. It returns
// nil when nothing matches.
func (s Selector) MatchFirst(n *html.Node) *html.Node {
	if s.Match(n) {
		return n
	}
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if found := s.MatchFirst(child); found != nil {
			return found
		}
	}
	return nil
}
2020-10-24 12:40:39 +02:00
// Query returns the first node that matches m, from the descendants of n.
// Each child is tested before its own subtree is searched; n itself is not
// tested. If none matches, it returns nil.
func Query(n *html.Node, m Matcher) *html.Node {
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if m.Match(child) {
			return child
		}
		found := Query(child, m)
		if found != nil {
			return found
		}
	}
	return nil
}
2019-05-31 11:07:56 +02:00
// Filter returns the nodes in nodes that match the selector, in their
// original order. The result is nil when no node matches.
func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
	for i := range nodes {
		if node := nodes[i]; s(node) {
			result = append(result, node)
		}
	}
	return result
}
2020-10-24 12:40:39 +02:00
// Filter returns the nodes that match m.
func Filter(nodes []*html.Node, m Matcher) (result []*html.Node) {
for _, n := range nodes {
if m.Match(n) {
result = append(result, n)
}
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
return result
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
type tagSelector struct {
tag string
}
2019-05-31 11:07:56 +02:00
2020-10-24 12:40:39 +02:00
// Matches elements with a given tag name.
func (t tagSelector) Match(n *html.Node) bool {
return n.Type == html.ElementNode && n.Data == t.tag
}
2019-05-31 11:07:56 +02:00
2020-10-24 12:40:39 +02:00
func (c tagSelector) Specificity() Specificity {
return Specificity{0, 0, 1}
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
func (c tagSelector) PseudoElement() string {
return ""
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
type classSelector struct {
class string
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
// Matches elements by class attribute.
func (t classSelector) Match(n *html.Node) bool {
return matchAttribute(n, "class", func(s string) bool {
2021-12-13 10:29:47 +01:00
return matchInclude(t.class, s, false)
2020-10-24 12:40:39 +02:00
})
}
func (c classSelector) Specificity() Specificity {
return Specificity{0, 1, 0}
}
func (c classSelector) PseudoElement() string {
return ""
}
// idSelector selects elements by id.
type idSelector struct {
	id string
}

// Match reports whether n has an "id" attribute exactly equal to the
// selector's id.
func (s idSelector) Match(n *html.Node) bool {
	sameID := func(v string) bool {
		return v == s.id
	}
	return matchAttribute(n, "id", sameID)
}

// Specificity returns Specificity{1, 0, 0}.
func (s idSelector) Specificity() Specificity {
	return Specificity{1, 0, 0}
}

// PseudoElement always returns the empty string.
func (s idSelector) PseudoElement() string {
	return ""
}
// attrSelector selects elements by attribute.
type attrSelector struct {
	key         string         // attribute name to look up
	val         string         // value the attribute is compared against
	operation   string         // comparison operator; "" means presence only
	regexp      *regexp.Regexp // pattern used by the "#=" operation
	insensitive bool           // compare values ignoring case
}
// Matches elements by attribute value.
func (t attrSelector) Match(n *html.Node) bool {
switch t.operation {
case "":
return matchAttribute(n, t.key, func(string) bool { return true })
case "=":
2021-12-13 10:29:47 +01:00
return matchAttribute(n, t.key, func(s string) bool { return matchInsensitiveValue(s, t.val, t.insensitive) })
2020-10-24 12:40:39 +02:00
case "!=":
2021-12-13 10:29:47 +01:00
return attributeNotEqualMatch(t.key, t.val, n, t.insensitive)
2020-10-24 12:40:39 +02:00
case "~=":
// matches elements where the attribute named key is a whitespace-separated list that includes val.
2021-12-13 10:29:47 +01:00
return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s, t.insensitive) })
2020-10-24 12:40:39 +02:00
case "|=":
2021-12-13 10:29:47 +01:00
return attributeDashMatch(t.key, t.val, n, t.insensitive)
2020-10-24 12:40:39 +02:00
case "^=":
2021-12-13 10:29:47 +01:00
return attributePrefixMatch(t.key, t.val, n, t.insensitive)
2020-10-24 12:40:39 +02:00
case "$=":
2021-12-13 10:29:47 +01:00
return attributeSuffixMatch(t.key, t.val, n, t.insensitive)
2020-10-24 12:40:39 +02:00
case "*=":
2021-12-13 10:29:47 +01:00
return attributeSubstringMatch(t.key, t.val, n, t.insensitive)
2020-10-24 12:40:39 +02:00
case "#=":
return attributeRegexMatch(t.key, t.regexp, n)
default:
panic(fmt.Sprintf("unsuported operation : %s", t.operation))
}
2019-05-31 11:07:56 +02:00
}
2021-12-13 10:29:47 +01:00
// matchInsensitiveValue reports whether userAttr and realAttr are equal.
// When ignoreCase is true the comparison uses strings.EqualFold; otherwise
// the strings must be identical. The comparison is symmetric, so the order
// of the two values does not affect the result.
func matchInsensitiveValue(userAttr string, realAttr string, ignoreCase bool) bool {
	if !ignoreCase {
		return userAttr == realAttr
	}
	return strings.EqualFold(userAttr, realAttr)
}
2020-10-24 12:40:39 +02:00
// matchAttribute reports whether n is an element node carrying an attribute
// named key whose value satisfies f. Every attribute with that key is tried
// in order until one satisfies f.
func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
	if n.Type != html.ElementNode {
		return false
	}
	for i := range n.Attr {
		attr := n.Attr[i]
		if attr.Key != key {
			continue
		}
		if f(attr.Val) {
			return true
		}
	}
	return false
}
// attributeNotEqualMatch matches elements where
2019-05-31 11:07:56 +02:00
// the attribute named key does not have the value val.
2021-12-13 10:29:47 +01:00
func attributeNotEqualMatch(key, val string, n *html.Node, ignoreCase bool) bool {
2020-10-24 12:40:39 +02:00
if n.Type != html.ElementNode {
return false
}
for _, a := range n.Attr {
2021-12-13 10:29:47 +01:00
if a.Key == key && matchInsensitiveValue(a.Val, val, ignoreCase) {
2019-05-31 11:07:56 +02:00
return false
}
}
2020-10-24 12:40:39 +02:00
return true
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
// returns true if s is a whitespace-separated list that includes val.
2021-12-13 10:29:47 +01:00
func matchInclude(val string, s string, ignoreCase bool) bool {
2020-10-24 12:40:39 +02:00
for s != "" {
i := strings.IndexAny(s, " \t\r\n\f")
if i == -1 {
2021-12-13 10:29:47 +01:00
return matchInsensitiveValue(s, val, ignoreCase)
2020-10-24 12:40:39 +02:00
}
2021-12-13 10:29:47 +01:00
if matchInsensitiveValue(s[:i], val, ignoreCase) {
2020-10-24 12:40:39 +02:00
return true
}
s = s[i+1:]
}
return false
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
// matches elements where the attribute named key equals val or starts with val plus a hyphen.
2021-12-13 10:29:47 +01:00
func attributeDashMatch(key, val string, n *html.Node, ignoreCase bool) bool {
2020-10-24 12:40:39 +02:00
return matchAttribute(n, key,
2019-05-31 11:07:56 +02:00
func(s string) bool {
2021-12-13 10:29:47 +01:00
if matchInsensitiveValue(s, val, ignoreCase) {
2019-05-31 11:07:56 +02:00
return true
}
if len(s) <= len(val) {
return false
}
2021-12-13 10:29:47 +01:00
if matchInsensitiveValue(s[:len(val)], val, ignoreCase) && s[len(val)] == '-' {
2019-05-31 11:07:56 +02:00
return true
}
return false
})
}
2020-10-24 12:40:39 +02:00
// attributePrefixMatch returns a Selector that matches elements where
2019-05-31 11:07:56 +02:00
// the attribute named key starts with val.
2021-12-13 10:29:47 +01:00
func attributePrefixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
2020-10-24 12:40:39 +02:00
return matchAttribute(n, key,
2019-05-31 11:07:56 +02:00
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
2021-12-13 10:29:47 +01:00
if ignoreCase {
return strings.HasPrefix(strings.ToLower(s), strings.ToLower(val))
}
2019-05-31 11:07:56 +02:00
return strings.HasPrefix(s, val)
})
}
2020-10-24 12:40:39 +02:00
// attributeSuffixMatch matches elements where
2019-05-31 11:07:56 +02:00
// the attribute named key ends with val.
2021-12-13 10:29:47 +01:00
func attributeSuffixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
2020-10-24 12:40:39 +02:00
return matchAttribute(n, key,
2019-05-31 11:07:56 +02:00
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
2021-12-13 10:29:47 +01:00
if ignoreCase {
return strings.HasSuffix(strings.ToLower(s), strings.ToLower(val))
}
2019-05-31 11:07:56 +02:00
return strings.HasSuffix(s, val)
})
}
2020-10-24 12:40:39 +02:00
// attributeSubstringMatch matches nodes where
2019-05-31 11:07:56 +02:00
// the attribute named key contains val.
2021-12-13 10:29:47 +01:00
func attributeSubstringMatch(key, val string, n *html.Node, ignoreCase bool) bool {
2020-10-24 12:40:39 +02:00
return matchAttribute(n, key,
2019-05-31 11:07:56 +02:00
func(s string) bool {
if strings.TrimSpace(s) == "" {
return false
}
2021-12-13 10:29:47 +01:00
if ignoreCase {
return strings.Contains(strings.ToLower(s), strings.ToLower(val))
}
2019-05-31 11:07:56 +02:00
return strings.Contains(s, val)
})
}
2020-10-24 12:40:39 +02:00
// attributeRegexMatch matches nodes where
2019-05-31 11:07:56 +02:00
// the attribute named key matches the regular expression rx
2020-10-24 12:40:39 +02:00
func attributeRegexMatch(key string, rx *regexp.Regexp, n *html.Node) bool {
return matchAttribute(n, key,
2019-05-31 11:07:56 +02:00
func(s string) bool {
return rx.MatchString(s)
})
}
2020-10-24 12:40:39 +02:00
// Specificity returns Specificity{0, 1, 0}.
func (t attrSelector) Specificity() Specificity {
	return Specificity{0, 1, 0}
}

// PseudoElement always returns the empty string.
func (t attrSelector) PseudoElement() string {
	return ""
}
2021-12-13 10:29:47 +01:00
// see pseudo_classes.go for pseudo classes selectors
2020-10-24 12:40:39 +02:00
2021-12-13 10:29:47 +01:00
// on a static context, some selectors can't match anything
type neverMatchSelector struct {
2020-10-24 12:40:39 +02:00
value string
}
2021-12-13 10:29:47 +01:00
func (s neverMatchSelector) Match(n *html.Node) bool {
2020-10-24 12:40:39 +02:00
return false
}
2019-05-31 11:07:56 +02:00
2021-12-13 10:29:47 +01:00
func (s neverMatchSelector) Specificity() Specificity {
return Specificity{0, 0, 0}
2020-10-24 12:40:39 +02:00
}
2021-12-13 10:29:47 +01:00
func (c neverMatchSelector) PseudoElement() string {
2020-10-24 12:40:39 +02:00
return ""
}
type compoundSelector struct {
selectors []Sel
pseudoElement string
}
// Matches elements if each sub-selectors matches.
func (t compoundSelector) Match(n *html.Node) bool {
if len(t.selectors) == 0 {
return n.Type == html.ElementNode
}
for _, sel := range t.selectors {
if !sel.Match(n) {
2019-05-31 11:07:56 +02:00
return false
}
2020-10-24 12:40:39 +02:00
}
return true
}
2019-05-31 11:07:56 +02:00
2020-10-24 12:40:39 +02:00
func (s compoundSelector) Specificity() Specificity {
var out Specificity
for _, sel := range s.selectors {
out = out.Add(sel.Specificity())
}
if s.pseudoElement != "" {
// https://drafts.csswg.org/selectors-3/#specificity
out = out.Add(Specificity{0, 0, 1})
}
return out
}
2019-05-31 11:07:56 +02:00
2020-10-24 12:40:39 +02:00
func (c compoundSelector) PseudoElement() string {
return c.pseudoElement
}
type combinedSelector struct {
first Sel
combinator byte
second Sel
}
func (t combinedSelector) Match(n *html.Node) bool {
if t.first == nil {
return false // maybe we should panic
}
switch t.combinator {
case 0:
return t.first.Match(n)
case ' ':
return descendantMatch(t.first, t.second, n)
case '>':
return childMatch(t.first, t.second, n)
case '+':
return siblingMatch(t.first, t.second, true, n)
case '~':
return siblingMatch(t.first, t.second, false, n)
default:
panic("unknown combinator")
2019-05-31 11:07:56 +02:00
}
}
2020-10-24 12:40:39 +02:00
// matches an element if it matches d and has an ancestor that matches a.
func descendantMatch(a, d Matcher, n *html.Node) bool {
if !d.Match(n) {
return false
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
for p := n.Parent; p != nil; p = p.Parent {
if a.Match(p) {
return true
}
}
return false
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
// childMatch matches an element if it matches d and its parent matches a.
// A node without a parent never matches.
func childMatch(a, d Matcher, n *html.Node) bool {
	if !d.Match(n) {
		return false
	}
	parent := n.Parent
	if parent == nil {
		return false
	}
	return a.Match(parent)
}
// matches an element if it matches s2 and is preceded by an element that matches s1.
2019-05-31 11:07:56 +02:00
// If adjacent is true, the sibling must be immediately before the element.
2020-10-24 12:40:39 +02:00
func siblingMatch(s1, s2 Matcher, adjacent bool, n *html.Node) bool {
if !s2.Match(n) {
return false
}
2019-05-31 11:07:56 +02:00
2020-10-24 12:40:39 +02:00
if adjacent {
for n = n.PrevSibling; n != nil; n = n.PrevSibling {
if n.Type == html.TextNode || n.Type == html.CommentNode {
continue
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
return s1.Match(n)
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
return false
}
2019-05-31 11:07:56 +02:00
2020-10-24 12:40:39 +02:00
// Walk backwards looking for element that matches s1
for c := n.PrevSibling; c != nil; c = c.PrevSibling {
if s1.Match(c) {
return true
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
}
2019-05-31 11:07:56 +02:00
2020-10-24 12:40:39 +02:00
return false
}
func (s combinedSelector) Specificity() Specificity {
spec := s.first.Specificity()
if s.second != nil {
spec = spec.Add(s.second.Specificity())
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
return spec
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
// on combinedSelector, a pseudo-element only makes sens on the last
// selector, although others increase specificity.
func (c combinedSelector) PseudoElement() string {
if c.second == nil {
return ""
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
return c.second.PseudoElement()
}
// A SelectorGroup is a list of selectors, which matches if any of the
// individual selectors matches.
type SelectorGroup []Sel
// Match returns true if the node matches one of the single selectors.
func (s SelectorGroup) Match(n *html.Node) bool {
for _, sel := range s {
if sel.Match(n) {
return true
}
2019-05-31 11:07:56 +02:00
}
2020-10-24 12:40:39 +02:00
return false
2019-05-31 11:07:56 +02:00
}