mirror of
https://salsa.debian.org/mdosch/feed-to-muc.git
synced 2024-11-23 14:38:39 +01:00
343 lines
7.2 KiB
Go
343 lines
7.2 KiB
Go
|
package xpp
|
||
|
|
||
|
import (
|
||
|
"encoding/xml"
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"strings"
|
||
|
)
|
||
|
|
||
|
type (
	// XMLEventType identifies the kind of token the pull parser is
	// currently positioned on.
	XMLEventType int

	// CharsetReader converts input in the named charset to UTF-8. It has
	// the same shape as the CharsetReader hook of encoding/xml's Decoder.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
)

// Event kinds reported by the parser. The numeric values are assigned by
// iota in declaration order, starting at zero with StartDocument.
const (
	// StartDocument is the state of a freshly created parser.
	StartDocument XMLEventType = iota
	// EndDocument is reported once the underlying decoder hits io.EOF.
	EndDocument
	// StartTag corresponds to an xml.StartElement token.
	StartTag
	// EndTag corresponds to an xml.EndElement token.
	EndTag
	// Text corresponds to an xml.CharData token.
	Text
	// Comment corresponds to an xml.Comment token.
	Comment
	// ProcessingInstruction corresponds to an xml.ProcInst token.
	ProcessingInstruction
	// Directive corresponds to an xml.Directive token.
	Directive
	// IgnorableWhitespace is declared for completeness.
	// TODO: nothing visible in this file emits it yet.
	IgnorableWhitespace
	// TODO: CDSECT ?
)
|
||
|
|
||
|
type XMLPullParser struct {
|
||
|
// Document State
|
||
|
Spaces map[string]string
|
||
|
SpacesStack []map[string]string
|
||
|
|
||
|
// Token State
|
||
|
Depth int
|
||
|
Event XMLEventType
|
||
|
Attrs []xml.Attr
|
||
|
Name string
|
||
|
Space string
|
||
|
Text string
|
||
|
|
||
|
decoder *xml.Decoder
|
||
|
token interface{}
|
||
|
}
|
||
|
|
||
|
func NewXMLPullParser(r io.Reader, strict bool, cr CharsetReader) *XMLPullParser {
|
||
|
d := xml.NewDecoder(r)
|
||
|
d.Strict = strict
|
||
|
d.CharsetReader = cr
|
||
|
return &XMLPullParser{
|
||
|
decoder: d,
|
||
|
Event: StartDocument,
|
||
|
Depth: 0,
|
||
|
Spaces: map[string]string{},
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) NextTag() (event XMLEventType, err error) {
|
||
|
t, err := p.Next()
|
||
|
if err != nil {
|
||
|
return event, err
|
||
|
}
|
||
|
|
||
|
for t == Text && p.IsWhitespace() {
|
||
|
t, err = p.Next()
|
||
|
if err != nil {
|
||
|
return event, err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if t != StartTag && t != EndTag {
|
||
|
return event, fmt.Errorf("Expected StartTag or EndTag but got %s at offset: %d", p.EventName(t), p.decoder.InputOffset())
|
||
|
}
|
||
|
|
||
|
return t, nil
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) Next() (event XMLEventType, err error) {
|
||
|
for {
|
||
|
event, err = p.NextToken()
|
||
|
if err != nil {
|
||
|
return event, err
|
||
|
}
|
||
|
|
||
|
// Return immediately after encountering a StartTag
|
||
|
// EndTag, Text, EndDocument
|
||
|
if event == StartTag ||
|
||
|
event == EndTag ||
|
||
|
event == EndDocument ||
|
||
|
event == Text {
|
||
|
return event, nil
|
||
|
}
|
||
|
|
||
|
// Skip Comment/Directive and ProcessingInstruction
|
||
|
if event == Comment ||
|
||
|
event == Directive ||
|
||
|
event == ProcessingInstruction {
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
return event, nil
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) NextToken() (event XMLEventType, err error) {
|
||
|
// Clear any state held for the previous token
|
||
|
p.resetTokenState()
|
||
|
|
||
|
token, err := p.decoder.Token()
|
||
|
if err != nil {
|
||
|
if err == io.EOF {
|
||
|
// XML decoder returns the EOF as an error
|
||
|
// but we want to return it as a valid
|
||
|
// EndDocument token instead
|
||
|
p.token = nil
|
||
|
p.Event = EndDocument
|
||
|
return p.Event, nil
|
||
|
}
|
||
|
return event, err
|
||
|
}
|
||
|
|
||
|
p.token = xml.CopyToken(token)
|
||
|
p.processToken(p.token)
|
||
|
p.Event = p.EventType(p.token)
|
||
|
|
||
|
return p.Event, nil
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) NextText() (string, error) {
|
||
|
if p.Event != StartTag {
|
||
|
return "", errors.New("Parser must be on StartTag to get NextText()")
|
||
|
}
|
||
|
|
||
|
t, err := p.Next()
|
||
|
if err != nil {
|
||
|
return "", err
|
||
|
}
|
||
|
|
||
|
if t != EndTag && t != Text {
|
||
|
return "", errors.New("Parser must be on EndTag or Text to read text")
|
||
|
}
|
||
|
|
||
|
var result string
|
||
|
for t == Text {
|
||
|
result = result + p.Text
|
||
|
t, err = p.Next()
|
||
|
if err != nil {
|
||
|
return "", err
|
||
|
}
|
||
|
|
||
|
if t != EndTag && t != Text {
|
||
|
errstr := fmt.Sprintf("Event Text must be immediately followed by EndTag or Text but got %s", p.EventName(t))
|
||
|
return "", errors.New(errstr)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return result, nil
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) Skip() error {
|
||
|
for {
|
||
|
tok, err := p.NextToken()
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
if tok == StartTag {
|
||
|
if err := p.Skip(); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
} else if tok == EndTag {
|
||
|
return nil
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) Attribute(name string) string {
|
||
|
for _, attr := range p.Attrs {
|
||
|
if attr.Name.Local == name {
|
||
|
return attr.Value
|
||
|
}
|
||
|
}
|
||
|
return ""
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) Expect(event XMLEventType, name string) (err error) {
|
||
|
return p.ExpectAll(event, "*", name)
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) ExpectAll(event XMLEventType, space string, name string) (err error) {
|
||
|
if !(p.Event == event && (strings.ToLower(p.Space) == strings.ToLower(space) || space == "*") && (strings.ToLower(p.Name) == strings.ToLower(name) || name == "*")) {
|
||
|
err = fmt.Errorf("Expected Space:%s Name:%s Event:%s but got Space:%s Name:%s Event:%s at offset: %d", space, name, p.EventName(event), p.Space, p.Name, p.EventName(p.Event), p.decoder.InputOffset())
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) DecodeElement(v interface{}) error {
|
||
|
if p.Event != StartTag {
|
||
|
return errors.New("DecodeElement can only be called from a StartTag event")
|
||
|
}
|
||
|
|
||
|
//tok := &p.token
|
||
|
|
||
|
startToken := p.token.(xml.StartElement)
|
||
|
|
||
|
// Consumes all tokens until the matching end token.
|
||
|
err := p.decoder.DecodeElement(v, &startToken)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
name := p.Name
|
||
|
|
||
|
// Need to set the "current" token name/event
|
||
|
// to the previous StartTag event's name
|
||
|
p.resetTokenState()
|
||
|
p.Event = EndTag
|
||
|
p.Depth--
|
||
|
p.Name = name
|
||
|
p.token = nil
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) IsWhitespace() bool {
|
||
|
return strings.TrimSpace(p.Text) == ""
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) EventName(e XMLEventType) (name string) {
|
||
|
switch e {
|
||
|
case StartTag:
|
||
|
name = "StartTag"
|
||
|
case EndTag:
|
||
|
name = "EndTag"
|
||
|
case StartDocument:
|
||
|
name = "StartDocument"
|
||
|
case EndDocument:
|
||
|
name = "EndDocument"
|
||
|
case ProcessingInstruction:
|
||
|
name = "ProcessingInstruction"
|
||
|
case Directive:
|
||
|
name = "Directive"
|
||
|
case Comment:
|
||
|
name = "Comment"
|
||
|
case Text:
|
||
|
name = "Text"
|
||
|
case IgnorableWhitespace:
|
||
|
name = "IgnorableWhitespace"
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) EventType(t xml.Token) (event XMLEventType) {
|
||
|
switch t.(type) {
|
||
|
case xml.StartElement:
|
||
|
event = StartTag
|
||
|
case xml.EndElement:
|
||
|
event = EndTag
|
||
|
case xml.CharData:
|
||
|
event = Text
|
||
|
case xml.Comment:
|
||
|
event = Comment
|
||
|
case xml.ProcInst:
|
||
|
event = ProcessingInstruction
|
||
|
case xml.Directive:
|
||
|
event = Directive
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) processToken(t xml.Token) {
|
||
|
switch tt := t.(type) {
|
||
|
case xml.StartElement:
|
||
|
p.processStartToken(tt)
|
||
|
case xml.EndElement:
|
||
|
p.processEndToken(tt)
|
||
|
case xml.CharData:
|
||
|
p.processCharDataToken(tt)
|
||
|
case xml.Comment:
|
||
|
p.processCommentToken(tt)
|
||
|
case xml.ProcInst:
|
||
|
p.processProcInstToken(tt)
|
||
|
case xml.Directive:
|
||
|
p.processDirectiveToken(tt)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) processStartToken(t xml.StartElement) {
|
||
|
p.Depth++
|
||
|
p.Attrs = t.Attr
|
||
|
p.Name = t.Name.Local
|
||
|
p.Space = t.Name.Space
|
||
|
p.trackNamespaces(t)
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) processEndToken(t xml.EndElement) {
|
||
|
p.Depth--
|
||
|
p.SpacesStack = p.SpacesStack[:len(p.SpacesStack)-1]
|
||
|
if len(p.SpacesStack) == 0 {
|
||
|
p.Spaces = map[string]string{}
|
||
|
} else {
|
||
|
p.Spaces = p.SpacesStack[len(p.SpacesStack)-1]
|
||
|
}
|
||
|
p.Name = t.Name.Local
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) processCharDataToken(t xml.CharData) {
|
||
|
p.Text = string([]byte(t))
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) processCommentToken(t xml.Comment) {
|
||
|
p.Text = string([]byte(t))
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) processProcInstToken(t xml.ProcInst) {
|
||
|
p.Text = fmt.Sprintf("%s %s", t.Target, string(t.Inst))
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) processDirectiveToken(t xml.Directive) {
|
||
|
p.Text = string([]byte(t))
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) resetTokenState() {
|
||
|
p.Attrs = nil
|
||
|
p.Name = ""
|
||
|
p.Space = ""
|
||
|
p.Text = ""
|
||
|
}
|
||
|
|
||
|
func (p *XMLPullParser) trackNamespaces(t xml.StartElement) {
|
||
|
newSpace := map[string]string{}
|
||
|
for k, v := range p.Spaces {
|
||
|
newSpace[k] = v
|
||
|
}
|
||
|
for _, attr := range t.Attr {
|
||
|
if attr.Name.Space == "xmlns" {
|
||
|
space := strings.TrimSpace(attr.Value)
|
||
|
spacePrefix := strings.TrimSpace(strings.ToLower(attr.Name.Local))
|
||
|
newSpace[space] = spacePrefix
|
||
|
} else if attr.Name.Local == "xmlns" {
|
||
|
space := strings.TrimSpace(attr.Value)
|
||
|
newSpace[space] = ""
|
||
|
}
|
||
|
}
|
||
|
p.Spaces = newSpace
|
||
|
p.SpacesStack = append(p.SpacesStack, newSpace)
|
||
|
}
|