feed-to-muc/vendor/github.com/mmcdole/goxpp/xpp.go
2019-02-20 20:23:48 +01:00

342 lines
7.2 KiB
Go

package xpp
import (
"encoding/xml"
"errors"
"fmt"
"io"
"strings"
)
// XMLEventType identifies the kind of event an XMLPullParser is currently
// positioned on (see the event constants declared in this package).
type XMLEventType int
// CharsetReader has the same signature as the CharsetReader field of
// xml.Decoder; NewXMLPullParser installs it on the underlying decoder.
type CharsetReader func(charset string, input io.Reader) (io.Reader, error)
// Event types reported by XMLPullParser. The numeric values follow the
// declaration order via iota.
const (
// StartDocument is the event a freshly constructed parser starts on. It is
// also the zero value of XMLEventType.
StartDocument XMLEventType = iota
// EndDocument is reported once the underlying decoder returns io.EOF.
EndDocument
// StartTag corresponds to an xml.StartElement token.
StartTag
// EndTag corresponds to an xml.EndElement token.
EndTag
// Text corresponds to an xml.CharData token.
Text
// Comment corresponds to an xml.Comment token.
Comment
// ProcessingInstruction corresponds to an xml.ProcInst token.
ProcessingInstruction
// Directive corresponds to an xml.Directive token.
Directive
// IgnorableWhitespace is declared and has a name in EventName, but the
// token-to-event mapping visible in this file never produces it.
IgnorableWhitespace // TODO: ?
// TODO: CDSECT ?
)
// XMLPullParser is a pull-style wrapper around xml.Decoder. Each call that
// advances the parser updates the exported "token state" fields below so the
// caller can inspect the current event.
type XMLPullParser struct {
// Document State

// Spaces holds the namespace declarations in scope for the current element.
// Keys are namespace URIs (the xmlns attribute value); values are the
// declared prefix, or "" for a default (unprefixed) namespace declaration.
Spaces map[string]string
// SpacesStack holds one Spaces map per start tag processed; an entry is
// pushed on every start tag and popped on every end tag.
SpacesStack []map[string]string
// Token State

// Depth is incremented on every start tag and decremented on every end tag.
Depth int
// Event is the type of the most recently read event.
Event XMLEventType
// Attrs holds the attributes of the current start tag; it is nil for
// every other event.
Attrs []xml.Attr
// Name is the local name of the current start or end tag.
Name string
// Space is the namespace of the current start tag (it is not populated
// for end tags).
Space string
// Text is the content of the current character data, comment or directive
// token; for a processing instruction it is "<target> <inst>".
Text string
// decoder is the underlying XML tokenizer.
decoder *xml.Decoder
// token is a copy of the last token read, or nil at EndDocument and after
// DecodeElement.
token interface{}
}
// NewXMLPullParser builds a parser that reads XML from r.
//
// strict is copied to the underlying xml.Decoder's Strict field and cr to its
// CharsetReader field. The returned parser is positioned on StartDocument at
// depth 0 with an empty namespace map.
func NewXMLPullParser(r io.Reader, strict bool, cr CharsetReader) *XMLPullParser {
	parser := &XMLPullParser{
		Event:   StartDocument,
		Spaces:  make(map[string]string),
		decoder: xml.NewDecoder(r),
	}
	parser.decoder.Strict = strict
	parser.decoder.CharsetReader = cr
	return parser
}
// NextTag advances to the next StartTag or EndTag, silently skipping any
// whitespace-only Text events on the way.
//
// It returns the tag event that was reached. If Next fails, that error is
// returned; if the first non-whitespace event is neither a StartTag nor an
// EndTag, an error naming the offending event and the input offset is
// returned. In both error cases the returned event is the zero value.
func (p *XMLPullParser) NextTag() (event XMLEventType, err error) {
	for {
		var current XMLEventType
		current, err = p.Next()
		if err != nil {
			return event, err
		}

		// Whitespace between tags carries no information here.
		if current == Text && p.IsWhitespace() {
			continue
		}

		switch current {
		case StartTag, EndTag:
			return current, nil
		}
		return event, fmt.Errorf("Expected StartTag or EndTag but got %s at offset: %d", p.EventName(current), p.decoder.InputOffset())
	}
}
// Next advances the parser to the next StartTag, EndTag, Text or EndDocument
// event and returns it. Every other event (comments, directives, processing
// instructions) is read and discarded.
//
// An error from NextToken is returned unchanged together with the event
// value NextToken reported.
func (p *XMLPullParser) Next() (event XMLEventType, err error) {
	for {
		event, err = p.NextToken()
		if err != nil {
			return event, err
		}

		switch event {
		case StartTag, EndTag, EndDocument, Text:
			return event, nil
		}
		// Anything else is skipped; keep reading.
	}
}
// NextToken reads exactly one token from the underlying decoder, updates the
// parser's token state to describe it, and returns the matching event type.
//
// The decoder reports end of input as io.EOF; that is translated into a
// regular EndDocument event with a nil error. Any other decoder error is
// returned as-is with the zero event value. The per-token fields (Attrs,
// Name, Space, Text) are cleared before reading, so they are empty after an
// error or at EndDocument.
func (p *XMLPullParser) NextToken() (event XMLEventType, err error) {
	// Drop whatever the previous token left behind.
	p.resetTokenState()

	raw, readErr := p.decoder.Token()
	switch {
	case readErr == io.EOF:
		p.token = nil
		p.Event = EndDocument
		return p.Event, nil
	case readErr != nil:
		return event, readErr
	}

	// Keep a private copy of the token before inspecting it.
	p.token = xml.CopyToken(raw)
	p.processToken(p.token)
	p.Event = p.EventType(p.token)
	return p.Event, nil
}
// NextText returns the text content of the element whose StartTag the parser
// is currently on, leaving the parser on that element's EndTag.
//
// Consecutive Text events are concatenated. An element with no text yields
// "" and a nil error. An error is returned when the parser is not on a
// StartTag, when Next fails, or when anything other than Text or EndTag is
// encountered (for example a child element).
func (p *XMLPullParser) NextText() (string, error) {
	if p.Event != StartTag {
		return "", errors.New("Parser must be on StartTag to get NextText()")
	}

	kind, err := p.Next()
	if err != nil {
		return "", err
	}
	switch kind {
	case EndTag:
		// Empty element: nothing to collect.
		return "", nil
	case Text:
		// Fall out of the switch and start collecting.
	default:
		return "", errors.New("Parser must be on EndTag or Text to read text")
	}

	var collected strings.Builder
	for kind == Text {
		collected.WriteString(p.Text)

		if kind, err = p.Next(); err != nil {
			return "", err
		}
		if kind != EndTag && kind != Text {
			return "", fmt.Errorf("Event Text must be immediately followed by EndTag or Text but got %s", p.EventName(kind))
		}
	}
	return collected.String(), nil
}
// Skip discards tokens until the end tag that closes the element the parser
// is currently inside, including any nested elements along the way.
//
// Errors from NextToken are returned unchanged. If the end of the document
// is reached before the closing tag (for example when Skip is called while
// no element is open), an error is returned: NextToken keeps reporting
// EndDocument with a nil error once the input is exhausted, so without this
// check the loop would never terminate.
func (p *XMLPullParser) Skip() error {
	// depth counts how many end tags are still owed; nested start tags
	// raise it, end tags lower it.
	depth := 1
	for depth > 0 {
		tok, err := p.NextToken()
		if err != nil {
			return err
		}

		switch tok {
		case StartTag:
			depth++
		case EndTag:
			depth--
		case EndDocument:
			return errors.New("unexpected end of document while skipping element")
		}
	}
	return nil
}
// Attribute returns the value of the first attribute on the current token
// whose local name equals name. The attribute's namespace is not considered.
// It returns "" when no such attribute exists.
func (p *XMLPullParser) Attribute(name string) string {
	for i := range p.Attrs {
		if candidate := p.Attrs[i]; candidate.Name.Local == name {
			return candidate.Value
		}
	}
	return ""
}
// Expect checks that the parser is currently on the given event with the
// given element name, accepting any namespace. It delegates to ExpectAll and
// returns whatever error that call produces.
func (p *XMLPullParser) Expect(event XMLEventType, name string) (err error) {
	// "*" is the wildcard ExpectAll accepts for the namespace.
	const anySpace = "*"
	err = p.ExpectAll(event, anySpace, name)
	return err
}
// ExpectAll checks that the parser's current event, namespace and name match
// the arguments. Namespace and name are compared after lower-casing both
// sides; passing "*" for space or name accepts any value for that part.
//
// It returns nil on a match, otherwise an error describing the expected and
// actual values along with the decoder's input offset.
func (p *XMLPullParser) ExpectAll(event XMLEventType, space string, name string) (err error) {
	eventMatches := p.Event == event
	spaceMatches := space == "*" || strings.ToLower(p.Space) == strings.ToLower(space)
	nameMatches := name == "*" || strings.ToLower(p.Name) == strings.ToLower(name)

	if eventMatches && spaceMatches && nameMatches {
		return nil
	}
	return fmt.Errorf("Expected Space:%s Name:%s Event:%s but got Space:%s Name:%s Event:%s at offset: %d", space, name, p.EventName(event), p.Space, p.Name, p.EventName(p.Event), p.decoder.InputOffset())
}
// DecodeElement unmarshals the element whose StartTag the parser is currently
// on into v, using the underlying xml.Decoder. The decoder consumes every
// token up to and including the matching end element, so on success the
// parser is left as if it had just read that element's EndTag: Event is
// EndTag, Name is the element's local name, Depth is decremented and the
// element's namespace scope is popped.
//
// An error is returned when the parser is not on a StartTag, when the stored
// token is not a start element, or when decoding fails. After a decoding
// error the parser state is left untouched.
func (p *XMLPullParser) DecodeElement(v interface{}) error {
	if p.Event != StartTag {
		return errors.New("DecodeElement can only be called from a StartTag event")
	}

	// Event is an exported field, so guard the assertion instead of
	// panicking if it was changed without a matching token.
	startToken, ok := p.token.(xml.StartElement)
	if !ok {
		return errors.New("DecodeElement called without a current start element token")
	}

	// Consumes all tokens until the matching end token.
	if err := p.decoder.DecodeElement(v, &startToken); err != nil {
		return err
	}

	// Present the consumed element as an EndTag event carrying the name of
	// the StartTag it was decoded from.
	name := p.Name
	p.resetTokenState()
	p.Event = EndTag
	p.Depth--
	p.Name = name
	p.token = nil

	// The end element never passed through the end-tag handling, so the
	// namespace scope pushed for the start tag has to be popped here;
	// otherwise it would leak into following siblings and parents.
	if n := len(p.SpacesStack); n > 0 {
		p.SpacesStack = p.SpacesStack[:n-1]
	}
	if n := len(p.SpacesStack); n > 0 {
		p.Spaces = p.SpacesStack[n-1]
	} else {
		p.Spaces = map[string]string{}
	}
	return nil
}
// IsWhitespace reports whether the current Text is empty or consists solely
// of characters removed by strings.TrimSpace.
func (p *XMLPullParser) IsWhitespace() bool {
	trimmed := strings.TrimSpace(p.Text)
	return len(trimmed) == 0
}
// EventName returns the identifier of the given event type as a string
// (for example "StartTag"). Values that are not one of the declared event
// constants yield "".
func (p *XMLPullParser) EventName(e XMLEventType) (name string) {
	switch e {
	case StartDocument:
		return "StartDocument"
	case EndDocument:
		return "EndDocument"
	case StartTag:
		return "StartTag"
	case EndTag:
		return "EndTag"
	case Text:
		return "Text"
	case Comment:
		return "Comment"
	case ProcessingInstruction:
		return "ProcessingInstruction"
	case Directive:
		return "Directive"
	case IgnorableWhitespace:
		return "IgnorableWhitespace"
	default:
		return ""
	}
}
// EventType maps an encoding/xml token to the corresponding parser event.
// A token of any other dynamic type (including nil) maps to the zero value
// of XMLEventType, which is StartDocument.
func (p *XMLPullParser) EventType(t xml.Token) (event XMLEventType) {
	switch t.(type) {
	case xml.StartElement:
		return StartTag
	case xml.EndElement:
		return EndTag
	case xml.CharData:
		return Text
	case xml.Comment:
		return Comment
	case xml.ProcInst:
		return ProcessingInstruction
	case xml.Directive:
		return Directive
	default:
		// Unrecognized token: fall back to the zero-valued named result.
		return event
	}
}
// processToken dispatches a decoded token to the handler for its concrete
// type so the parser's token state reflects it. Tokens of any other type are
// ignored.
func (p *XMLPullParser) processToken(t xml.Token) {
	switch tok := t.(type) {
	// Structural tokens.
	case xml.StartElement:
		p.processStartToken(tok)
	case xml.EndElement:
		p.processEndToken(tok)

	// Tokens that only carry text.
	case xml.CharData:
		p.processCharDataToken(tok)
	case xml.Comment:
		p.processCommentToken(tok)
	case xml.Directive:
		p.processDirectiveToken(tok)
	case xml.ProcInst:
		p.processProcInstToken(tok)
	}
}
// processStartToken records a start element as the current token: it
// descends one level, opens a new namespace scope for the element, and
// exposes the element's name, namespace and attributes.
func (p *XMLPullParser) processStartToken(t xml.StartElement) {
	p.Depth++

	// Namespace tracking reads only the element's attributes and the
	// current scope, so it is independent of the field updates below.
	p.trackNamespaces(t)

	p.Name, p.Space = t.Name.Local, t.Name.Space
	p.Attrs = t.Attr
}
// processEndToken records an end element as the current token: it ascends
// one level, discards the closed element's namespace scope and restores the
// enclosing one (or a fresh empty map when no element remains open), then
// exposes the element's local name.
func (p *XMLPullParser) processEndToken(t xml.EndElement) {
	p.Depth--

	// Pop the scope that was pushed for the matching start tag.
	remaining := len(p.SpacesStack) - 1
	p.SpacesStack = p.SpacesStack[:remaining]

	if remaining > 0 {
		p.Spaces = p.SpacesStack[remaining-1]
	} else {
		p.Spaces = map[string]string{}
	}

	p.Name = t.Name.Local
}
// processCharDataToken stores the character data's bytes as the current Text.
func (p *XMLPullParser) processCharDataToken(t xml.CharData) {
	content := string(t)
	p.Text = content
}
// processCommentToken stores the comment's bytes as the current Text.
func (p *XMLPullParser) processCommentToken(t xml.Comment) {
	content := string(t)
	p.Text = content
}
// processProcInstToken stores a processing instruction as the current Text in
// the form "<target> <inst>", i.e. the target and the instruction bytes
// separated by a single space.
func (p *XMLPullParser) processProcInstToken(t xml.ProcInst) {
	p.Text = t.Target + " " + string(t.Inst)
}
// processDirectiveToken stores the directive's bytes as the current Text.
func (p *XMLPullParser) processDirectiveToken(t xml.Directive) {
	content := string(t)
	p.Text = content
}
// resetTokenState clears the per-token fields (Attrs, Name, Space, Text) so
// nothing from the previous token survives into the next one. Event, Depth
// and the namespace state are left untouched.
func (p *XMLPullParser) resetTokenState() {
	p.Attrs = nil
	p.Name, p.Space, p.Text = "", "", ""
}
// trackNamespaces opens a namespace scope for the start element t. The new
// scope starts as a copy of the current one and is extended with the
// element's own declarations, keyed by the trimmed attribute value (the
// namespace URI):
//
//   - an attribute in the "xmlns" space (xmlns:prefix="uri") maps the URI to
//     the lower-cased, trimmed prefix;
//   - an attribute whose local name is "xmlns" (xmlns="uri") maps the URI
//     to "".
//
// The new scope becomes p.Spaces and is pushed onto p.SpacesStack.
func (p *XMLPullParser) trackNamespaces(t xml.StartElement) {
	scope := make(map[string]string, len(p.Spaces))
	for uri, prefix := range p.Spaces {
		scope[uri] = prefix
	}

	for _, attr := range t.Attr {
		switch {
		case attr.Name.Space == "xmlns":
			uri := strings.TrimSpace(attr.Value)
			scope[uri] = strings.TrimSpace(strings.ToLower(attr.Name.Local))
		case attr.Name.Local == "xmlns":
			uri := strings.TrimSpace(attr.Value)
			scope[uri] = ""
		}
	}

	p.SpacesStack = append(p.SpacesStack, scope)
	p.Spaces = scope
}