diff --git a/Readme.md b/Readme.md index 79d3606..ec4bdd3 100644 --- a/Readme.md +++ b/Readme.md @@ -69,7 +69,7 @@ Usage: ./bin/kubeconform [OPTION]... [FILE OR FOLDER]... -n int number of goroutines to run concurrently (default 4) -output string - output format - json, tap, text (default "text") + output format - json, junit, tap, text (default "text") -reject string comma-separated list of kinds to reject -schema-location value @@ -79,9 +79,9 @@ Usage: ./bin/kubeconform [OPTION]... [FILE OR FOLDER]... -strict disallow additional properties not in schema -summary - print a summary at the end + print a summary at the end (ignored for junit output) -verbose - print results for all resources (ignored for tap output) + print results for all resources (ignored for tap and junit output) ``` ### Usage examples diff --git a/go.mod b/go.mod index 6584fd4..025821a 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/yannh/kubeconform go 1.14 require ( + github.com/beevik/etree v1.1.0 github.com/xeipuuv/gojsonschema v1.2.0 gopkg.in/yaml.v2 v2.3.0 // indirect sigs.k8s.io/yaml v1.2.0 diff --git a/go.sum b/go.sum index 8f3d7fa..905671a 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,6 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/beevik/etree v1.1.0 h1:T0xke/WvNtMoCqgzPhkX2r4rjY3GDZFi+FjpRZY2Jbs= +github.com/beevik/etree v1.1.0/go.mod h1:r8Aw8JqVegEf0w2fDnATrX9VpkMcyFeM0FhwO62wh+A= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys= diff --git a/pkg/config/config.go b/pkg/config/config.go index a321728..a64dcae 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -70,11 +70,11 @@ func FromFlags(progName string, args []string) (Config, string, error) { flags.BoolVar(&c.ExitOnError, "exit-on-error", false, "immediately stop execution when the first error is encountered") flags.BoolVar(&c.IgnoreMissingSchemas, "ignore-missing-schemas", false, "skip files with missing schemas instead of failing") flags.Var(&ignoreFilenamePatterns, "ignore-filename-pattern", "regular expression specifying paths to ignore (can be specified multiple times)") - flags.BoolVar(&c.Summary, "summary", false, "print a summary at the end") + flags.BoolVar(&c.Summary, "summary", false, "print a summary at the end (ignored for junit output)") flags.IntVar(&c.NumberOfWorkers, "n", 4, "number of goroutines to run concurrently") flags.BoolVar(&c.Strict, "strict", false, "disallow additional properties not in schema") - flags.StringVar(&c.OutputFormat, "output", "text", "output format - json, tap, text") - flags.BoolVar(&c.Verbose, "verbose", false, "print results for all resources (ignored for tap output)") + flags.StringVar(&c.OutputFormat, "output", "text", "output format - json, junit, tap, text") + flags.BoolVar(&c.Verbose, "verbose", false, "print results for all resources (ignored for tap and junit output)") flags.BoolVar(&c.SkipTLS, "insecure-skip-tls-verify", false, "disable verification of the server's SSL certificate. This will make your HTTPS connections insecure") flags.StringVar(&c.Cache, "cache", "", "cache schemas downloaded via HTTP to this folder") flags.StringVar(&c.CPUProfileFile, "cpu-prof", "", "debug - log CPU profiling to file") diff --git a/pkg/output/junit.go b/pkg/output/junit.go new file mode 100644 index 0000000..d19a817 --- /dev/null +++ b/pkg/output/junit.go @@ -0,0 +1,176 @@ +package output + +// References: +// https://github.com/windyroad/JUnit-Schema/blob/master/JUnit.xsd +// https://llg.cubic.org/docs/junit/ +// https://github.com/jstemmer/go-junit-report/blob/master/formatter/formatter.go +// https://github.com/junit-team/junit5/blob/main/platform-tests/src/test/resources/jenkins-junit.xsd + +import ( + "bufio" + "encoding/xml" + "fmt" + "github.com/yannh/kubeconform/pkg/validator" + "io" + "time" +) + +type TestSuiteCollection struct { + XMLName xml.Name `xml:"testsuites"` + Name string `xml:"name,attr"` + Time float64 `xml:"time,attr"` + Tests int `xml:"tests,attr"` + Failures int `xml:"failures,attr"` + Disabled int `xml:"disabled,attr"` + Errors int `xml:"errors,attr"` + Suites []TestSuite `xml:"testsuite"` +} + +type Property struct { + Name string `xml:"name,attr"` + Value string `xml:"value,attr"` +} + +type TestSuite struct { + XMLName xml.Name `xml:"testsuite"` + Properties []*Property `xml:"properties>property,omitempty"` + Cases []TestCase `xml:"testcase"` + Name string `xml:"name,attr"` + Id int `xml:"id,attr"` + Tests int `xml:"tests,attr"` + Failures int `xml:"failures,attr"` + Errors int `xml:"errors,attr"` + Disabled int `xml:"disabled,attr"` + Skipped int `xml:"skipped,attr"` +} + +type TestCase struct { + XMLName xml.Name `xml:"testcase"` + Name string `xml:"name,attr"` + ClassName string `xml:"classname,attr"` + Skipped *TestCaseSkipped `xml:"skipped,omitempty"` + Error *TestCaseError `xml:"error,omitempty"` + Failure []TestCaseError `xml:"failure,omitempty"` +} + +type TestCaseSkipped struct { + Message string `xml:"message,attr"` +} + +type TestCaseError struct { + Message string `xml:"message,attr"` + Type string `xml:"type,attr"` + Content string `xml:",chardata"` +} + +type junito struct { + id int + w io.Writer + withSummary bool + verbose bool + suites map[string]*TestSuite // map filename to corresponding suite + nValid, nInvalid, nErrors, nSkipped int + startTime time.Time +} + +func junitOutput(w io.Writer, withSummary bool, isStdin, verbose bool) Output { + return &junito{ + id: 0, + w: w, + withSummary: withSummary, + verbose: verbose, + suites: make(map[string]*TestSuite), + nValid: 0, + nInvalid: 0, + nErrors: 0, + nSkipped: 0, + startTime: time.Now(), + } +} + +// Write adds a result to the report. +func (o *junito) Write(result validator.Result) error { + var suite *TestSuite + suite, found := o.suites[result.Resource.Path] + + if !found { + o.id++ + suite = &TestSuite{ + Name: result.Resource.Path, + Id: o.id, + Tests: 0, Failures: 0, Errors: 0, Disabled: 0, Skipped: 0, + Cases: make([]TestCase, 0), + Properties: make([]*Property, 0), + } + o.suites[result.Resource.Path] = suite + } + + suite.Tests++ + + sig, _ := result.Resource.Signature() + var objectName string + if len(sig.Namespace) > 0 { + objectName = fmt.Sprintf("%s/%s", sig.Namespace, sig.Name) + } else { + objectName = sig.Name + } + typeName := fmt.Sprintf("%s@%s", sig.Kind, sig.Version) + testCase := TestCase{ClassName: typeName, Name: objectName} + + switch result.Status { + case validator.Valid: + o.nValid++ + case validator.Invalid: + suite.Failures++ + o.nInvalid++ + failure := TestCaseError{Message: result.Err.Error()} + testCase.Failure = append(testCase.Failure, failure) + case validator.Error: + suite.Errors++ + o.nErrors++ + testCase.Error = &TestCaseError{Message: result.Err.Error()} + case validator.Skipped: + suite.Skipped++ + testCase.Skipped = &TestCaseSkipped{} + o.nSkipped++ + case validator.Empty: + } + + suite.Cases = append(suite.Cases, testCase) + + return nil +} + +// Flush outputs the results as XML +func (o *junito) Flush() error { + runtime := time.Now().Sub(o.startTime) + + var suites = make([]TestSuite, 0) + for _, suite := range o.suites { + suites = append(suites, *suite) + } + + root := TestSuiteCollection{ + Name: "kubeconform", + Time: runtime.Seconds(), + Tests: o.nValid + o.nInvalid + o.nErrors + o.nSkipped, + Failures: o.nInvalid, + Errors: o.nErrors, + Disabled: o.nSkipped, + Suites: suites, + } + + // 2-space indentation + content, err := xml.MarshalIndent(root, "", " ") + + if err != nil { + return err + } + + writer := bufio.NewWriter(o.w) + writer.Write(content) + writer.WriteByte('\n') + writer.Flush() + + return nil +} diff --git a/pkg/output/junit_test.go b/pkg/output/junit_test.go new file mode 100644 index 0000000..3d8e48f --- /dev/null +++ b/pkg/output/junit_test.go @@ -0,0 +1,168 @@ +package output + +import ( + "bytes" + "github.com/yannh/kubeconform/pkg/resource" + "regexp" + "testing" + + "github.com/yannh/kubeconform/pkg/validator" + + "github.com/beevik/etree" +) + +func isNumeric(s string) bool { + matched, _ := regexp.MatchString("^\\d+(\\.\\d+)?$", s) + return matched +} + +func TestJUnitWrite(t *testing.T) { + for _, testCase := range []struct { + name string + withSummary bool + isStdin bool + verbose bool + results []validator.Result + evaluate func(d *etree.Document) + }{ + { + "empty document", + false, + false, + false, + []validator.Result{}, + func(d *etree.Document) { + root := d.FindElement("/testsuites") + if root == nil { + t.Errorf("Can't find root testsuite element") + return + } + for _, attr := range root.Attr { + switch attr.Key { + case "time": + case "tests": + case "failures": + case "disabled": + case "errors": + if !isNumeric(attr.Value) { + t.Errorf("Expected a number for /testsuites/@%s", attr.Key) + } + continue + case "name": + if attr.Value != "kubeconform" { + t.Errorf("Expected 'kubeconform' for /testsuites/@name") + } + continue + default: + t.Errorf("Unknown attribute /testsuites/@%s", attr.Key) + continue + } + } + suites := root.SelectElements("testsuite") + if len(suites) != 0 { + t.Errorf("No testsuite elements should be generated when there are no resources") + } + }, + }, + { + "a single deployment, verbose, with summary", + true, + false, + true, + []validator.Result{ + { + Resource: resource.Resource{ + Path: "deployment.yml", + Bytes: []byte(`apiVersion: apps/v1 +kind: Deployment +metadata: + name: "my-app" + namespace: "my-namespace" +`), + }, + Status: validator.Valid, + Err: nil, + }, + }, + func(d *etree.Document) { + suites := d.FindElements("//testsuites/testsuite") + if len(suites) != 1 { + t.Errorf("Expected exactly 1 testsuite element, got %d", len(suites)) + return + } + suite := suites[0] + for _, attr := range suite.Attr { + switch attr.Key { + case "name": + if attr.Value != "deployment.yml" { + t.Errorf("Test suite name should be the resource path") + } + continue + case "tests": + if attr.Value != "1" { + t.Errorf("testsuite/@tests should be 1") + } + continue + case "failures": + if attr.Value != "0" { + t.Errorf("testsuite/@failures should be 0") + } + continue + case "errors": + if attr.Value != "0" { + t.Errorf("testsuite/@errors should be 0") + } + continue + case "disabled": + if attr.Value != "0" { + t.Errorf("testsuite/@disabled should be 0") + } + continue + case "skipped": + if attr.Value != "0" { + t.Errorf("testsuite/@skipped should be 0") + } + continue + default: + t.Errorf("Unknown testsuite attribute %s", attr.Key) + continue + } + } + testcases := suite.SelectElements("testcase") + if len(testcases) != 1 { + t.Errorf("Expected exactly 1 testcase, got %d", len(testcases)) + return + } + testcase := testcases[0] + if testcase.SelectAttrValue("name", "") != "my-namespace/my-app" { + t.Errorf("Test case name should be namespace / name") + } + if testcase.SelectAttrValue("classname", "") != "Deployment@apps/v1" { + t.Errorf("Test case class name should be resource kind @ api version") + } + if testcase.SelectElement("skipped") != nil { + t.Errorf("skipped element should not be generated if the kind was not skipped") + } + if testcase.SelectElement("error") != nil { + t.Errorf("error element should not be generated if there was no error") + } + if len(testcase.SelectElements("failure")) != 0 { + t.Errorf("failure elements should not be generated if there were no failures") + } + }, + }, + } { + w := new(bytes.Buffer) + o := junitOutput(w, testCase.withSummary, testCase.isStdin, testCase.verbose) + + for _, res := range testCase.results { + o.Write(res) + } + o.Flush() + + doc := etree.NewDocument() + doc.ReadFromString(w.String()) + + testCase.evaluate(doc) + } +} diff --git a/pkg/output/output.go b/pkg/output/output.go index 3dce1b7..7612873 100644 --- a/pkg/output/output.go +++ b/pkg/output/output.go @@ -18,6 +18,8 @@ func New(outputFormat string, printSummary, isStdin, verbose bool) (Output, erro switch { case outputFormat == "json": return jsonOutput(w, printSummary, isStdin, verbose), nil + case outputFormat == "junit": + return junitOutput(w, printSummary, isStdin, verbose), nil case outputFormat == "tap": return tapOutput(w, printSummary, isStdin, verbose), nil case outputFormat == "text": diff --git a/vendor/github.com/beevik/etree/.travis.yml b/vendor/github.com/beevik/etree/.travis.yml new file mode 100644 index 0000000..f4cb25d --- /dev/null +++ b/vendor/github.com/beevik/etree/.travis.yml @@ -0,0 +1,14 @@ +language: go +sudo: false + +go: + - 1.11.x + - tip + +matrix: + allow_failures: + - go: tip + +script: + - go vet ./... + - go test -v ./... diff --git a/vendor/github.com/beevik/etree/CONTRIBUTORS b/vendor/github.com/beevik/etree/CONTRIBUTORS new file mode 100644 index 0000000..03211a8 --- /dev/null +++ b/vendor/github.com/beevik/etree/CONTRIBUTORS @@ -0,0 +1,10 @@ +Brett Vickers (beevik) +Felix Geisendörfer (felixge) +Kamil Kisiel (kisielk) +Graham King (grahamking) +Matt Smith (ma314smith) +Michal Jemala (michaljemala) +Nicolas Piganeau (npiganeau) +Chris Brown (ccbrown) +Earncef Sequeira (earncef) +Gabriel de Labachelerie (wuzuf) diff --git a/vendor/github.com/beevik/etree/LICENSE b/vendor/github.com/beevik/etree/LICENSE new file mode 100644 index 0000000..26f1f77 --- /dev/null +++ b/vendor/github.com/beevik/etree/LICENSE @@ -0,0 +1,24 @@ +Copyright 2015-2019 Brett Vickers. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/beevik/etree/README.md b/vendor/github.com/beevik/etree/README.md new file mode 100644 index 0000000..08ec26b --- /dev/null +++ b/vendor/github.com/beevik/etree/README.md @@ -0,0 +1,205 @@ +[![Build Status](https://travis-ci.org/beevik/etree.svg?branch=master)](https://travis-ci.org/beevik/etree) +[![GoDoc](https://godoc.org/github.com/beevik/etree?status.svg)](https://godoc.org/github.com/beevik/etree) + +etree +===== + +The etree package is a lightweight, pure go package that expresses XML in +the form of an element tree. Its design was inspired by the Python +[ElementTree](http://docs.python.org/2/library/xml.etree.elementtree.html) +module. + +Some of the package's capabilities and features: + +* Represents XML documents as trees of elements for easy traversal. +* Imports, serializes, modifies or creates XML documents from scratch. +* Writes and reads XML to/from files, byte slices, strings and io interfaces. +* Performs simple or complex searches with lightweight XPath-like query APIs. +* Auto-indents XML using spaces or tabs for better readability. +* Implemented in pure go; depends only on standard go libraries. +* Built on top of the go [encoding/xml](http://golang.org/pkg/encoding/xml) + package. + +### Creating an XML document + +The following example creates an XML document from scratch using the etree +package and outputs its indented contents to stdout. +```go +doc := etree.NewDocument() +doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`) +doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`) + +people := doc.CreateElement("People") +people.CreateComment("These are all known people") + +jon := people.CreateElement("Person") +jon.CreateAttr("name", "Jon") + +sally := people.CreateElement("Person") +sally.CreateAttr("name", "Sally") + +doc.Indent(2) +doc.WriteTo(os.Stdout) +``` + +Output: +```xml + + + + + + + +``` + +### Reading an XML file + +Suppose you have a file on disk called `bookstore.xml` containing the +following data: + +```xml + + + + Everyday Italian + Giada De Laurentiis + 2005 + 30.00 + + + + Harry Potter + J K. Rowling + 2005 + 29.99 + + + + XQuery Kick Start + James McGovern + Per Bothner + Kurt Cagle + James Linn + Vaidyanathan Nagarajan + 2003 + 49.99 + + + + Learning XML + Erik T. Ray + 2003 + 39.95 + + + +``` + +This code reads the file's contents into an etree document. +```go +doc := etree.NewDocument() +if err := doc.ReadFromFile("bookstore.xml"); err != nil { + panic(err) +} +``` + +You can also read XML from a string, a byte slice, or an `io.Reader`. + +### Processing elements and attributes + +This example illustrates several ways to access elements and attributes using +etree selection queries. +```go +root := doc.SelectElement("bookstore") +fmt.Println("ROOT element:", root.Tag) + +for _, book := range root.SelectElements("book") { + fmt.Println("CHILD element:", book.Tag) + if title := book.SelectElement("title"); title != nil { + lang := title.SelectAttrValue("lang", "unknown") + fmt.Printf(" TITLE: %s (%s)\n", title.Text(), lang) + } + for _, attr := range book.Attr { + fmt.Printf(" ATTR: %s=%s\n", attr.Key, attr.Value) + } +} +``` +Output: +``` +ROOT element: bookstore +CHILD element: book + TITLE: Everyday Italian (en) + ATTR: category=COOKING +CHILD element: book + TITLE: Harry Potter (en) + ATTR: category=CHILDREN +CHILD element: book + TITLE: XQuery Kick Start (en) + ATTR: category=WEB +CHILD element: book + TITLE: Learning XML (en) + ATTR: category=WEB +``` + +### Path queries + +This example uses etree's path functions to select all book titles that fall +into the category of 'WEB'. The double-slash prefix in the path causes the +search for book elements to occur recursively; book elements may appear at any +level of the XML hierarchy. +```go +for _, t := range doc.FindElements("//book[@category='WEB']/title") { + fmt.Println("Title:", t.Text()) +} +``` + +Output: +``` +Title: XQuery Kick Start +Title: Learning XML +``` + +This example finds the first book element under the root bookstore element and +outputs the tag and text of each of its child elements. +```go +for _, e := range doc.FindElements("./bookstore/book[1]/*") { + fmt.Printf("%s: %s\n", e.Tag, e.Text()) +} +``` + +Output: +``` +title: Everyday Italian +author: Giada De Laurentiis +year: 2005 +price: 30.00 +``` + +This example finds all books with a price of 49.99 and outputs their titles. +```go +path := etree.MustCompilePath("./bookstore/book[p:price='49.99']/title") +for _, e := range doc.FindElementsPath(path) { + fmt.Println(e.Text()) +} +``` + +Output: +``` +XQuery Kick Start +``` + +Note that this example uses the FindElementsPath function, which takes as an +argument a pre-compiled path object. Use precompiled paths when you plan to +search with the same path more than once. + +### Other features + +These are just a few examples of the things the etree package can do. See the +[documentation](http://godoc.org/github.com/beevik/etree) for a complete +description of its capabilities. + +### Contributing + +This project accepts contributions. Just fork the repo and submit a pull +request! diff --git a/vendor/github.com/beevik/etree/RELEASE_NOTES.md b/vendor/github.com/beevik/etree/RELEASE_NOTES.md new file mode 100644 index 0000000..ee59d7a --- /dev/null +++ b/vendor/github.com/beevik/etree/RELEASE_NOTES.md @@ -0,0 +1,109 @@ +Release v1.1.0 +============== + +**New Features** + +* New attribute helpers. + * Added the `Element.SortAttrs` method, which lexicographically sorts an + element's attributes by key. +* New `ReadSettings` properties. + * Added `Entity` for the support of custom entity maps. +* New `WriteSettings` properties. + * Added `UseCRLF` to allow the output of CR-LF newlines instead of the + default LF newlines. This is useful on Windows systems. +* Additional support for text and CDATA sections. + * The `Element.Text` method now returns the concatenation of all consecutive + character data tokens immediately following an element's opening tag. + * Added `Element.SetCData` to replace the character data immediately + following an element's opening tag with a CDATA section. + * Added `Element.CreateCData` to create and add a CDATA section child + `CharData` token to an element. + * Added `Element.CreateText` to create and add a child text `CharData` token + to an element. + * Added `NewCData` to create a parentless CDATA section `CharData` token. + * Added `NewText` to create a parentless text `CharData` + token. + * Added `CharData.IsCData` to detect if the token contains a CDATA section. + * Added `CharData.IsWhitespace` to detect if the token contains whitespace + inserted by one of the document Indent functions. + * Modified `Element.SetText` so that it replaces a run of consecutive + character data tokens following the element's opening tag (instead of just + the first one). +* New "tail text" support. + * Added the `Element.Tail` method, which returns the text immediately + following an element's closing tag. + * Added the `Element.SetTail` method, which modifies the text immediately + following an element's closing tag. +* New element child insertion and removal methods. + * Added the `Element.InsertChildAt` method, which inserts a new child token + before the specified child token index. + * Added the `Element.RemoveChildAt` method, which removes the child token at + the specified child token index. +* New element and attribute queries. + * Added the `Element.Index` method, which returns the element's index within + its parent element's child token list. + * Added the `Element.NamespaceURI` method to return the namespace URI + associated with an element. + * Added the `Attr.NamespaceURI` method to return the namespace URI + associated with an element. + * Added the `Attr.Element` method to return the element that an attribute + belongs to. +* New Path filter functions. + * Added `[local-name()='val']` to keep elements whose unprefixed tag matches + the desired value. + * Added `[name()='val']` to keep elements whose full tag matches the desired + value. + * Added `[namespace-prefix()='val']` to keep elements whose namespace prefix + matches the desired value. + * Added `[namespace-uri()='val']` to keep elements whose namespace URI + matches the desired value. + +**Bug Fixes** + +* A default XML `CharSetReader` is now used to prevent failed parsing of XML + documents using certain encodings. + ([Issue](https://github.com/beevik/etree/issues/53)). +* All characters are now properly escaped according to XML parsing rules. + ([Issue](https://github.com/beevik/etree/issues/55)). +* The `Document.Indent` and `Document.IndentTabs` functions no longer insert + empty string `CharData` tokens. + +**Deprecated** + +* `Element` + * The `InsertChild` method is deprecated. Use `InsertChildAt` instead. + * The `CreateCharData` method is deprecated. Use `CreateText` instead. +* `CharData` + * The `NewCharData` method is deprecated. Use `NewText` instead. + + +Release v1.0.1 +============== + +**Changes** + +* Added support for absolute etree Path queries. An absolute path begins with + `/` or `//` and begins its search from the element's document root. +* Added [`GetPath`](https://godoc.org/github.com/beevik/etree#Element.GetPath) + and [`GetRelativePath`](https://godoc.org/github.com/beevik/etree#Element.GetRelativePath) + functions to the [`Element`](https://godoc.org/github.com/beevik/etree#Element) + type. + +**Breaking changes** + +* A path starting with `//` is now interpreted as an absolute path. + Previously, it was interpreted as a relative path starting from the element + whose + [`FindElement`](https://godoc.org/github.com/beevik/etree#Element.FindElement) + method was called. To remain compatible with this release, all paths + prefixed with `//` should be prefixed with `.//` when called from any + element other than the document's root. +* [**edit 2/1/2019**]: Minor releases should not contain breaking changes. + Even though this breaking change was very minor, it was a mistake to include + it in this minor release. In the future, all breaking changes will be + limited to major releases (e.g., version 2.0.0). + +Release v1.0.0 +============== + +Initial release. diff --git a/vendor/github.com/beevik/etree/etree.go b/vendor/github.com/beevik/etree/etree.go new file mode 100644 index 0000000..9e24f90 --- /dev/null +++ b/vendor/github.com/beevik/etree/etree.go @@ -0,0 +1,1453 @@ +// Copyright 2015-2019 Brett Vickers. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package etree provides XML services through an Element Tree +// abstraction. +package etree + +import ( + "bufio" + "bytes" + "encoding/xml" + "errors" + "io" + "os" + "sort" + "strings" +) + +const ( + // NoIndent is used with Indent to disable all indenting. + NoIndent = -1 +) + +// ErrXML is returned when XML parsing fails due to incorrect formatting. +var ErrXML = errors.New("etree: invalid XML format") + +// ReadSettings allow for changing the default behavior of the ReadFrom* +// methods. +type ReadSettings struct { + // CharsetReader to be passed to standard xml.Decoder. Default: nil. + CharsetReader func(charset string, input io.Reader) (io.Reader, error) + + // Permissive allows input containing common mistakes such as missing tags + // or attribute values. Default: false. + Permissive bool + + // Entity to be passed to standard xml.Decoder. Default: nil. + Entity map[string]string +} + +// newReadSettings creates a default ReadSettings record. +func newReadSettings() ReadSettings { + return ReadSettings{ + CharsetReader: func(label string, input io.Reader) (io.Reader, error) { + return input, nil + }, + Permissive: false, + } +} + +// WriteSettings allow for changing the serialization behavior of the WriteTo* +// methods. +type WriteSettings struct { + // CanonicalEndTags forces the production of XML end tags, even for + // elements that have no child elements. Default: false. + CanonicalEndTags bool + + // CanonicalText forces the production of XML character references for + // text data characters &, <, and >. If false, XML character references + // are also produced for " and '. Default: false. + CanonicalText bool + + // CanonicalAttrVal forces the production of XML character references for + // attribute value characters &, < and ". If false, XML character + // references are also produced for > and '. Default: false. + CanonicalAttrVal bool + + // When outputting indented XML, use a carriage return and linefeed + // ("\r\n") as a new-line delimiter instead of just a linefeed ("\n"). + // This is useful on Windows-based systems. + UseCRLF bool +} + +// newWriteSettings creates a default WriteSettings record. +func newWriteSettings() WriteSettings { + return WriteSettings{ + CanonicalEndTags: false, + CanonicalText: false, + CanonicalAttrVal: false, + UseCRLF: false, + } +} + +// A Token is an empty interface that represents an Element, CharData, +// Comment, Directive, or ProcInst. +type Token interface { + Parent() *Element + Index() int + dup(parent *Element) Token + setParent(parent *Element) + setIndex(index int) + writeTo(w *bufio.Writer, s *WriteSettings) +} + +// A Document is a container holding a complete XML hierarchy. Its embedded +// element contains zero or more children, one of which is usually the root +// element. The embedded element may include other children such as +// processing instructions or BOM CharData tokens. +type Document struct { + Element + ReadSettings ReadSettings + WriteSettings WriteSettings +} + +// An Element represents an XML element, its attributes, and its child tokens. +type Element struct { + Space, Tag string // namespace prefix and tag + Attr []Attr // key-value attribute pairs + Child []Token // child tokens (elements, comments, etc.) + parent *Element // parent element + index int // token index in parent's children +} + +// An Attr represents a key-value attribute of an XML element. +type Attr struct { + Space, Key string // The attribute's namespace prefix and key + Value string // The attribute value string + element *Element // element containing the attribute +} + +// charDataFlags are used with CharData tokens to store additional settings. +type charDataFlags uint8 + +const ( + // The CharData was created by an indent function as whitespace. + whitespaceFlag charDataFlags = 1 << iota + + // The CharData contains a CDATA section. + cdataFlag +) + +// CharData can be used to represent character data or a CDATA section within +// an XML document. +type CharData struct { + Data string + parent *Element + index int + flags charDataFlags +} + +// A Comment represents an XML comment. +type Comment struct { + Data string + parent *Element + index int +} + +// A Directive represents an XML directive. +type Directive struct { + Data string + parent *Element + index int +} + +// A ProcInst represents an XML processing instruction. +type ProcInst struct { + Target string + Inst string + parent *Element + index int +} + +// NewDocument creates an XML document without a root element. +func NewDocument() *Document { + return &Document{ + Element{Child: make([]Token, 0)}, + newReadSettings(), + newWriteSettings(), + } +} + +// Copy returns a recursive, deep copy of the document. +func (d *Document) Copy() *Document { + return &Document{*(d.dup(nil).(*Element)), d.ReadSettings, d.WriteSettings} +} + +// Root returns the root element of the document, or nil if there is no root +// element. +func (d *Document) Root() *Element { + for _, t := range d.Child { + if c, ok := t.(*Element); ok { + return c + } + } + return nil +} + +// SetRoot replaces the document's root element with e. If the document +// already has a root when this function is called, then the document's +// original root is unbound first. If the element e is bound to another +// document (or to another element within a document), then it is unbound +// first. +func (d *Document) SetRoot(e *Element) { + if e.parent != nil { + e.parent.RemoveChild(e) + } + + p := &d.Element + e.setParent(p) + + // If there is already a root element, replace it. + for i, t := range p.Child { + if _, ok := t.(*Element); ok { + t.setParent(nil) + t.setIndex(-1) + p.Child[i] = e + e.setIndex(i) + return + } + } + + // No existing root element, so add it. + p.addChild(e) +} + +// ReadFrom reads XML from the reader r into the document d. It returns the +// number of bytes read and any error encountered. +func (d *Document) ReadFrom(r io.Reader) (n int64, err error) { + return d.Element.readFrom(r, d.ReadSettings) +} + +// ReadFromFile reads XML from the string s into the document d. +func (d *Document) ReadFromFile(filename string) error { + f, err := os.Open(filename) + if err != nil { + return err + } + defer f.Close() + _, err = d.ReadFrom(f) + return err +} + +// ReadFromBytes reads XML from the byte slice b into the document d. +func (d *Document) ReadFromBytes(b []byte) error { + _, err := d.ReadFrom(bytes.NewReader(b)) + return err +} + +// ReadFromString reads XML from the string s into the document d. +func (d *Document) ReadFromString(s string) error { + _, err := d.ReadFrom(strings.NewReader(s)) + return err +} + +// WriteTo serializes an XML document into the writer w. It +// returns the number of bytes written and any error encountered. +func (d *Document) WriteTo(w io.Writer) (n int64, err error) { + cw := newCountWriter(w) + b := bufio.NewWriter(cw) + for _, c := range d.Child { + c.writeTo(b, &d.WriteSettings) + } + err, n = b.Flush(), cw.bytes + return +} + +// WriteToFile serializes an XML document into the file named +// filename. +func (d *Document) WriteToFile(filename string) error { + f, err := os.Create(filename) + if err != nil { + return err + } + defer f.Close() + _, err = d.WriteTo(f) + return err +} + +// WriteToBytes serializes the XML document into a slice of +// bytes. +func (d *Document) WriteToBytes() (b []byte, err error) { + var buf bytes.Buffer + if _, err = d.WriteTo(&buf); err != nil { + return + } + return buf.Bytes(), nil +} + +// WriteToString serializes the XML document into a string. +func (d *Document) WriteToString() (s string, err error) { + var b []byte + if b, err = d.WriteToBytes(); err != nil { + return + } + return string(b), nil +} + +type indentFunc func(depth int) string + +// Indent modifies the document's element tree by inserting character data +// tokens containing newlines and indentation. The amount of indentation per +// depth level is given as spaces. Pass etree.NoIndent for spaces if you want +// no indentation at all. +func (d *Document) Indent(spaces int) { + var indent indentFunc + switch { + case spaces < 0: + indent = func(depth int) string { return "" } + case d.WriteSettings.UseCRLF == true: + indent = func(depth int) string { return indentCRLF(depth*spaces, indentSpaces) } + default: + indent = func(depth int) string { return indentLF(depth*spaces, indentSpaces) } + } + d.Element.indent(0, indent) +} + +// IndentTabs modifies the document's element tree by inserting CharData +// tokens containing newlines and tabs for indentation. One tab is used per +// indentation level. +func (d *Document) IndentTabs() { + var indent indentFunc + switch d.WriteSettings.UseCRLF { + case true: + indent = func(depth int) string { return indentCRLF(depth, indentTabs) } + default: + indent = func(depth int) string { return indentLF(depth, indentTabs) } + } + d.Element.indent(0, indent) +} + +// NewElement creates an unparented element with the specified tag. The tag +// may be prefixed by a namespace prefix and a colon. +func NewElement(tag string) *Element { + space, stag := spaceDecompose(tag) + return newElement(space, stag, nil) +} + +// newElement is a helper function that creates an element and binds it to +// a parent element if possible. +func newElement(space, tag string, parent *Element) *Element { + e := &Element{ + Space: space, + Tag: tag, + Attr: make([]Attr, 0), + Child: make([]Token, 0), + parent: parent, + index: -1, + } + if parent != nil { + parent.addChild(e) + } + return e +} + +// Copy creates a recursive, deep copy of the element and all its attributes +// and children. The returned element has no parent but can be parented to a +// another element using AddElement, or to a document using SetRoot. +func (e *Element) Copy() *Element { + return e.dup(nil).(*Element) +} + +// FullTag returns the element e's complete tag, including namespace prefix if +// present. +func (e *Element) FullTag() string { + if e.Space == "" { + return e.Tag + } + return e.Space + ":" + e.Tag +} + +// NamespaceURI returns the XML namespace URI associated with the element. If +// the element is part of the XML default namespace, NamespaceURI returns the +// empty string. +func (e *Element) NamespaceURI() string { + if e.Space == "" { + return e.findDefaultNamespaceURI() + } + return e.findLocalNamespaceURI(e.Space) +} + +// findLocalNamespaceURI finds the namespace URI corresponding to the +// requested prefix. +func (e *Element) findLocalNamespaceURI(prefix string) string { + for _, a := range e.Attr { + if a.Space == "xmlns" && a.Key == prefix { + return a.Value + } + } + + if e.parent == nil { + return "" + } + + return e.parent.findLocalNamespaceURI(prefix) +} + +// findDefaultNamespaceURI finds the default namespace URI of the element. +func (e *Element) findDefaultNamespaceURI() string { + for _, a := range e.Attr { + if a.Space == "" && a.Key == "xmlns" { + return a.Value + } + } + + if e.parent == nil { + return "" + } + + return e.parent.findDefaultNamespaceURI() +} + +// hasText returns true if the element has character data immediately +// folllowing the element's opening tag. +func (e *Element) hasText() bool { + if len(e.Child) == 0 { + return false + } + _, ok := e.Child[0].(*CharData) + return ok +} + +// namespacePrefix returns the namespace prefix associated with the element. +func (e *Element) namespacePrefix() string { + return e.Space +} + +// name returns the tag associated with the element. +func (e *Element) name() string { + return e.Tag +} + +// Text returns all character data immediately following the element's opening +// tag. +func (e *Element) Text() string { + if len(e.Child) == 0 { + return "" + } + + text := "" + for _, ch := range e.Child { + if cd, ok := ch.(*CharData); ok { + if text == "" { + text = cd.Data + } else { + text = text + cd.Data + } + } else { + break + } + } + return text +} + +// SetText replaces all character data immediately following an element's +// opening tag with the requested string. +func (e *Element) SetText(text string) { + e.replaceText(0, text, 0) +} + +// SetCData replaces all character data immediately following an element's +// opening tag with a CDATA section. +func (e *Element) SetCData(text string) { + e.replaceText(0, text, cdataFlag) +} + +// Tail returns all character data immediately following the element's end +// tag. +func (e *Element) Tail() string { + if e.Parent() == nil { + return "" + } + + p := e.Parent() + i := e.Index() + + text := "" + for _, ch := range p.Child[i+1:] { + if cd, ok := ch.(*CharData); ok { + if text == "" { + text = cd.Data + } else { + text = text + cd.Data + } + } else { + break + } + } + return text +} + +// SetTail replaces all character data immediately following the element's end +// tag with the requested string. +func (e *Element) SetTail(text string) { + if e.Parent() == nil { + return + } + + p := e.Parent() + p.replaceText(e.Index()+1, text, 0) +} + +// replaceText is a helper function that replaces a series of chardata tokens +// starting at index i with the requested text. +func (e *Element) replaceText(i int, text string, flags charDataFlags) { + end := e.findTermCharDataIndex(i) + + switch { + case end == i: + if text != "" { + // insert a new chardata token at index i + cd := newCharData(text, flags, nil) + e.InsertChildAt(i, cd) + } + + case end == i+1: + if text == "" { + // remove the chardata token at index i + e.RemoveChildAt(i) + } else { + // replace the first and only character token at index i + cd := e.Child[i].(*CharData) + cd.Data, cd.flags = text, flags + } + + default: + if text == "" { + // remove all chardata tokens starting from index i + copy(e.Child[i:], e.Child[end:]) + removed := end - i + e.Child = e.Child[:len(e.Child)-removed] + for j := i; j < len(e.Child); j++ { + e.Child[j].setIndex(j) + } + } else { + // replace the first chardata token at index i and remove all + // subsequent chardata tokens + cd := e.Child[i].(*CharData) + cd.Data, cd.flags = text, flags + copy(e.Child[i+1:], e.Child[end:]) + removed := end - (i + 1) + e.Child = e.Child[:len(e.Child)-removed] + for j := i + 1; j < len(e.Child); j++ { + e.Child[j].setIndex(j) + } + } + } +} + +// findTermCharDataIndex finds the index of the first child token that isn't +// a CharData token. It starts from the requested start index. +func (e *Element) findTermCharDataIndex(start int) int { + for i := start; i < len(e.Child); i++ { + if _, ok := e.Child[i].(*CharData); !ok { + return i + } + } + return len(e.Child) +} + +// CreateElement creates an element with the specified tag and adds it as the +// last child element of the element e. The tag may be prefixed by a namespace +// prefix and a colon. +func (e *Element) CreateElement(tag string) *Element { + space, stag := spaceDecompose(tag) + return newElement(space, stag, e) +} + +// AddChild adds the token t as the last child of element e. If token t was +// already the child of another element, it is first removed from its current +// parent element. +func (e *Element) AddChild(t Token) { + if t.Parent() != nil { + t.Parent().RemoveChild(t) + } + + t.setParent(e) + e.addChild(t) +} + +// InsertChild inserts the token t before e's existing child token ex. If ex +// is nil or ex is not a child of e, then t is added to the end of e's child +// token list. If token t was already the child of another element, it is +// first removed from its current parent element. +// +// Deprecated: InsertChild is deprecated. Use InsertChildAt instead. +func (e *Element) InsertChild(ex Token, t Token) { + if ex == nil || ex.Parent() != e { + e.AddChild(t) + return + } + + if t.Parent() != nil { + t.Parent().RemoveChild(t) + } + + t.setParent(e) + + i := ex.Index() + e.Child = append(e.Child, nil) + copy(e.Child[i+1:], e.Child[i:]) + e.Child[i] = t + + for j := i; j < len(e.Child); j++ { + e.Child[j].setIndex(j) + } +} + +// InsertChildAt inserts the token t into the element e's list of child tokens +// just before the requested index. If the index is greater than or equal to +// the length of the list of child tokens, the token t is added to the end of +// the list. +func (e *Element) InsertChildAt(index int, t Token) { + if index >= len(e.Child) { + e.AddChild(t) + return + } + + if t.Parent() != nil { + if t.Parent() == e && t.Index() > index { + index-- + } + t.Parent().RemoveChild(t) + } + + t.setParent(e) + + e.Child = append(e.Child, nil) + copy(e.Child[index+1:], e.Child[index:]) + e.Child[index] = t + + for j := index; j < len(e.Child); j++ { + e.Child[j].setIndex(j) + } +} + +// RemoveChild attempts to remove the token t from element e's list of +// children. If the token t is a child of e, then it is returned. Otherwise, +// nil is returned. +func (e *Element) RemoveChild(t Token) Token { + if t.Parent() != e { + return nil + } + return e.RemoveChildAt(t.Index()) +} + +// RemoveChildAt removes the index-th child token from the element e. The +// removed child token is returned. If the index is out of bounds, no child is +// removed and nil is returned. +func (e *Element) RemoveChildAt(index int) Token { + if index >= len(e.Child) { + return nil + } + + t := e.Child[index] + for j := index + 1; j < len(e.Child); j++ { + e.Child[j].setIndex(j - 1) + } + e.Child = append(e.Child[:index], e.Child[index+1:]...) + t.setIndex(-1) + t.setParent(nil) + return t +} + +// ReadFrom reads XML from the reader r and stores the result as a new child +// of element e. +func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err error) { + r := newCountReader(ri) + dec := xml.NewDecoder(r) + dec.CharsetReader = settings.CharsetReader + dec.Strict = !settings.Permissive + dec.Entity = settings.Entity + var stack stack + stack.push(e) + for { + t, err := dec.RawToken() + switch { + case err == io.EOF: + return r.bytes, nil + case err != nil: + return r.bytes, err + case stack.empty(): + return r.bytes, ErrXML + } + + top := stack.peek().(*Element) + + switch t := t.(type) { + case xml.StartElement: + e := newElement(t.Name.Space, t.Name.Local, top) + for _, a := range t.Attr { + e.createAttr(a.Name.Space, a.Name.Local, a.Value, e) + } + stack.push(e) + case xml.EndElement: + stack.pop() + case xml.CharData: + data := string(t) + var flags charDataFlags + if isWhitespace(data) { + flags = whitespaceFlag + } + newCharData(data, flags, top) + case xml.Comment: + newComment(string(t), top) + case xml.Directive: + newDirective(string(t), top) + case xml.ProcInst: + newProcInst(t.Target, string(t.Inst), top) + } + } +} + +// SelectAttr finds an element attribute matching the requested key and +// returns it if found. Returns nil if no matching attribute is found. The key +// may be prefixed by a namespace prefix and a colon. +func (e *Element) SelectAttr(key string) *Attr { + space, skey := spaceDecompose(key) + for i, a := range e.Attr { + if spaceMatch(space, a.Space) && skey == a.Key { + return &e.Attr[i] + } + } + return nil +} + +// SelectAttrValue finds an element attribute matching the requested key and +// returns its value if found. The key may be prefixed by a namespace prefix +// and a colon. If the key is not found, the dflt value is returned instead. +func (e *Element) SelectAttrValue(key, dflt string) string { + space, skey := spaceDecompose(key) + for _, a := range e.Attr { + if spaceMatch(space, a.Space) && skey == a.Key { + return a.Value + } + } + return dflt +} + +// ChildElements returns all elements that are children of element e. +func (e *Element) ChildElements() []*Element { + var elements []*Element + for _, t := range e.Child { + if c, ok := t.(*Element); ok { + elements = append(elements, c) + } + } + return elements +} + +// SelectElement returns the first child element with the given tag. The tag +// may be prefixed by a namespace prefix and a colon. Returns nil if no +// element with a matching tag was found. +func (e *Element) SelectElement(tag string) *Element { + space, stag := spaceDecompose(tag) + for _, t := range e.Child { + if c, ok := t.(*Element); ok && spaceMatch(space, c.Space) && stag == c.Tag { + return c + } + } + return nil +} + +// SelectElements returns a slice of all child elements with the given tag. +// The tag may be prefixed by a namespace prefix and a colon. +func (e *Element) SelectElements(tag string) []*Element { + space, stag := spaceDecompose(tag) + var elements []*Element + for _, t := range e.Child { + if c, ok := t.(*Element); ok && spaceMatch(space, c.Space) && stag == c.Tag { + elements = append(elements, c) + } + } + return elements +} + +// FindElement returns the first element matched by the XPath-like path +// string. Returns nil if no element is found using the path. Panics if an +// invalid path string is supplied. +func (e *Element) FindElement(path string) *Element { + return e.FindElementPath(MustCompilePath(path)) +} + +// FindElementPath returns the first element matched by the XPath-like path +// string. Returns nil if no element is found using the path. +func (e *Element) FindElementPath(path Path) *Element { + p := newPather() + elements := p.traverse(e, path) + switch { + case len(elements) > 0: + return elements[0] + default: + return nil + } +} + +// FindElements returns a slice of elements matched by the XPath-like path +// string. Panics if an invalid path string is supplied. +func (e *Element) FindElements(path string) []*Element { + return e.FindElementsPath(MustCompilePath(path)) +} + +// FindElementsPath returns a slice of elements matched by the Path object. +func (e *Element) FindElementsPath(path Path) []*Element { + p := newPather() + return p.traverse(e, path) +} + +// GetPath returns the absolute path of the element. +func (e *Element) GetPath() string { + path := []string{} + for seg := e; seg != nil; seg = seg.Parent() { + if seg.Tag != "" { + path = append(path, seg.Tag) + } + } + + // Reverse the path. + for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 { + path[i], path[j] = path[j], path[i] + } + + return "/" + strings.Join(path, "/") +} + +// GetRelativePath returns the path of the element relative to the source +// element. If the two elements are not part of the same element tree, then +// GetRelativePath returns the empty string. +func (e *Element) GetRelativePath(source *Element) string { + var path []*Element + + if source == nil { + return "" + } + + // Build a reverse path from the element toward the root. Stop if the + // source element is encountered. + var seg *Element + for seg = e; seg != nil && seg != source; seg = seg.Parent() { + path = append(path, seg) + } + + // If we found the source element, reverse the path and compose the + // string. + if seg == source { + if len(path) == 0 { + return "." + } + parts := []string{} + for i := len(path) - 1; i >= 0; i-- { + parts = append(parts, path[i].Tag) + } + return "./" + strings.Join(parts, "/") + } + + // The source wasn't encountered, so climb from the source element toward + // the root of the tree until an element in the reversed path is + // encountered. + + findPathIndex := func(e *Element, path []*Element) int { + for i, ee := range path { + if e == ee { + return i + } + } + return -1 + } + + climb := 0 + for seg = source; seg != nil; seg = seg.Parent() { + i := findPathIndex(seg, path) + if i >= 0 { + path = path[:i] // truncate at found segment + break + } + climb++ + } + + // No element in the reversed path was encountered, so the two elements + // must not be part of the same tree. + if seg == nil { + return "" + } + + // Reverse the (possibly truncated) path and prepend ".." segments to + // climb. + parts := []string{} + for i := 0; i < climb; i++ { + parts = append(parts, "..") + } + for i := len(path) - 1; i >= 0; i-- { + parts = append(parts, path[i].Tag) + } + return strings.Join(parts, "/") +} + +// indent recursively inserts proper indentation between an +// XML element's child tokens. +func (e *Element) indent(depth int, indent indentFunc) { + e.stripIndent() + n := len(e.Child) + if n == 0 { + return + } + + oldChild := e.Child + e.Child = make([]Token, 0, n*2+1) + isCharData, firstNonCharData := false, true + for _, c := range oldChild { + // Insert NL+indent before child if it's not character data. + // Exceptions: when it's the first non-character-data child, or when + // the child is at root depth. + _, isCharData = c.(*CharData) + if !isCharData { + if !firstNonCharData || depth > 0 { + s := indent(depth) + if s != "" { + newCharData(s, whitespaceFlag, e) + } + } + firstNonCharData = false + } + + e.addChild(c) + + // Recursively process child elements. + if ce, ok := c.(*Element); ok { + ce.indent(depth+1, indent) + } + } + + // Insert NL+indent before the last child. + if !isCharData { + if !firstNonCharData || depth > 0 { + s := indent(depth - 1) + if s != "" { + newCharData(s, whitespaceFlag, e) + } + } + } +} + +// stripIndent removes any previously inserted indentation. +func (e *Element) stripIndent() { + // Count the number of non-indent child tokens + n := len(e.Child) + for _, c := range e.Child { + if cd, ok := c.(*CharData); ok && cd.IsWhitespace() { + n-- + } + } + if n == len(e.Child) { + return + } + + // Strip out indent CharData + newChild := make([]Token, n) + j := 0 + for _, c := range e.Child { + if cd, ok := c.(*CharData); ok && cd.IsWhitespace() { + continue + } + newChild[j] = c + newChild[j].setIndex(j) + j++ + } + e.Child = newChild +} + +// dup duplicates the element. +func (e *Element) dup(parent *Element) Token { + ne := &Element{ + Space: e.Space, + Tag: e.Tag, + Attr: make([]Attr, len(e.Attr)), + Child: make([]Token, len(e.Child)), + parent: parent, + index: e.index, + } + for i, t := range e.Child { + ne.Child[i] = t.dup(ne) + } + for i, a := range e.Attr { + ne.Attr[i] = a + } + return ne +} + +// Parent returns the element token's parent element, or nil if it has no +// parent. +func (e *Element) Parent() *Element { + return e.parent +} + +// Index returns the index of this element within its parent element's +// list of child tokens. If this element has no parent element, the index +// is -1. +func (e *Element) Index() int { + return e.index +} + +// setParent replaces the element token's parent. +func (e *Element) setParent(parent *Element) { + e.parent = parent +} + +// setIndex sets the element token's index within its parent's Child slice. +func (e *Element) setIndex(index int) { + e.index = index +} + +// writeTo serializes the element to the writer w. +func (e *Element) writeTo(w *bufio.Writer, s *WriteSettings) { + w.WriteByte('<') + w.WriteString(e.FullTag()) + for _, a := range e.Attr { + w.WriteByte(' ') + a.writeTo(w, s) + } + if len(e.Child) > 0 { + w.WriteString(">") + for _, c := range e.Child { + c.writeTo(w, s) + } + w.Write([]byte{'<', '/'}) + w.WriteString(e.FullTag()) + w.WriteByte('>') + } else { + if s.CanonicalEndTags { + w.Write([]byte{'>', '<', '/'}) + w.WriteString(e.FullTag()) + w.WriteByte('>') + } else { + w.Write([]byte{'/', '>'}) + } + } +} + +// addChild adds a child token to the element e. +func (e *Element) addChild(t Token) { + t.setIndex(len(e.Child)) + e.Child = append(e.Child, t) +} + +// CreateAttr creates an attribute and adds it to element e. The key may be +// prefixed by a namespace prefix and a colon. If an attribute with the key +// already exists, its value is replaced. +func (e *Element) CreateAttr(key, value string) *Attr { + space, skey := spaceDecompose(key) + return e.createAttr(space, skey, value, e) +} + +// createAttr is a helper function that creates attributes. +func (e *Element) createAttr(space, key, value string, parent *Element) *Attr { + for i, a := range e.Attr { + if space == a.Space && key == a.Key { + e.Attr[i].Value = value + return &e.Attr[i] + } + } + a := Attr{ + Space: space, + Key: key, + Value: value, + element: parent, + } + e.Attr = append(e.Attr, a) + return &e.Attr[len(e.Attr)-1] +} + +// RemoveAttr removes and returns a copy of the first attribute of the element +// whose key matches the given key. The key may be prefixed by a namespace +// prefix and a colon. If a matching attribute does not exist, nil is +// returned. +func (e *Element) RemoveAttr(key string) *Attr { + space, skey := spaceDecompose(key) + for i, a := range e.Attr { + if space == a.Space && skey == a.Key { + e.Attr = append(e.Attr[0:i], e.Attr[i+1:]...) + return &Attr{ + Space: a.Space, + Key: a.Key, + Value: a.Value, + element: nil, + } + } + } + return nil +} + +// SortAttrs sorts the element's attributes lexicographically by key. +func (e *Element) SortAttrs() { + sort.Sort(byAttr(e.Attr)) +} + +type byAttr []Attr + +func (a byAttr) Len() int { + return len(a) +} + +func (a byAttr) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a byAttr) Less(i, j int) bool { + sp := strings.Compare(a[i].Space, a[j].Space) + if sp == 0 { + return strings.Compare(a[i].Key, a[j].Key) < 0 + } + return sp < 0 +} + +// FullKey returns the attribute a's complete key, including namespace prefix +// if present. +func (a *Attr) FullKey() string { + if a.Space == "" { + return a.Key + } + return a.Space + ":" + a.Key +} + +// Element returns the element containing the attribute. +func (a *Attr) Element() *Element { + return a.element +} + +// NamespaceURI returns the XML namespace URI associated with the attribute. +// If the element is part of the XML default namespace, NamespaceURI returns +// the empty string. +func (a *Attr) NamespaceURI() string { + return a.element.NamespaceURI() +} + +// writeTo serializes the attribute to the writer. +func (a *Attr) writeTo(w *bufio.Writer, s *WriteSettings) { + w.WriteString(a.FullKey()) + w.WriteString(`="`) + var m escapeMode + if s.CanonicalAttrVal { + m = escapeCanonicalAttr + } else { + m = escapeNormal + } + escapeString(w, a.Value, m) + w.WriteByte('"') +} + +// NewText creates a parentless CharData token containing character data. +func NewText(text string) *CharData { + return newCharData(text, 0, nil) +} + +// NewCData creates a parentless XML character CDATA section. +func NewCData(data string) *CharData { + return newCharData(data, cdataFlag, nil) +} + +// NewCharData creates a parentless CharData token containing character data. +// +// Deprecated: NewCharData is deprecated. Instead, use NewText, which does the +// same thing. +func NewCharData(data string) *CharData { + return newCharData(data, 0, nil) +} + +// newCharData creates a character data token and binds it to a parent +// element. If parent is nil, the CharData token remains unbound. +func newCharData(data string, flags charDataFlags, parent *Element) *CharData { + c := &CharData{ + Data: data, + parent: parent, + index: -1, + flags: flags, + } + if parent != nil { + parent.addChild(c) + } + return c +} + +// CreateText creates a CharData token containing character data and adds it +// as a child of element e. +func (e *Element) CreateText(text string) *CharData { + return newCharData(text, 0, e) +} + +// CreateCData creates a CharData token containing a CDATA section and adds it +// as a child of element e. +func (e *Element) CreateCData(data string) *CharData { + return newCharData(data, cdataFlag, e) +} + +// CreateCharData creates a CharData token containing character data and adds +// it as a child of element e. +// +// Deprecated: CreateCharData is deprecated. Instead, use CreateText, which +// does the same thing. +func (e *Element) CreateCharData(data string) *CharData { + return newCharData(data, 0, e) +} + +// dup duplicates the character data. +func (c *CharData) dup(parent *Element) Token { + return &CharData{ + Data: c.Data, + flags: c.flags, + parent: parent, + index: c.index, + } +} + +// IsCData returns true if the character data token is to be encoded as a +// CDATA section. +func (c *CharData) IsCData() bool { + return (c.flags & cdataFlag) != 0 +} + +// IsWhitespace returns true if the character data token was created by one of +// the document Indent methods to contain only whitespace. +func (c *CharData) IsWhitespace() bool { + return (c.flags & whitespaceFlag) != 0 +} + +// Parent returns the character data token's parent element, or nil if it has +// no parent. +func (c *CharData) Parent() *Element { + return c.parent +} + +// Index returns the index of this CharData token within its parent element's +// list of child tokens. If this CharData token has no parent element, the +// index is -1. +func (c *CharData) Index() int { + return c.index +} + +// setParent replaces the character data token's parent. +func (c *CharData) setParent(parent *Element) { + c.parent = parent +} + +// setIndex sets the CharData token's index within its parent element's Child +// slice. +func (c *CharData) setIndex(index int) { + c.index = index +} + +// writeTo serializes character data to the writer. +func (c *CharData) writeTo(w *bufio.Writer, s *WriteSettings) { + if c.IsCData() { + w.WriteString(``) + } else { + var m escapeMode + if s.CanonicalText { + m = escapeCanonicalText + } else { + m = escapeNormal + } + escapeString(w, c.Data, m) + } +} + +// NewComment creates a parentless XML comment. +func NewComment(comment string) *Comment { + return newComment(comment, nil) +} + +// NewComment creates an XML comment and binds it to a parent element. If +// parent is nil, the Comment remains unbound. +func newComment(comment string, parent *Element) *Comment { + c := &Comment{ + Data: comment, + parent: parent, + index: -1, + } + if parent != nil { + parent.addChild(c) + } + return c +} + +// CreateComment creates an XML comment and adds it as a child of element e. +func (e *Element) CreateComment(comment string) *Comment { + return newComment(comment, e) +} + +// dup duplicates the comment. +func (c *Comment) dup(parent *Element) Token { + return &Comment{ + Data: c.Data, + parent: parent, + index: c.index, + } +} + +// Parent returns comment token's parent element, or nil if it has no parent. +func (c *Comment) Parent() *Element { + return c.parent +} + +// Index returns the index of this Comment token within its parent element's +// list of child tokens. If this Comment token has no parent element, the +// index is -1. +func (c *Comment) Index() int { + return c.index +} + +// setParent replaces the comment token's parent. +func (c *Comment) setParent(parent *Element) { + c.parent = parent +} + +// setIndex sets the Comment token's index within its parent element's Child +// slice. +func (c *Comment) setIndex(index int) { + c.index = index +} + +// writeTo serialies the comment to the writer. +func (c *Comment) writeTo(w *bufio.Writer, s *WriteSettings) { + w.WriteString("") +} + +// NewDirective creates a parentless XML directive. +func NewDirective(data string) *Directive { + return newDirective(data, nil) +} + +// newDirective creates an XML directive and binds it to a parent element. If +// parent is nil, the Directive remains unbound. +func newDirective(data string, parent *Element) *Directive { + d := &Directive{ + Data: data, + parent: parent, + index: -1, + } + if parent != nil { + parent.addChild(d) + } + return d +} + +// CreateDirective creates an XML directive and adds it as the last child of +// element e. +func (e *Element) CreateDirective(data string) *Directive { + return newDirective(data, e) +} + +// dup duplicates the directive. +func (d *Directive) dup(parent *Element) Token { + return &Directive{ + Data: d.Data, + parent: parent, + index: d.index, + } +} + +// Parent returns directive token's parent element, or nil if it has no +// parent. +func (d *Directive) Parent() *Element { + return d.parent +} + +// Index returns the index of this Directive token within its parent element's +// list of child tokens. If this Directive token has no parent element, the +// index is -1. +func (d *Directive) Index() int { + return d.index +} + +// setParent replaces the directive token's parent. +func (d *Directive) setParent(parent *Element) { + d.parent = parent +} + +// setIndex sets the Directive token's index within its parent element's Child +// slice. +func (d *Directive) setIndex(index int) { + d.index = index +} + +// writeTo serializes the XML directive to the writer. +func (d *Directive) writeTo(w *bufio.Writer, s *WriteSettings) { + w.WriteString("") +} + +// NewProcInst creates a parentless XML processing instruction. +func NewProcInst(target, inst string) *ProcInst { + return newProcInst(target, inst, nil) +} + +// newProcInst creates an XML processing instruction and binds it to a parent +// element. If parent is nil, the ProcInst remains unbound. +func newProcInst(target, inst string, parent *Element) *ProcInst { + p := &ProcInst{ + Target: target, + Inst: inst, + parent: parent, + index: -1, + } + if parent != nil { + parent.addChild(p) + } + return p +} + +// CreateProcInst creates a processing instruction and adds it as a child of +// element e. +func (e *Element) CreateProcInst(target, inst string) *ProcInst { + return newProcInst(target, inst, e) +} + +// dup duplicates the procinst. +func (p *ProcInst) dup(parent *Element) Token { + return &ProcInst{ + Target: p.Target, + Inst: p.Inst, + parent: parent, + index: p.index, + } +} + +// Parent returns processing instruction token's parent element, or nil if it +// has no parent. +func (p *ProcInst) Parent() *Element { + return p.parent +} + +// Index returns the index of this ProcInst token within its parent element's +// list of child tokens. If this ProcInst token has no parent element, the +// index is -1. +func (p *ProcInst) Index() int { + return p.index +} + +// setParent replaces the processing instruction token's parent. +func (p *ProcInst) setParent(parent *Element) { + p.parent = parent +} + +// setIndex sets the processing instruction token's index within its parent +// element's Child slice. +func (p *ProcInst) setIndex(index int) { + p.index = index +} + +// writeTo serializes the processing instruction to the writer. +func (p *ProcInst) writeTo(w *bufio.Writer, s *WriteSettings) { + w.WriteString("") +} diff --git a/vendor/github.com/beevik/etree/helpers.go b/vendor/github.com/beevik/etree/helpers.go new file mode 100644 index 0000000..825e14e --- /dev/null +++ b/vendor/github.com/beevik/etree/helpers.go @@ -0,0 +1,276 @@ +// Copyright 2015-2019 Brett Vickers. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package etree + +import ( + "bufio" + "io" + "strings" + "unicode/utf8" +) + +// A simple stack +type stack struct { + data []interface{} +} + +func (s *stack) empty() bool { + return len(s.data) == 0 +} + +func (s *stack) push(value interface{}) { + s.data = append(s.data, value) +} + +func (s *stack) pop() interface{} { + value := s.data[len(s.data)-1] + s.data[len(s.data)-1] = nil + s.data = s.data[:len(s.data)-1] + return value +} + +func (s *stack) peek() interface{} { + return s.data[len(s.data)-1] +} + +// A fifo is a simple first-in-first-out queue. +type fifo struct { + data []interface{} + head, tail int +} + +func (f *fifo) add(value interface{}) { + if f.len()+1 >= len(f.data) { + f.grow() + } + f.data[f.tail] = value + if f.tail++; f.tail == len(f.data) { + f.tail = 0 + } +} + +func (f *fifo) remove() interface{} { + value := f.data[f.head] + f.data[f.head] = nil + if f.head++; f.head == len(f.data) { + f.head = 0 + } + return value +} + +func (f *fifo) len() int { + if f.tail >= f.head { + return f.tail - f.head + } + return len(f.data) - f.head + f.tail +} + +func (f *fifo) grow() { + c := len(f.data) * 2 + if c == 0 { + c = 4 + } + buf, count := make([]interface{}, c), f.len() + if f.tail >= f.head { + copy(buf[0:count], f.data[f.head:f.tail]) + } else { + hindex := len(f.data) - f.head + copy(buf[0:hindex], f.data[f.head:]) + copy(buf[hindex:count], f.data[:f.tail]) + } + f.data, f.head, f.tail = buf, 0, count +} + +// countReader implements a proxy reader that counts the number of +// bytes read from its encapsulated reader. +type countReader struct { + r io.Reader + bytes int64 +} + +func newCountReader(r io.Reader) *countReader { + return &countReader{r: r} +} + +func (cr *countReader) Read(p []byte) (n int, err error) { + b, err := cr.r.Read(p) + cr.bytes += int64(b) + return b, err +} + +// countWriter implements a proxy writer that counts the number of +// bytes written by its encapsulated writer. +type countWriter struct { + w io.Writer + bytes int64 +} + +func newCountWriter(w io.Writer) *countWriter { + return &countWriter{w: w} +} + +func (cw *countWriter) Write(p []byte) (n int, err error) { + b, err := cw.w.Write(p) + cw.bytes += int64(b) + return b, err +} + +// isWhitespace returns true if the byte slice contains only +// whitespace characters. +func isWhitespace(s string) bool { + for i := 0; i < len(s); i++ { + if c := s[i]; c != ' ' && c != '\t' && c != '\n' && c != '\r' { + return false + } + } + return true +} + +// spaceMatch returns true if namespace a is the empty string +// or if namespace a equals namespace b. +func spaceMatch(a, b string) bool { + switch { + case a == "": + return true + default: + return a == b + } +} + +// spaceDecompose breaks a namespace:tag identifier at the ':' +// and returns the two parts. +func spaceDecompose(str string) (space, key string) { + colon := strings.IndexByte(str, ':') + if colon == -1 { + return "", str + } + return str[:colon], str[colon+1:] +} + +// Strings used by indentCRLF and indentLF +const ( + indentSpaces = "\r\n " + indentTabs = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" +) + +// indentCRLF returns a CRLF newline followed by n copies of the first +// non-CRLF character in the source string. +func indentCRLF(n int, source string) string { + switch { + case n < 0: + return source[:2] + case n < len(source)-1: + return source[:n+2] + default: + return source + strings.Repeat(source[2:3], n-len(source)+2) + } +} + +// indentLF returns a LF newline followed by n copies of the first non-LF +// character in the source string. +func indentLF(n int, source string) string { + switch { + case n < 0: + return source[1:2] + case n < len(source)-1: + return source[1 : n+2] + default: + return source[1:] + strings.Repeat(source[2:3], n-len(source)+2) + } +} + +// nextIndex returns the index of the next occurrence of sep in s, +// starting from offset. It returns -1 if the sep string is not found. +func nextIndex(s, sep string, offset int) int { + switch i := strings.Index(s[offset:], sep); i { + case -1: + return -1 + default: + return offset + i + } +} + +// isInteger returns true if the string s contains an integer. +func isInteger(s string) bool { + for i := 0; i < len(s); i++ { + if (s[i] < '0' || s[i] > '9') && !(i == 0 && s[i] == '-') { + return false + } + } + return true +} + +type escapeMode byte + +const ( + escapeNormal escapeMode = iota + escapeCanonicalText + escapeCanonicalAttr +) + +// escapeString writes an escaped version of a string to the writer. +func escapeString(w *bufio.Writer, s string, m escapeMode) { + var esc []byte + last := 0 + for i := 0; i < len(s); { + r, width := utf8.DecodeRuneInString(s[i:]) + i += width + switch r { + case '&': + esc = []byte("&") + case '<': + esc = []byte("<") + case '>': + if m == escapeCanonicalAttr { + continue + } + esc = []byte(">") + case '\'': + if m != escapeNormal { + continue + } + esc = []byte("'") + case '"': + if m == escapeCanonicalText { + continue + } + esc = []byte(""") + case '\t': + if m != escapeCanonicalAttr { + continue + } + esc = []byte(" ") + case '\n': + if m != escapeCanonicalAttr { + continue + } + esc = []byte(" ") + case '\r': + if m == escapeNormal { + continue + } + esc = []byte(" ") + default: + if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { + esc = []byte("\uFFFD") + break + } + continue + } + w.WriteString(s[last : i-width]) + w.Write(esc) + last = i + } + w.WriteString(s[last:]) +} + +func isInCharacterRange(r rune) bool { + return r == 0x09 || + r == 0x0A || + r == 0x0D || + r >= 0x20 && r <= 0xD7FF || + r >= 0xE000 && r <= 0xFFFD || + r >= 0x10000 && r <= 0x10FFFF +} diff --git a/vendor/github.com/beevik/etree/path.go b/vendor/github.com/beevik/etree/path.go new file mode 100644 index 0000000..82db0ac --- /dev/null +++ b/vendor/github.com/beevik/etree/path.go @@ -0,0 +1,582 @@ +// Copyright 2015-2019 Brett Vickers. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package etree + +import ( + "strconv" + "strings" +) + +/* +A Path is a string that represents a search path through an etree starting +from the document root or an arbitrary element. Paths are used with the +Element object's Find* methods to locate and return desired elements. + +A Path consists of a series of slash-separated "selectors", each of which may +be modified by one or more bracket-enclosed "filters". Selectors are used to +traverse the etree from element to element, while filters are used to narrow +the list of candidate elements at each node. + +Although etree Path strings are similar to XPath strings +(https://www.w3.org/TR/1999/REC-xpath-19991116/), they have a more limited set +of selectors and filtering options. + +The following selectors are supported by etree Path strings: + + . Select the current element. + .. Select the parent of the current element. + * Select all child elements of the current element. + / Select the root element when used at the start of a path. + // Select all descendants of the current element. + tag Select all child elements with a name matching the tag. + +The following basic filters are supported by etree Path strings: + + [@attrib] Keep elements with an attribute named attrib. + [@attrib='val'] Keep elements with an attribute named attrib and value matching val. + [tag] Keep elements with a child element named tag. + [tag='val'] Keep elements with a child element named tag and text matching val. + [n] Keep the n-th element, where n is a numeric index starting from 1. + +The following function filters are also supported: + + [text()] Keep elements with non-empty text. + [text()='val'] Keep elements whose text matches val. + [local-name()='val'] Keep elements whose un-prefixed tag matches val. + [name()='val'] Keep elements whose full tag exactly matches val. + [namespace-prefix()='val'] Keep elements whose namespace prefix matches val. + [namespace-uri()='val'] Keep elements whose namespace URI matches val. + +Here are some examples of Path strings: + +- Select the bookstore child element of the root element: + /bookstore + +- Beginning from the root element, select the title elements of all +descendant book elements having a 'category' attribute of 'WEB': + //book[@category='WEB']/title + +- Beginning from the current element, select the first descendant +book element with a title child element containing the text 'Great +Expectations': + .//book[title='Great Expectations'][1] + +- Beginning from the current element, select all child elements of +book elements with an attribute 'language' set to 'english': + ./book/*[@language='english'] + +- Beginning from the current element, select all child elements of +book elements containing the text 'special': + ./book/*[text()='special'] + +- Beginning from the current element, select all descendant book +elements whose title child element has a 'language' attribute of 'french': + .//book/title[@language='french']/.. + +- Beginning from the current element, select all book elements +belonging to the http://www.w3.org/TR/html4/ namespace: + .//book[namespace-uri()='http://www.w3.org/TR/html4/'] + +*/ +type Path struct { + segments []segment +} + +// ErrPath is returned by path functions when an invalid etree path is provided. +type ErrPath string + +// Error returns the string describing a path error. +func (err ErrPath) Error() string { + return "etree: " + string(err) +} + +// CompilePath creates an optimized version of an XPath-like string that +// can be used to query elements in an element tree. +func CompilePath(path string) (Path, error) { + var comp compiler + segments := comp.parsePath(path) + if comp.err != ErrPath("") { + return Path{nil}, comp.err + } + return Path{segments}, nil +} + +// MustCompilePath creates an optimized version of an XPath-like string that +// can be used to query elements in an element tree. Panics if an error +// occurs. Use this function to create Paths when you know the path is +// valid (i.e., if it's hard-coded). +func MustCompilePath(path string) Path { + p, err := CompilePath(path) + if err != nil { + panic(err) + } + return p +} + +// A segment is a portion of a path between "/" characters. +// It contains one selector and zero or more [filters]. +type segment struct { + sel selector + filters []filter +} + +func (seg *segment) apply(e *Element, p *pather) { + seg.sel.apply(e, p) + for _, f := range seg.filters { + f.apply(p) + } +} + +// A selector selects XML elements for consideration by the +// path traversal. +type selector interface { + apply(e *Element, p *pather) +} + +// A filter pares down a list of candidate XML elements based +// on a path filter in [brackets]. +type filter interface { + apply(p *pather) +} + +// A pather is helper object that traverses an element tree using +// a Path object. It collects and deduplicates all elements matching +// the path query. +type pather struct { + queue fifo + results []*Element + inResults map[*Element]bool + candidates []*Element + scratch []*Element // used by filters +} + +// A node represents an element and the remaining path segments that +// should be applied against it by the pather. +type node struct { + e *Element + segments []segment +} + +func newPather() *pather { + return &pather{ + results: make([]*Element, 0), + inResults: make(map[*Element]bool), + candidates: make([]*Element, 0), + scratch: make([]*Element, 0), + } +} + +// traverse follows the path from the element e, collecting +// and then returning all elements that match the path's selectors +// and filters. +func (p *pather) traverse(e *Element, path Path) []*Element { + for p.queue.add(node{e, path.segments}); p.queue.len() > 0; { + p.eval(p.queue.remove().(node)) + } + return p.results +} + +// eval evalutes the current path node by applying the remaining +// path's selector rules against the node's element. +func (p *pather) eval(n node) { + p.candidates = p.candidates[0:0] + seg, remain := n.segments[0], n.segments[1:] + seg.apply(n.e, p) + + if len(remain) == 0 { + for _, c := range p.candidates { + if in := p.inResults[c]; !in { + p.inResults[c] = true + p.results = append(p.results, c) + } + } + } else { + for _, c := range p.candidates { + p.queue.add(node{c, remain}) + } + } +} + +// A compiler generates a compiled path from a path string. +type compiler struct { + err ErrPath +} + +// parsePath parses an XPath-like string describing a path +// through an element tree and returns a slice of segment +// descriptors. +func (c *compiler) parsePath(path string) []segment { + // If path ends with //, fix it + if strings.HasSuffix(path, "//") { + path = path + "*" + } + + var segments []segment + + // Check for an absolute path + if strings.HasPrefix(path, "/") { + segments = append(segments, segment{new(selectRoot), []filter{}}) + path = path[1:] + } + + // Split path into segments + for _, s := range splitPath(path) { + segments = append(segments, c.parseSegment(s)) + if c.err != ErrPath("") { + break + } + } + return segments +} + +func splitPath(path string) []string { + pieces := make([]string, 0) + start := 0 + inquote := false + for i := 0; i+1 <= len(path); i++ { + if path[i] == '\'' { + inquote = !inquote + } else if path[i] == '/' && !inquote { + pieces = append(pieces, path[start:i]) + start = i + 1 + } + } + return append(pieces, path[start:]) +} + +// parseSegment parses a path segment between / characters. +func (c *compiler) parseSegment(path string) segment { + pieces := strings.Split(path, "[") + seg := segment{ + sel: c.parseSelector(pieces[0]), + filters: []filter{}, + } + for i := 1; i < len(pieces); i++ { + fpath := pieces[i] + if fpath[len(fpath)-1] != ']' { + c.err = ErrPath("path has invalid filter [brackets].") + break + } + seg.filters = append(seg.filters, c.parseFilter(fpath[:len(fpath)-1])) + } + return seg +} + +// parseSelector parses a selector at the start of a path segment. +func (c *compiler) parseSelector(path string) selector { + switch path { + case ".": + return new(selectSelf) + case "..": + return new(selectParent) + case "*": + return new(selectChildren) + case "": + return new(selectDescendants) + default: + return newSelectChildrenByTag(path) + } +} + +var fnTable = map[string]struct { + hasFn func(e *Element) bool + getValFn func(e *Element) string +}{ + "local-name": {nil, (*Element).name}, + "name": {nil, (*Element).FullTag}, + "namespace-prefix": {nil, (*Element).namespacePrefix}, + "namespace-uri": {nil, (*Element).NamespaceURI}, + "text": {(*Element).hasText, (*Element).Text}, +} + +// parseFilter parses a path filter contained within [brackets]. +func (c *compiler) parseFilter(path string) filter { + if len(path) == 0 { + c.err = ErrPath("path contains an empty filter expression.") + return nil + } + + // Filter contains [@attr='val'], [fn()='val'], or [tag='val']? + eqindex := strings.Index(path, "='") + if eqindex >= 0 { + rindex := nextIndex(path, "'", eqindex+2) + if rindex != len(path)-1 { + c.err = ErrPath("path has mismatched filter quotes.") + return nil + } + + key := path[:eqindex] + value := path[eqindex+2 : rindex] + + switch { + case key[0] == '@': + return newFilterAttrVal(key[1:], value) + case strings.HasSuffix(key, "()"): + fn := key[:len(key)-2] + if t, ok := fnTable[fn]; ok && t.getValFn != nil { + return newFilterFuncVal(t.getValFn, value) + } + c.err = ErrPath("path has unknown function " + fn) + return nil + default: + return newFilterChildText(key, value) + } + } + + // Filter contains [@attr], [N], [tag] or [fn()] + switch { + case path[0] == '@': + return newFilterAttr(path[1:]) + case strings.HasSuffix(path, "()"): + fn := path[:len(path)-2] + if t, ok := fnTable[fn]; ok && t.hasFn != nil { + return newFilterFunc(t.hasFn) + } + c.err = ErrPath("path has unknown function " + fn) + return nil + case isInteger(path): + pos, _ := strconv.Atoi(path) + switch { + case pos > 0: + return newFilterPos(pos - 1) + default: + return newFilterPos(pos) + } + default: + return newFilterChild(path) + } +} + +// selectSelf selects the current element into the candidate list. +type selectSelf struct{} + +func (s *selectSelf) apply(e *Element, p *pather) { + p.candidates = append(p.candidates, e) +} + +// selectRoot selects the element's root node. +type selectRoot struct{} + +func (s *selectRoot) apply(e *Element, p *pather) { + root := e + for root.parent != nil { + root = root.parent + } + p.candidates = append(p.candidates, root) +} + +// selectParent selects the element's parent into the candidate list. +type selectParent struct{} + +func (s *selectParent) apply(e *Element, p *pather) { + if e.parent != nil { + p.candidates = append(p.candidates, e.parent) + } +} + +// selectChildren selects the element's child elements into the +// candidate list. +type selectChildren struct{} + +func (s *selectChildren) apply(e *Element, p *pather) { + for _, c := range e.Child { + if c, ok := c.(*Element); ok { + p.candidates = append(p.candidates, c) + } + } +} + +// selectDescendants selects all descendant child elements +// of the element into the candidate list. +type selectDescendants struct{} + +func (s *selectDescendants) apply(e *Element, p *pather) { + var queue fifo + for queue.add(e); queue.len() > 0; { + e := queue.remove().(*Element) + p.candidates = append(p.candidates, e) + for _, c := range e.Child { + if c, ok := c.(*Element); ok { + queue.add(c) + } + } + } +} + +// selectChildrenByTag selects into the candidate list all child +// elements of the element having the specified tag. +type selectChildrenByTag struct { + space, tag string +} + +func newSelectChildrenByTag(path string) *selectChildrenByTag { + s, l := spaceDecompose(path) + return &selectChildrenByTag{s, l} +} + +func (s *selectChildrenByTag) apply(e *Element, p *pather) { + for _, c := range e.Child { + if c, ok := c.(*Element); ok && spaceMatch(s.space, c.Space) && s.tag == c.Tag { + p.candidates = append(p.candidates, c) + } + } +} + +// filterPos filters the candidate list, keeping only the +// candidate at the specified index. +type filterPos struct { + index int +} + +func newFilterPos(pos int) *filterPos { + return &filterPos{pos} +} + +func (f *filterPos) apply(p *pather) { + if f.index >= 0 { + if f.index < len(p.candidates) { + p.scratch = append(p.scratch, p.candidates[f.index]) + } + } else { + if -f.index <= len(p.candidates) { + p.scratch = append(p.scratch, p.candidates[len(p.candidates)+f.index]) + } + } + p.candidates, p.scratch = p.scratch, p.candidates[0:0] +} + +// filterAttr filters the candidate list for elements having +// the specified attribute. +type filterAttr struct { + space, key string +} + +func newFilterAttr(str string) *filterAttr { + s, l := spaceDecompose(str) + return &filterAttr{s, l} +} + +func (f *filterAttr) apply(p *pather) { + for _, c := range p.candidates { + for _, a := range c.Attr { + if spaceMatch(f.space, a.Space) && f.key == a.Key { + p.scratch = append(p.scratch, c) + break + } + } + } + p.candidates, p.scratch = p.scratch, p.candidates[0:0] +} + +// filterAttrVal filters the candidate list for elements having +// the specified attribute with the specified value. +type filterAttrVal struct { + space, key, val string +} + +func newFilterAttrVal(str, value string) *filterAttrVal { + s, l := spaceDecompose(str) + return &filterAttrVal{s, l, value} +} + +func (f *filterAttrVal) apply(p *pather) { + for _, c := range p.candidates { + for _, a := range c.Attr { + if spaceMatch(f.space, a.Space) && f.key == a.Key && f.val == a.Value { + p.scratch = append(p.scratch, c) + break + } + } + } + p.candidates, p.scratch = p.scratch, p.candidates[0:0] +} + +// filterFunc filters the candidate list for elements satisfying a custom +// boolean function. +type filterFunc struct { + fn func(e *Element) bool +} + +func newFilterFunc(fn func(e *Element) bool) *filterFunc { + return &filterFunc{fn} +} + +func (f *filterFunc) apply(p *pather) { + for _, c := range p.candidates { + if f.fn(c) { + p.scratch = append(p.scratch, c) + } + } + p.candidates, p.scratch = p.scratch, p.candidates[0:0] +} + +// filterFuncVal filters the candidate list for elements containing a value +// matching the result of a custom function. +type filterFuncVal struct { + fn func(e *Element) string + val string +} + +func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal { + return &filterFuncVal{fn, value} +} + +func (f *filterFuncVal) apply(p *pather) { + for _, c := range p.candidates { + if f.fn(c) == f.val { + p.scratch = append(p.scratch, c) + } + } + p.candidates, p.scratch = p.scratch, p.candidates[0:0] +} + +// filterChild filters the candidate list for elements having +// a child element with the specified tag. +type filterChild struct { + space, tag string +} + +func newFilterChild(str string) *filterChild { + s, l := spaceDecompose(str) + return &filterChild{s, l} +} + +func (f *filterChild) apply(p *pather) { + for _, c := range p.candidates { + for _, cc := range c.Child { + if cc, ok := cc.(*Element); ok && + spaceMatch(f.space, cc.Space) && + f.tag == cc.Tag { + p.scratch = append(p.scratch, c) + } + } + } + p.candidates, p.scratch = p.scratch, p.candidates[0:0] +} + +// filterChildText filters the candidate list for elements having +// a child element with the specified tag and text. +type filterChildText struct { + space, tag, text string +} + +func newFilterChildText(str, text string) *filterChildText { + s, l := spaceDecompose(str) + return &filterChildText{s, l, text} +} + +func (f *filterChildText) apply(p *pather) { + for _, c := range p.candidates { + for _, cc := range c.Child { + if cc, ok := cc.(*Element); ok && + spaceMatch(f.space, cc.Space) && + f.tag == cc.Tag && + f.text == cc.Text() { + p.scratch = append(p.scratch, c) + } + } + } + p.candidates, p.scratch = p.scratch, p.candidates[0:0] +} diff --git a/vendor/modules.txt b/vendor/modules.txt index aa316f7..73aabb7 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,3 +1,6 @@ +# github.com/beevik/etree v1.1.0 +## explicit +github.com/beevik/etree # github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f github.com/xeipuuv/gojsonpointer # github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415