Skip to content
This repository has been archived by the owner on Nov 1, 2022. It is now read-only.

Commit

Permalink
Harden yaml stream parsing of manifests files
Browse files Browse the repository at this point in the history
Flux choked on end-of-document markers (`...`).

To avoid complicating the existing multidoc parser (stolen from kubernetes) I
abused go-yaml's Decoder to obtain the raw documents from the stream by
unmarshalling to an interface{} and marshalling again.
  • Loading branch information
Alfonso Acosta committed Apr 13, 2019
1 parent 2c0176c commit f97508e
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 52 deletions.
71 changes: 19 additions & 52 deletions cluster/kubernetes/resource/load.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
package resource

import (
"bufio"
"bytes"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"

"github.com/pkg/errors"
"gopkg.in/yaml.v2"
)

// Load takes paths to directories or files, and creates an object set
Expand Down Expand Up @@ -128,21 +129,24 @@ func looksLikeChart(dir string) bool {
// constructs an object set from the resources represented therein.
func ParseMultidoc(multidoc []byte, source string) (map[string]KubeManifest, error) {
objs := map[string]KubeManifest{}
chunks := bufio.NewScanner(bytes.NewReader(multidoc))
initialBuffer := make([]byte, 4096) // Matches startBufSize in bufio/scan.go
chunks.Buffer(initialBuffer, 1024*1024) // Allow growth to 1MB
chunks.Split(splitYAMLDocument)

decoder := yaml.NewDecoder(bytes.NewReader(multidoc))
var obj KubeManifest
var err error
for chunks.Scan() {
// It's not guaranteed that the return value of Bytes() will not be mutated later:
// https://golang.org/pkg/bufio/#Scanner.Bytes
// But we will be snaffling it away, so make a copy.
bytes := chunks.Bytes()
bytes2 := make([]byte, len(bytes), cap(bytes))
copy(bytes2, bytes)
if obj, err = unmarshalObject(source, bytes2); err != nil {
for {
// In order to use the decoder to extract raw documents
// from the stream, we decode generically and encode again.
// The result is the raw document (pretty-printed and
// without comments though)
var val interface{}
if err := decoder.Decode(&val); err != nil {
break
}
bytes, err := yaml.Marshal(val)
if err != nil {
return nil, errors.Wrapf(err, "parsing YAML doc from %q", source)
}

if obj, err = unmarshalObject(source, bytes); err != nil {
return nil, errors.Wrapf(err, "parsing YAML doc from %q", source)
}
if obj == nil {
Expand All @@ -159,45 +163,8 @@ func ParseMultidoc(multidoc []byte, source string) (map[string]KubeManifest, err
}
}

if err := chunks.Err(); err != nil {
if err != io.EOF {
return objs, errors.Wrapf(err, "scanning multidoc from %q", source)
}
return objs, nil
}

// ---
// Taken directly from https://github.com/kubernetes/apimachinery/blob/master/pkg/util/yaml/decoder.go.

// yamlSeparator is the byte sequence that introduces a document separator
// line: a newline immediately followed by three dashes.
const yamlSeparator = "\n---"

// splitYAMLDocument is a bufio.SplitFunc for splitting YAML streams into
// individual documents.
//
// A document ends just before the first occurrence of yamlSeparator. The rest
// of the separator line, up to and including its terminating newline, is
// consumed and never becomes part of a token.
//
// Parameters follow the bufio.SplitFunc contract: data is the unprocessed
// input seen so far and atEOF reports that no further input will arrive.
// Returning (0, nil, nil) asks the scanner for more data. err is always nil.
//
// Unlike the Kubernetes helper this was adapted from, a separator line that is
// the last line of the stream and lacks a trailing newline (e.g. "--- ") still
// yields the preceding document instead of silently dropping it.
func splitYAMLDocument(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	sepStart := bytes.Index(data, []byte(yamlSeparator))
	if sepStart < 0 {
		// No separator in sight: at EOF everything left is the final,
		// non-terminated document; otherwise wait for more input.
		if atEOF {
			return len(data), data, nil
		}
		return 0, nil, nil
	}

	// We have a potential document terminator. Skip the remainder of the
	// separator line so the next token starts on the following line.
	sepEnd := sepStart + len(yamlSeparator)
	if nl := bytes.IndexByte(data[sepEnd:], '\n'); nl >= 0 {
		return sepEnd + nl + 1, data[:sepStart], nil
	}

	// The separator line is not newline-terminated. At EOF nothing more can
	// arrive, so the separator line runs to the end of the input: emit the
	// document before it and consume everything. Returning no token here
	// would make bufio.Scanner stop and lose that document.
	if atEOF {
		return len(data), data[:sepStart], nil
	}

	// Request more data.
	return 0, nil, nil
}

// ---
21 changes: 21 additions & 0 deletions cluster/kubernetes/resource/load_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,27 @@ data:
}
}

// TestParseBoundaryMarkers feeds ParseMultidoc a stream made of a single
// ConfigMap document followed by repeated `---` / `...` marker pairs, and
// expects parsing to succeed with exactly one resource in the result.
func TestParseBoundaryMarkers(t *testing.T) {
	const stream = `---
kind: ConfigMap
metadata:
name: bigmap
---
...
---
...
---
...
---
...
`

	manifests, err := ParseMultidoc(bytes.NewBufferString(stream).Bytes(), "test")

	// The trailing marker-only documents must neither fail the parse nor
	// show up as additional resources.
	assert.NoError(t, err)
	assert.Len(t, manifests, 1)
}

func TestParseCronJob(t *testing.T) {
doc := `---
apiVersion: batch/v1beta1
Expand Down

0 comments on commit f97508e

Please sign in to comment.