forgejo/vendor/gopkg.in/yaml.v2/decode.go

816 lines
20 KiB
Go
Raw Normal View History

2016-12-29 10:03:40 +09:00
package yaml
import (
"encoding"
"encoding/base64"
"fmt"
"io"
2016-12-29 10:03:40 +09:00
"math"
"reflect"
"strconv"
"time"
)
const (
documentNode = 1 << iota
mappingNode
sequenceNode
scalarNode
aliasNode
)
type node struct {
kind int
line, column int
tag string
// For an alias node, alias holds the resolved alias.
alias *node
value string
implicit bool
children []*node
anchors map[string]*node
2016-12-29 10:03:40 +09:00
}
// ----------------------------------------------------------------------------
// Parser, produces a node tree out of a libyaml event stream.
type parser struct {
parser yaml_parser_t
event yaml_event_t
doc *node
doneInit bool
2016-12-29 10:03:40 +09:00
}
func newParser(b []byte) *parser {
p := parser{}
if !yaml_parser_initialize(&p.parser) {
panic("failed to initialize YAML emitter")
}
if len(b) == 0 {
b = []byte{'\n'}
}
yaml_parser_set_input_string(&p.parser, b)
return &p
}
2016-12-29 10:03:40 +09:00
func newParserFromReader(r io.Reader) *parser {
p := parser{}
if !yaml_parser_initialize(&p.parser) {
panic("failed to initialize YAML emitter")
2016-12-29 10:03:40 +09:00
}
yaml_parser_set_input_reader(&p.parser, r)
2016-12-29 10:03:40 +09:00
return &p
}
func (p *parser) init() {
if p.doneInit {
return
}
p.expect(yaml_STREAM_START_EVENT)
p.doneInit = true
}
2016-12-29 10:03:40 +09:00
func (p *parser) destroy() {
if p.event.typ != yaml_NO_EVENT {
yaml_event_delete(&p.event)
}
yaml_parser_delete(&p.parser)
}
// expect consumes an event from the event stream and
// checks that it's of the expected type.
func (p *parser) expect(e yaml_event_type_t) {
if p.event.typ == yaml_NO_EVENT {
if !yaml_parser_parse(&p.parser, &p.event) {
p.fail()
2016-12-29 10:03:40 +09:00
}
}
if p.event.typ == yaml_STREAM_END_EVENT {
failf("attempted to go past the end of stream; corrupted value?")
}
if p.event.typ != e {
p.parser.problem = fmt.Sprintf("expected %s event but got %s", e, p.event.typ)
p.fail()
}
yaml_event_delete(&p.event)
p.event.typ = yaml_NO_EVENT
}
// peek peeks at the next event in the event stream,
// puts the results into p.event and returns the event type.
func (p *parser) peek() yaml_event_type_t {
if p.event.typ != yaml_NO_EVENT {
return p.event.typ
2016-12-29 10:03:40 +09:00
}
if !yaml_parser_parse(&p.parser, &p.event) {
p.fail()
}
return p.event.typ
2016-12-29 10:03:40 +09:00
}
func (p *parser) fail() {
var where string
var line int
if p.parser.problem_mark.line != 0 {
line = p.parser.problem_mark.line
// Scanner errors don't iterate line before returning error
if p.parser.error == yaml_SCANNER_ERROR {
line++
}
2016-12-29 10:03:40 +09:00
} else if p.parser.context_mark.line != 0 {
line = p.parser.context_mark.line
}
if line != 0 {
where = "line " + strconv.Itoa(line) + ": "
}
var msg string
if len(p.parser.problem) > 0 {
msg = p.parser.problem
} else {
msg = "unknown problem parsing YAML content"
}
failf("%s%s", where, msg)
}
func (p *parser) anchor(n *node, anchor []byte) {
if anchor != nil {
p.doc.anchors[string(anchor)] = n
}
}
func (p *parser) parse() *node {
p.init()
switch p.peek() {
2016-12-29 10:03:40 +09:00
case yaml_SCALAR_EVENT:
return p.scalar()
case yaml_ALIAS_EVENT:
return p.alias()
case yaml_MAPPING_START_EVENT:
return p.mapping()
case yaml_SEQUENCE_START_EVENT:
return p.sequence()
case yaml_DOCUMENT_START_EVENT:
return p.document()
case yaml_STREAM_END_EVENT:
// Happens when attempting to decode an empty buffer.
return nil
default:
panic("attempted to parse unknown event: " + p.event.typ.String())
2016-12-29 10:03:40 +09:00
}
}
func (p *parser) node(kind int) *node {
return &node{
kind: kind,
line: p.event.start_mark.line,
column: p.event.start_mark.column,
}
}
func (p *parser) document() *node {
n := p.node(documentNode)
n.anchors = make(map[string]*node)
p.doc = n
p.expect(yaml_DOCUMENT_START_EVENT)
2016-12-29 10:03:40 +09:00
n.children = append(n.children, p.parse())
p.expect(yaml_DOCUMENT_END_EVENT)
2016-12-29 10:03:40 +09:00
return n
}
func (p *parser) alias() *node {
n := p.node(aliasNode)
n.value = string(p.event.anchor)
n.alias = p.doc.anchors[n.value]
if n.alias == nil {
failf("unknown anchor '%s' referenced", n.value)
}
p.expect(yaml_ALIAS_EVENT)
2016-12-29 10:03:40 +09:00
return n
}
func (p *parser) scalar() *node {
n := p.node(scalarNode)
n.value = string(p.event.value)
n.tag = string(p.event.tag)
n.implicit = p.event.implicit
p.anchor(n, p.event.anchor)
p.expect(yaml_SCALAR_EVENT)
2016-12-29 10:03:40 +09:00
return n
}
func (p *parser) sequence() *node {
n := p.node(sequenceNode)
p.anchor(n, p.event.anchor)
p.expect(yaml_SEQUENCE_START_EVENT)
for p.peek() != yaml_SEQUENCE_END_EVENT {
2016-12-29 10:03:40 +09:00
n.children = append(n.children, p.parse())
}
p.expect(yaml_SEQUENCE_END_EVENT)
2016-12-29 10:03:40 +09:00
return n
}
func (p *parser) mapping() *node {
n := p.node(mappingNode)
p.anchor(n, p.event.anchor)
p.expect(yaml_MAPPING_START_EVENT)
for p.peek() != yaml_MAPPING_END_EVENT {
2016-12-29 10:03:40 +09:00
n.children = append(n.children, p.parse(), p.parse())
}
p.expect(yaml_MAPPING_END_EVENT)
2016-12-29 10:03:40 +09:00
return n
}
// ----------------------------------------------------------------------------
// Decoder, unmarshals a node into a provided value.
type decoder struct {
doc *node
aliases map[*node]bool
2016-12-29 10:03:40 +09:00
mapType reflect.Type
terrors []string
strict bool
decodeCount int
aliasCount int
aliasDepth int
2016-12-29 10:03:40 +09:00
}
var (
mapItemType = reflect.TypeOf(MapItem{})
durationType = reflect.TypeOf(time.Duration(0))
defaultMapType = reflect.TypeOf(map[interface{}]interface{}{})
ifaceType = defaultMapType.Elem()
timeType = reflect.TypeOf(time.Time{})
ptrTimeType = reflect.TypeOf(&time.Time{})
2016-12-29 10:03:40 +09:00
)
func newDecoder(strict bool) *decoder {
d := &decoder{mapType: defaultMapType, strict: strict}
d.aliases = make(map[*node]bool)
2016-12-29 10:03:40 +09:00
return d
}
func (d *decoder) terror(n *node, tag string, out reflect.Value) {
if n.tag != "" {
tag = n.tag
}
value := n.value
if tag != yaml_SEQ_TAG && tag != yaml_MAP_TAG {
if len(value) > 10 {
value = " `" + value[:7] + "...`"
} else {
value = " `" + value + "`"
}
}
d.terrors = append(d.terrors, fmt.Sprintf("line %d: cannot unmarshal %s%s into %s", n.line+1, shortTag(tag), value, out.Type()))
}
func (d *decoder) callUnmarshaler(n *node, u Unmarshaler) (good bool) {
terrlen := len(d.terrors)
err := u.UnmarshalYAML(func(v interface{}) (err error) {
defer handleErr(&err)
d.unmarshal(n, reflect.ValueOf(v))
if len(d.terrors) > terrlen {
issues := d.terrors[terrlen:]
d.terrors = d.terrors[:terrlen]
return &TypeError{issues}
}
return nil
})
if e, ok := err.(*TypeError); ok {
d.terrors = append(d.terrors, e.Errors...)
return false
}
if err != nil {
fail(err)
}
return true
}
// d.prepare initializes and dereferences pointers and calls UnmarshalYAML
// if a value is found to implement it.
// It returns the initialized and dereferenced out value, whether
// unmarshalling was already done by UnmarshalYAML, and if so whether
// its types unmarshalled appropriately.
//
// If n holds a null value, prepare returns before doing anything.
func (d *decoder) prepare(n *node, out reflect.Value) (newout reflect.Value, unmarshaled, good bool) {
if n.tag == yaml_NULL_TAG || n.kind == scalarNode && n.tag == "" && (n.value == "null" || n.value == "~" || n.value == "" && n.implicit) {
2016-12-29 10:03:40 +09:00
return out, false, false
}
again := true
for again {
again = false
if out.Kind() == reflect.Ptr {
if out.IsNil() {
out.Set(reflect.New(out.Type().Elem()))
}
out = out.Elem()
again = true
}
if out.CanAddr() {
if u, ok := out.Addr().Interface().(Unmarshaler); ok {
good = d.callUnmarshaler(n, u)
return out, true, good
}
}
}
return out, false, false
}
const (
// 400,000 decode operations is ~500kb of dense object declarations, or
// ~5kb of dense object declarations with 10000% alias expansion
alias_ratio_range_low = 400000
// 4,000,000 decode operations is ~5MB of dense object declarations, or
// ~4.5MB of dense object declarations with 10% alias expansion
alias_ratio_range_high = 4000000
// alias_ratio_range is the range over which we scale allowed alias ratios
alias_ratio_range = float64(alias_ratio_range_high - alias_ratio_range_low)
)
func allowedAliasRatio(decodeCount int) float64 {
switch {
case decodeCount <= alias_ratio_range_low:
// allow 99% to come from alias expansion for small-to-medium documents
return 0.99
case decodeCount >= alias_ratio_range_high:
// allow 10% to come from alias expansion for very large documents
return 0.10
default:
// scale smoothly from 99% down to 10% over the range.
// this maps to 396,000 - 400,000 allowed alias-driven decodes over the range.
// 400,000 decode operations is ~100MB of allocations in worst-case scenarios (single-item maps).
return 0.99 - 0.89*(float64(decodeCount-alias_ratio_range_low)/alias_ratio_range)
}
}
2016-12-29 10:03:40 +09:00
func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) {
d.decodeCount++
if d.aliasDepth > 0 {
d.aliasCount++
}
if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > allowedAliasRatio(d.decodeCount) {
failf("document contains excessive aliasing")
}
2016-12-29 10:03:40 +09:00
switch n.kind {
case documentNode:
return d.document(n, out)
case aliasNode:
return d.alias(n, out)
}
out, unmarshaled, good := d.prepare(n, out)
if unmarshaled {
return good
}
switch n.kind {
case scalarNode:
good = d.scalar(n, out)
case mappingNode:
good = d.mapping(n, out)
case sequenceNode:
good = d.sequence(n, out)
default:
panic("internal error: unknown node kind: " + strconv.Itoa(n.kind))
}
return good
}
func (d *decoder) document(n *node, out reflect.Value) (good bool) {
if len(n.children) == 1 {
d.doc = n
d.unmarshal(n.children[0], out)
return true
}
return false
}
func (d *decoder) alias(n *node, out reflect.Value) (good bool) {
if d.aliases[n] {
// TODO this could actually be allowed in some circumstances.
2016-12-29 10:03:40 +09:00
failf("anchor '%s' value contains itself", n.value)
}
d.aliases[n] = true
d.aliasDepth++
good = d.unmarshal(n.alias, out)
d.aliasDepth--
delete(d.aliases, n)
2016-12-29 10:03:40 +09:00
return good
}
var zeroValue reflect.Value
func resetMap(out reflect.Value) {
for _, k := range out.MapKeys() {
out.SetMapIndex(k, zeroValue)
}
}
func (d *decoder) scalar(n *node, out reflect.Value) bool {
2016-12-29 10:03:40 +09:00
var tag string
var resolved interface{}
if n.tag == "" && !n.implicit {
tag = yaml_STR_TAG
resolved = n.value
} else {
tag, resolved = resolve(n.tag, n.value)
if tag == yaml_BINARY_TAG {
data, err := base64.StdEncoding.DecodeString(resolved.(string))
if err != nil {
failf("!!binary value contains invalid base64 data")
}
resolved = string(data)
}
}
if resolved == nil {
if out.Kind() == reflect.Map && !out.CanAddr() {
resetMap(out)
} else {
out.Set(reflect.Zero(out.Type()))
}
return true
}
if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() {
// We've resolved to exactly the type we want, so use that.
out.Set(resolvedv)
return true
}
// Perhaps we can use the value as a TextUnmarshaler to
// set its value.
if out.CanAddr() {
u, ok := out.Addr().Interface().(encoding.TextUnmarshaler)
if ok {
var text []byte
if tag == yaml_BINARY_TAG {
text = []byte(resolved.(string))
} else {
// We let any value be unmarshaled into TextUnmarshaler.
// That might be more lax than we'd like, but the
// TextUnmarshaler itself should bowl out any dubious values.
text = []byte(n.value)
}
err := u.UnmarshalText(text)
2016-12-29 10:03:40 +09:00
if err != nil {
fail(err)
}
return true
}
}
switch out.Kind() {
case reflect.String:
if tag == yaml_BINARY_TAG {
out.SetString(resolved.(string))
return true
}
if resolved != nil {
2016-12-29 10:03:40 +09:00
out.SetString(n.value)
return true
2016-12-29 10:03:40 +09:00
}
case reflect.Interface:
if resolved == nil {
out.Set(reflect.Zero(out.Type()))
} else if tag == yaml_TIMESTAMP_TAG {
// It looks like a timestamp but for backward compatibility
// reasons we set it as a string, so that code that unmarshals
// timestamp-like values into interface{} will continue to
// see a string and not a time.Time.
// TODO(v3) Drop this.
out.Set(reflect.ValueOf(n.value))
2016-12-29 10:03:40 +09:00
} else {
out.Set(reflect.ValueOf(resolved))
}
return true
2016-12-29 10:03:40 +09:00
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
switch resolved := resolved.(type) {
case int:
if !out.OverflowInt(int64(resolved)) {
out.SetInt(int64(resolved))
return true
2016-12-29 10:03:40 +09:00
}
case int64:
if !out.OverflowInt(resolved) {
out.SetInt(resolved)
return true
2016-12-29 10:03:40 +09:00
}
case uint64:
if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) {
out.SetInt(int64(resolved))
return true
2016-12-29 10:03:40 +09:00
}
case float64:
if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) {
out.SetInt(int64(resolved))
return true
2016-12-29 10:03:40 +09:00
}
case string:
if out.Type() == durationType {
d, err := time.ParseDuration(resolved)
if err == nil {
out.SetInt(int64(d))
return true
2016-12-29 10:03:40 +09:00
}
}
}
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
switch resolved := resolved.(type) {
case int:
if resolved >= 0 && !out.OverflowUint(uint64(resolved)) {
out.SetUint(uint64(resolved))
return true
2016-12-29 10:03:40 +09:00
}
case int64:
if resolved >= 0 && !out.OverflowUint(uint64(resolved)) {
out.SetUint(uint64(resolved))
return true
2016-12-29 10:03:40 +09:00
}
case uint64:
if !out.OverflowUint(uint64(resolved)) {
out.SetUint(uint64(resolved))
return true
2016-12-29 10:03:40 +09:00
}
case float64:
if resolved <= math.MaxUint64 && !out.OverflowUint(uint64(resolved)) {
out.SetUint(uint64(resolved))
return true
2016-12-29 10:03:40 +09:00
}
}
case reflect.Bool:
switch resolved := resolved.(type) {
case bool:
out.SetBool(resolved)
return true
2016-12-29 10:03:40 +09:00
}
case reflect.Float32, reflect.Float64:
switch resolved := resolved.(type) {
case int:
out.SetFloat(float64(resolved))
return true
2016-12-29 10:03:40 +09:00
case int64:
out.SetFloat(float64(resolved))
return true
2016-12-29 10:03:40 +09:00
case uint64:
out.SetFloat(float64(resolved))
return true
2016-12-29 10:03:40 +09:00
case float64:
out.SetFloat(resolved)
return true
}
case reflect.Struct:
if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() {
out.Set(resolvedv)
return true
2016-12-29 10:03:40 +09:00
}
case reflect.Ptr:
if out.Type().Elem() == reflect.TypeOf(resolved) {
// TODO DOes this make sense? When is out a Ptr except when decoding a nil value?
elem := reflect.New(out.Type().Elem())
elem.Elem().Set(reflect.ValueOf(resolved))
out.Set(elem)
return true
2016-12-29 10:03:40 +09:00
}
}
d.terror(n, tag, out)
return false
2016-12-29 10:03:40 +09:00
}
func settableValueOf(i interface{}) reflect.Value {
v := reflect.ValueOf(i)
sv := reflect.New(v.Type()).Elem()
sv.Set(v)
return sv
}
func (d *decoder) sequence(n *node, out reflect.Value) (good bool) {
l := len(n.children)
var iface reflect.Value
switch out.Kind() {
case reflect.Slice:
out.Set(reflect.MakeSlice(out.Type(), l, l))
case reflect.Array:
if l != out.Len() {
failf("invalid array: want %d elements but got %d", out.Len(), l)
}
2016-12-29 10:03:40 +09:00
case reflect.Interface:
// No type hints. Will have to use a generic sequence.
iface = out
out = settableValueOf(make([]interface{}, l))
default:
d.terror(n, yaml_SEQ_TAG, out)
return false
}
et := out.Type().Elem()
j := 0
for i := 0; i < l; i++ {
e := reflect.New(et).Elem()
if ok := d.unmarshal(n.children[i], e); ok {
out.Index(j).Set(e)
j++
}
}
if out.Kind() != reflect.Array {
out.Set(out.Slice(0, j))
}
2016-12-29 10:03:40 +09:00
if iface.IsValid() {
iface.Set(out)
}
return true
}
func (d *decoder) mapping(n *node, out reflect.Value) (good bool) {
switch out.Kind() {
case reflect.Struct:
return d.mappingStruct(n, out)
case reflect.Slice:
return d.mappingSlice(n, out)
case reflect.Map:
// okay
case reflect.Interface:
if d.mapType.Kind() == reflect.Map {
iface := out
out = reflect.MakeMap(d.mapType)
iface.Set(out)
} else {
slicev := reflect.New(d.mapType).Elem()
if !d.mappingSlice(n, slicev) {
return false
}
out.Set(slicev)
return true
}
default:
d.terror(n, yaml_MAP_TAG, out)
return false
}
outt := out.Type()
kt := outt.Key()
et := outt.Elem()
mapType := d.mapType
if outt.Key() == ifaceType && outt.Elem() == ifaceType {
d.mapType = outt
}
if out.IsNil() {
out.Set(reflect.MakeMap(outt))
}
l := len(n.children)
for i := 0; i < l; i += 2 {
if isMerge(n.children[i]) {
d.merge(n.children[i+1], out)
continue
}
k := reflect.New(kt).Elem()
if d.unmarshal(n.children[i], k) {
kkind := k.Kind()
if kkind == reflect.Interface {
kkind = k.Elem().Kind()
}
if kkind == reflect.Map || kkind == reflect.Slice {
failf("invalid map key: %#v", k.Interface())
}
e := reflect.New(et).Elem()
if d.unmarshal(n.children[i+1], e) {
d.setMapIndex(n.children[i+1], out, k, e)
2016-12-29 10:03:40 +09:00
}
}
}
d.mapType = mapType
return true
}
func (d *decoder) setMapIndex(n *node, out, k, v reflect.Value) {
if d.strict && out.MapIndex(k) != zeroValue {
d.terrors = append(d.terrors, fmt.Sprintf("line %d: key %#v already set in map", n.line+1, k.Interface()))
return
}
out.SetMapIndex(k, v)
}
2016-12-29 10:03:40 +09:00
func (d *decoder) mappingSlice(n *node, out reflect.Value) (good bool) {
outt := out.Type()
if outt.Elem() != mapItemType {
d.terror(n, yaml_MAP_TAG, out)
return false
}
mapType := d.mapType
d.mapType = outt
var slice []MapItem
var l = len(n.children)
for i := 0; i < l; i += 2 {
if isMerge(n.children[i]) {
d.merge(n.children[i+1], out)
continue
}
item := MapItem{}
k := reflect.ValueOf(&item.Key).Elem()
if d.unmarshal(n.children[i], k) {
v := reflect.ValueOf(&item.Value).Elem()
if d.unmarshal(n.children[i+1], v) {
slice = append(slice, item)
}
}
}
out.Set(reflect.ValueOf(slice))
d.mapType = mapType
return true
}
func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) {
sinfo, err := getStructInfo(out.Type())
if err != nil {
panic(err)
}
name := settableValueOf("")
l := len(n.children)
var inlineMap reflect.Value
var elemType reflect.Type
if sinfo.InlineMap != -1 {
inlineMap = out.Field(sinfo.InlineMap)
inlineMap.Set(reflect.New(inlineMap.Type()).Elem())
elemType = inlineMap.Type().Elem()
}
var doneFields []bool
if d.strict {
doneFields = make([]bool, len(sinfo.FieldsList))
}
2016-12-29 10:03:40 +09:00
for i := 0; i < l; i += 2 {
ni := n.children[i]
if isMerge(ni) {
d.merge(n.children[i+1], out)
continue
}
if !d.unmarshal(ni, name) {
continue
}
if info, ok := sinfo.FieldsMap[name.String()]; ok {
if d.strict {
if doneFields[info.Id] {
d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s already set in type %s", ni.line+1, name.String(), out.Type()))
continue
}
doneFields[info.Id] = true
}
2016-12-29 10:03:40 +09:00
var field reflect.Value
if info.Inline == nil {
field = out.Field(info.Num)
} else {
field = out.FieldByIndex(info.Inline)
}
d.unmarshal(n.children[i+1], field)
} else if sinfo.InlineMap != -1 {
if inlineMap.IsNil() {
inlineMap.Set(reflect.MakeMap(inlineMap.Type()))
}
value := reflect.New(elemType).Elem()
d.unmarshal(n.children[i+1], value)
d.setMapIndex(n.children[i+1], inlineMap, name, value)
} else if d.strict {
d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s not found in type %s", ni.line+1, name.String(), out.Type()))
2016-12-29 10:03:40 +09:00
}
}
return true
}
func failWantMap() {
failf("map merge requires map or sequence of maps as the value")
}
func (d *decoder) merge(n *node, out reflect.Value) {
switch n.kind {
case mappingNode:
d.unmarshal(n, out)
case aliasNode:
if n.alias != nil && n.alias.kind != mappingNode {
2016-12-29 10:03:40 +09:00
failWantMap()
}
d.unmarshal(n, out)
case sequenceNode:
// Step backwards as earlier nodes take precedence.
for i := len(n.children) - 1; i >= 0; i-- {
ni := n.children[i]
if ni.kind == aliasNode {
if ni.alias != nil && ni.alias.kind != mappingNode {
2016-12-29 10:03:40 +09:00
failWantMap()
}
} else if ni.kind != mappingNode {
failWantMap()
}
d.unmarshal(ni, out)
}
default:
failWantMap()
}
}
func isMerge(n *node) bool {
return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == yaml_MERGE_TAG)
}