From c5eff9405a3eabd72727d564f631299d796d8f9e Mon Sep 17 00:00:00 2001 From: Ben Luddy Date: Tue, 13 Feb 2024 16:49:20 -0500 Subject: [PATCH] Treat map keys matching the same struct field as duplicates. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This resolves the case where two map keys could match the same struct field without being treated as duplicate map keys. The DupMapKey documentation has been updated accordingly. A map key that matches a previously-matched field will no longer quietly fall back to a case-insensitive match and is now always treated as a duplicate key. Case-sensitive matching of map key to struct field name is now a map lookup to improve the quadratic worst-case: │ manykeys-before.txt │ manykeys-after.txt │ │ sec/op │ sec/op vs base │ UnmarshalMapToStruct/default_options/many_fields_one_key_per_field 24.137µ ± 4% 5.240µ ± 1% -78.29% (p=0.000 n=10) │ manykeys-before.txt │ manykeys-after.txt │ │ B/op │ B/op vs base │ UnmarshalMapToStruct/default_options/many_fields_one_key_per_field 112.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10) │ manykeys-before.txt │ manykeys-after.txt │ │ allocs/op │ allocs/op vs base │ UnmarshalMapToStruct/default_options/many_fields_one_key_per_field 1.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10) This fix cleared the way to optimize the way duplicate keys are tracked, especially for inputs that contain no unknown fields: │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ UnmarshalMapToStruct/default_options/all_known_fields 642.7n ± 3% 686.3n ± 2% +6.78% (p=0.000 n=10) UnmarshalMapToStruct/default_options/all_known_duplicate_fields 1611.5n ± 1% 479.3n ± 1% -70.26% (p=0.000 n=10) UnmarshalMapToStruct/default_options/all_unknown_fields 1.731µ ± 3% 1.161µ ± 1% -32.96% (p=0.000 n=10) UnmarshalMapToStruct/default_options/all_unknown_duplicate_fields 1.726µ ± 2% 1.056µ ± 1% -38.79% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_known_fields 638.3n ± 1% 687.2n ± 2% +7.66% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_known_duplicate_fields 476.1n ± 1% 477.6n ± 2% ~ (p=0.197 n=10) UnmarshalMapToStruct/reject_unknown/all_unknown_fields 448.1n ± 1% 384.8n ± 1% -14.12% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_unknown_duplicate_fields 447.9n ± 0% 380.4n ± 1% -15.06% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_known_fields 1825.5n ± 0% 675.4n ± 2% -63.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_known_duplicate_fields 744.3n ± 0% 414.4n ± 2% -44.31% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_unknown_fields 2.885µ ± 1% 3.213µ ± 1% +11.35% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_unknown_duplicate_fields 844.8n ± 0% 703.9n ± 2% -16.68% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_fields 1828.5n ± 0% 684.4n ± 2% -62.57% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_duplicate_fields 744.3n ± 0% 407.3n ± 1% -45.27% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_fields 643.4n ± 0% 419.4n ± 2% -34.82% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_duplicate_fields 642.1n ± 0% 419.6n ± 2% -34.65% (p=0.000 n=10) geomean 936.7n 629.1n -32.84% │ before.txt │ after.txt │ │ B/op │ B/op vs base │ UnmarshalMapToStruct/default_options/all_known_fields 16.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/default_options/all_known_duplicate_fields 16.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/default_options/all_unknown_fields 16.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/default_options/all_unknown_duplicate_fields 16.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_known_fields 16.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_known_duplicate_fields 24.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_unknown_fields 24.000 ± 0% 8.000 ± 0% -66.67% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_unknown_duplicate_fields 24.000 ± 0% 8.000 ± 0% -66.67% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_known_fields 800.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_known_duplicate_fields 648.00 ± 0% 40.00 ± 0% -93.83% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_unknown_fields 800.0 ± 0% 1285.0 ± 0% +60.62% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_unknown_duplicate_fields 648.0 ± 0% 248.0 ± 0% -61.73% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_fields 800.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_duplicate_fields 648.00 ± 0% 40.00 ± 0% -93.83% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_fields 616.00 ± 0% 24.00 ± 0% -96.10% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_duplicate_fields 616.00 ± 0% 24.00 ± 0% -96.10% (p=0.000 n=10) geomean 113.6 ? ¹ ² ¹ summaries must be >0 to compute geomean ² ratios must be >0 to compute geomean │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ UnmarshalMapToStruct/default_options/all_known_fields 1.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/default_options/all_known_duplicate_fields 1.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/default_options/all_unknown_fields 1.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/default_options/all_unknown_duplicate_fields 1.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_known_fields 1.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_known_duplicate_fields 2.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_unknown_fields 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown/all_unknown_duplicate_fields 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_known_fields 15.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_known_duplicate_fields 5.000 ± 0% 2.000 ± 0% -60.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_unknown_fields 15.00 ± 0% 17.00 ± 0% +13.33% (p=0.000 n=10) UnmarshalMapToStruct/reject_duplicate/all_unknown_duplicate_fields 5.000 ± 0% 5.000 ± 0% ~ (p=1.000 n=10) ¹ UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_fields 15.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_duplicate_fields 5.000 ± 0% 2.000 ± 0% -60.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_fields 4.000 ± 0% 2.000 ± 0% -50.00% (p=0.000 n=10) UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_duplicate_fields 4.000 ± 0% 2.000 ± 0% -50.00% (p=0.000 n=10) geomean 3.043 ? ² ³ ¹ all samples are equal ² summaries must be >0 to compute geomean ³ ratios must be >0 to compute geomean Signed-off-by: Ben Luddy --- cache.go | 28 ++++++++--- decode.go | 144 ++++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 130 insertions(+), 42 deletions(-) diff --git a/cache.go b/cache.go index 8a4a5c87..3d7f5bc0 100644 --- a/cache.go +++ b/cache.go @@ -6,6 +6,7 @@ package cbor import ( "bytes" "errors" + "fmt" "reflect" "sort" "strconv" @@ -84,9 +85,10 @@ func newTypeInfo(t reflect.Type) *typeInfo { } type decodingStructType struct { - fields fields - err error - toArray bool + fields fields + fieldIndicesByName map[string]int + err error + toArray bool } func getDecodingStructType(t reflect.Type) *decodingStructType { @@ -98,12 +100,12 @@ func getDecodingStructType(t reflect.Type) *decodingStructType { toArray := hasToArrayOption(structOptions) - var err error + var errs []error for i := 0; i < len(flds); i++ { if flds[i].keyAsInt { nameAsInt, numErr := strconv.Atoi(flds[i].name) if numErr != nil { - err = errors.New("cbor: failed to parse field name \"" + flds[i].name + "\" to int (" + numErr.Error() + ")") + errs = append(errs, errors.New("cbor: failed to parse field name \""+flds[i].name+"\" to int ("+numErr.Error()+")")) break } flds[i].nameAsInt = int64(nameAsInt) @@ -112,7 +114,21 @@ func getDecodingStructType(t reflect.Type) *decodingStructType { flds[i].typInfo = getTypeInfo(flds[i].typ) } - structType := &decodingStructType{fields: flds, err: err, toArray: toArray} + fieldIndicesByName := make(map[string]int, len(flds)) + for i, fld := range flds { + if _, ok := fieldIndicesByName[fld.name]; ok { + errs = append(errs, fmt.Errorf("cbor: two or more fields of %v have the same name %q", t, fld.name)) + continue + } + fieldIndicesByName[fld.name] = i + } + + structType := &decodingStructType{ + fields: flds, + fieldIndicesByName: fieldIndicesByName, + err: errors.Join(errs...), + toArray: toArray, + } decodingStructTypeCache.Store(t, structType) return structType } diff --git a/decode.go b/decode.go index 0b44124d..e1e07a59 100644 --- a/decode.go +++ b/decode.go @@ -207,7 +207,12 @@ func (e *UnknownFieldError) Error() string { return fmt.Sprintf("cbor: found unknown field at map element index %d", e.Index) } -// DupMapKeyMode specifies how to enforce duplicate map key. +// DupMapKeyMode specifies how to enforce duplicate map key. Two map keys are considered duplicates if: +// 1. When decoding into a struct, both keys match the same struct field. The keys are also +// considered duplicates if neither matches any field and decoding to interface{} would produce +// equal (==) values for both keys. +// 2. When decoding into a map, both keys are equal (==) when decoded into values of the +// destination map's key type. type DupMapKeyMode int const ( @@ -1893,20 +1898,32 @@ func (d *decoder) parseMapToStruct(v reflect.Value, tInfo *typeInfo) error { //n count := int(val) // Keeps track of matched struct fields - foundFldIdx := make([]bool, len(structType.fields)) + var foundFldIdx []bool + { + const maxStackFields = 128 + if nfields := len(structType.fields); nfields <= maxStackFields { + // For structs with typical field counts, expect that this can be + // stack-allocated. + var a [maxStackFields]bool + foundFldIdx = a[:nfields] + } else { + foundFldIdx = make([]bool, len(structType.fields)) + } + } // Keeps track of CBOR map keys to detect duplicate map key keyCount := 0 var mapKeys map[interface{}]struct{} - if d.dm.dupMapKey == DupMapKeyEnforcedAPF { - mapKeys = make(map[interface{}]struct{}, len(structType.fields)) - } errOnUnknownField := (d.dm.extraReturnErrors & ExtraDecErrorUnknownField) > 0 +MapEntryLoop: for j := 0; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ { var f *field - var k interface{} // Used by duplicate map key detection + + // If duplicate field detection is enabled and the key at index j did not match any + // field, k will hold the map key. + var k interface{} t := d.nextCBORType() if t == cborTypeTextString || (t == cborTypeByteString && d.dm.fieldNameByteString == FieldNameByteStringAllowed) { @@ -1924,30 +1941,61 @@ func (d *decoder) parseMapToStruct(v reflect.Value, tInfo *typeInfo) error { //n keyBytes, _ = d.parseByteString() } - keyLen := len(keyBytes) - // Find field with exact match - for i := 0; i < len(structType.fields); i++ { + // Check for exact match on field name. + if i, ok := structType.fieldIndicesByName[string(keyBytes)]; ok { fld := structType.fields[i] - if !foundFldIdx[i] && len(fld.name) == keyLen && fld.name == string(keyBytes) { + + if !foundFldIdx[i] { f = fld foundFldIdx[i] = true - break + } else if d.dm.dupMapKey == DupMapKeyEnforcedAPF { + err = &DupMapKeyError{fld.name, j} + d.skip() // skip value + j++ + // skip the rest of the map + for ; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ { + d.skip() + d.skip() + } + return err + } else { + // discard repeated match + d.skip() + continue MapEntryLoop } } + // Find field with case-insensitive match if f == nil && d.dm.fieldNameMatching == FieldNameMatchingPreferCaseSensitive { + keyLen := len(keyBytes) keyString := string(keyBytes) for i := 0; i < len(structType.fields); i++ { fld := structType.fields[i] - if !foundFldIdx[i] && len(fld.name) == keyLen && strings.EqualFold(fld.name, keyString) { - f = fld - foundFldIdx[i] = true + if len(fld.name) == keyLen && strings.EqualFold(fld.name, keyString) { + if !foundFldIdx[i] { + f = fld + foundFldIdx[i] = true + } else if d.dm.dupMapKey == DupMapKeyEnforcedAPF { + err = &DupMapKeyError{keyString, j} + d.skip() // skip value + j++ + // skip the rest of the map + for ; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ { + d.skip() + d.skip() + } + return err + } else { + // discard repeated match + d.skip() + continue MapEntryLoop + } break } } } - if d.dm.dupMapKey == DupMapKeyEnforcedAPF { + if d.dm.dupMapKey == DupMapKeyEnforcedAPF && f == nil { k = string(keyBytes) } } else if t <= cborTypeNegativeInt { // uint/int @@ -1975,14 +2023,30 @@ func (d *decoder) parseMapToStruct(v reflect.Value, tInfo *typeInfo) error { //n // Find field for i := 0; i < len(structType.fields); i++ { fld := structType.fields[i] - if !foundFldIdx[i] && fld.keyAsInt && fld.nameAsInt == nameAsInt { - f = fld - foundFldIdx[i] = true + if fld.keyAsInt && fld.nameAsInt == nameAsInt { + if !foundFldIdx[i] { + f = fld + foundFldIdx[i] = true + } else if d.dm.dupMapKey == DupMapKeyEnforcedAPF { + err = &DupMapKeyError{nameAsInt, j} + d.skip() // skip value + j++ + // skip the rest of the map + for ; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ { + d.skip() + d.skip() + } + return err + } else { + // discard repeated match + d.skip() + continue MapEntryLoop + } break } } - if d.dm.dupMapKey == DupMapKeyEnforcedAPF { + if d.dm.dupMapKey == DupMapKeyEnforcedAPF && f == nil { k = nameAsInt } } else { @@ -2010,23 +2074,6 @@ func (d *decoder) parseMapToStruct(v reflect.Value, tInfo *typeInfo) error { //n } } - if d.dm.dupMapKey == DupMapKeyEnforcedAPF { - mapKeys[k] = struct{}{} - newKeyCount := len(mapKeys) - if newKeyCount == keyCount { - err = &DupMapKeyError{k, j} - d.skip() // skip value - j++ - // skip the rest of the map - for ; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ { - d.skip() - d.skip() - } - return err - } - keyCount = newKeyCount - } - if f == nil { if errOnUnknownField { err = &UnknownFieldError{j} @@ -2039,6 +2086,31 @@ func (d *decoder) parseMapToStruct(v reflect.Value, tInfo *typeInfo) error { //n } return err } + + // Two map keys that match the same struct field are immediately considered + // duplicates. This check detects duplicates between two map keys that do + // not match a struct field. If unknown field errors are enabled, then this + // check is never reached. + if d.dm.dupMapKey == DupMapKeyEnforcedAPF { + if mapKeys == nil { + mapKeys = make(map[interface{}]struct{}, 1) + } + mapKeys[k] = struct{}{} + newKeyCount := len(mapKeys) + if newKeyCount == keyCount { + err = &DupMapKeyError{k, j} + d.skip() // skip value + j++ + // skip the rest of the map + for ; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ { + d.skip() + d.skip() + } + return err + } + keyCount = newKeyCount + } + d.skip() // Skip value continue }