Update prebuilts to go1.10 ab/4625579
Test: m -j blueprint_tools
Change-Id: I12d0286a2978fcbafa50880625700ba69c4581d8
diff --git a/src/encoding/asn1/asn1.go b/src/encoding/asn1/asn1.go
index b8e2770..26868a3 100644
--- a/src/encoding/asn1/asn1.go
+++ b/src/encoding/asn1/asn1.go
@@ -372,13 +372,32 @@
return
}
+// NumericString
+
+// parseNumericString parses an ASN.1 NumericString from the given byte array
+// and returns it.
+func parseNumericString(bytes []byte) (ret string, err error) {
+ for _, b := range bytes {
+ if !isNumeric(b) {
+ return "", SyntaxError{"NumericString contains invalid character"}
+ }
+ }
+ return string(bytes), nil
+}
+
+// isNumeric reports whether the given b is in the ASN.1 NumericString set.
+func isNumeric(b byte) bool {
+ return '0' <= b && b <= '9' ||
+ b == ' '
+}
+
// PrintableString
-// parsePrintableString parses a ASN.1 PrintableString from the given byte
+// parsePrintableString parses an ASN.1 PrintableString from the given byte
// array and returns it.
func parsePrintableString(bytes []byte) (ret string, err error) {
for _, b := range bytes {
- if !isPrintable(b) {
+ if !isPrintable(b, allowAsterisk, allowAmpersand) {
err = SyntaxError{"PrintableString contains invalid character"}
return
}
@@ -387,8 +406,21 @@
return
}
+type asteriskFlag bool
+type ampersandFlag bool
+
+const (
+ allowAsterisk asteriskFlag = true
+ rejectAsterisk asteriskFlag = false
+
+ allowAmpersand ampersandFlag = true
+ rejectAmpersand ampersandFlag = false
+)
+
// isPrintable reports whether the given b is in the ASN.1 PrintableString set.
-func isPrintable(b byte) bool {
+// If asterisk is allowAsterisk then '*' is also allowed, reflecting existing
+// practice. If ampersand is allowAmpersand then '&' is allowed as well.
+func isPrintable(b byte, asterisk asteriskFlag, ampersand ampersandFlag) bool {
return 'a' <= b && b <= 'z' ||
'A' <= b && b <= 'Z' ||
'0' <= b && b <= '9' ||
@@ -401,12 +433,17 @@
// This is technically not allowed in a PrintableString.
// However, x509 certificates with wildcard strings don't
// always use the correct string type so we permit it.
- b == '*'
+ (bool(asterisk) && b == '*') ||
+ // This is not technically allowed either. However, not
+ // only is it relatively common, but there are also a
+ // handful of CA certificates that contain it. At least
+ // one of which will not expire until 2027.
+ (bool(ampersand) && b == '&')
}
// IA5String
-// parseIA5String parses a ASN.1 IA5String (ASCII string) from the given
+// parseIA5String parses an ASN.1 IA5String (ASCII string) from the given
// byte slice and returns it.
func parseIA5String(bytes []byte) (ret string, err error) {
for _, b := range bytes {
@@ -421,7 +458,7 @@
// T61String
-// parseT61String parses a ASN.1 T61String (8-bit clean string) from the given
+// parseT61String parses an ASN.1 T61String (8-bit clean string) from the given
// byte slice and returns it.
func parseT61String(bytes []byte) (ret string, err error) {
return string(bytes), nil
@@ -429,7 +466,7 @@
// UTF8String
-// parseUTF8String parses a ASN.1 UTF8String (raw UTF-8) from the given byte
+// parseUTF8String parses an ASN.1 UTF8String (raw UTF-8) from the given byte
// array and returns it.
func parseUTF8String(bytes []byte) (ret string, err error) {
if !utf8.Valid(bytes) {
@@ -536,7 +573,7 @@
// a number of ASN.1 values from the given byte slice and returns them as a
// slice of Go values of the given type.
func parseSequenceOf(bytes []byte, sliceType reflect.Type, elemType reflect.Type) (ret reflect.Value, err error) {
- expectedTag, compoundType, ok := getUniversalType(elemType)
+ matchAny, expectedTag, compoundType, ok := getUniversalType(elemType)
if !ok {
err = StructuralError{"unknown Go type for slice"}
return
@@ -552,7 +589,7 @@
return
}
switch t.tag {
- case TagIA5String, TagGeneralString, TagT61String, TagUTF8String:
+ case TagIA5String, TagGeneralString, TagT61String, TagUTF8String, TagNumericString:
// We pretend that various other string types are
// PRINTABLE STRINGs so that a sequence of them can be
// parsed into a []string.
@@ -562,7 +599,7 @@
t.tag = TagUTCTime
}
- if t.class != ClassUniversal || t.isCompound != compoundType || t.tag != expectedTag {
+ if !matchAny && (t.class != ClassUniversal || t.isCompound != compoundType || t.tag != expectedTag) {
err = StructuralError{"sequence tag mismatch"}
return
}
@@ -617,23 +654,6 @@
return
}
- // Deal with raw values.
- if fieldType == rawValueType {
- var t tagAndLength
- t, offset, err = parseTagAndLength(bytes, offset)
- if err != nil {
- return
- }
- if invalidLength(offset, t.length, len(bytes)) {
- err = SyntaxError{"data truncated"}
- return
- }
- result := RawValue{t.class, t.tag, t.isCompound, bytes[offset : offset+t.length], bytes[initOffset : offset+t.length]}
- offset += t.length
- v.Set(reflect.ValueOf(result))
- return
- }
-
// Deal with the ANY type.
if ifaceType := fieldType; ifaceType.Kind() == reflect.Interface && ifaceType.NumMethod() == 0 {
var t tagAndLength
@@ -651,6 +671,8 @@
switch t.tag {
case TagPrintableString:
result, err = parsePrintableString(innerBytes)
+ case TagNumericString:
+ result, err = parseNumericString(innerBytes)
case TagIA5String:
result, err = parseIA5String(innerBytes)
case TagT61String:
@@ -682,11 +704,6 @@
}
return
}
- universalTag, compoundType, ok1 := getUniversalType(fieldType)
- if !ok1 {
- err = StructuralError{fmt.Sprintf("unknown Go type: %v", fieldType)}
- return
- }
t, offset, err := parseTagAndLength(bytes, offset)
if err != nil {
@@ -702,7 +719,9 @@
return
}
if t.class == expectedClass && t.tag == *params.tag && (t.length == 0 || t.isCompound) {
- if t.length > 0 {
+ if fieldType == rawValueType {
+ // The inner element should not be parsed for RawValues.
+ } else if t.length > 0 {
t, offset, err = parseTagAndLength(bytes, offset)
if err != nil {
return
@@ -727,6 +746,12 @@
}
}
+ matchAny, universalTag, compoundType, ok1 := getUniversalType(fieldType)
+ if !ok1 {
+ err = StructuralError{fmt.Sprintf("unknown Go type: %v", fieldType)}
+ return
+ }
+
// Special case for strings: all the ASN.1 string types map to the Go
// type string. getUniversalType returns the tag for PrintableString
// when it sees a string, so if we see a different string type on the
@@ -734,7 +759,7 @@
if universalTag == TagPrintableString {
if t.class == ClassUniversal {
switch t.tag {
- case TagIA5String, TagGeneralString, TagT61String, TagUTF8String:
+ case TagIA5String, TagGeneralString, TagT61String, TagUTF8String, TagNumericString:
universalTag = t.tag
}
} else if params.stringType != 0 {
@@ -752,21 +777,25 @@
universalTag = TagSet
}
+ matchAnyClassAndTag := matchAny
expectedClass := ClassUniversal
expectedTag := universalTag
if !params.explicit && params.tag != nil {
expectedClass = ClassContextSpecific
expectedTag = *params.tag
+ matchAnyClassAndTag = false
}
if !params.explicit && params.application && params.tag != nil {
expectedClass = ClassApplication
expectedTag = *params.tag
+ matchAnyClassAndTag = false
}
// We have unwrapped any explicit tagging at this point.
- if t.class != expectedClass || t.tag != expectedTag || t.isCompound != compoundType {
+ if !matchAnyClassAndTag && (t.class != expectedClass || t.tag != expectedTag) ||
+ (!matchAny && t.isCompound != compoundType) {
// Tags don't match. Again, it could be an optional element.
ok := setDefaultValue(v, params)
if ok {
@@ -785,6 +814,10 @@
// We deal with the structures defined in this package first.
switch fieldType {
+ case rawValueType:
+ result := RawValue{t.class, t.tag, t.isCompound, innerBytes, bytes[initOffset:offset]}
+ v.Set(reflect.ValueOf(result))
+ return
case objectIdentifierType:
newSlice, err1 := parseObjectIdentifier(innerBytes)
v.Set(reflect.MakeSlice(v.Type(), len(newSlice), len(newSlice)))
@@ -904,6 +937,8 @@
switch universalTag {
case TagPrintableString:
v, err = parsePrintableString(innerBytes)
+ case TagNumericString:
+ v, err = parseNumericString(innerBytes)
case TagIA5String:
v, err = parseIA5String(innerBytes)
case TagT61String:
@@ -977,7 +1012,7 @@
//
// An ASN.1 UTCTIME or GENERALIZEDTIME can be written to a time.Time.
//
-// An ASN.1 PrintableString or IA5String can be written to a string.
+// An ASN.1 PrintableString, IA5String, or NumericString can be written to a string.
//
// Any of the above ASN.1 values can be written to an interface{}.
// The value stored in the interface has the corresponding Go type.
@@ -992,7 +1027,7 @@
//
// The following tags on struct fields have special meaning to Unmarshal:
//
-// application specifies that a APPLICATION tag is used
+// application specifies that an APPLICATION tag is used
// default:x sets the default value for optional integer fields (only used if optional is also present)
// explicit specifies that an additional, explicit tag wraps the implicit one
// optional marks the field as ASN.1 OPTIONAL
diff --git a/src/encoding/asn1/asn1_test.go b/src/encoding/asn1/asn1_test.go
index c9eda40..5e67dc5 100644
--- a/src/encoding/asn1/asn1_test.go
+++ b/src/encoding/asn1/asn1_test.go
@@ -424,6 +424,7 @@
{"generalized", fieldParameters{timeType: TagGeneralizedTime}},
{"utc", fieldParameters{timeType: TagUTCTime}},
{"printable", fieldParameters{stringType: TagPrintableString}},
+ {"numeric", fieldParameters{stringType: TagNumericString}},
{"optional", fieldParameters{optional: true}},
{"explicit", fieldParameters{explicit: true, tag: new(int)}},
{"application", fieldParameters{application: true, tag: new(int)}},
@@ -486,6 +487,8 @@
{[]byte{0x02, 0x01, 0x10}, newInt(16)},
{[]byte{0x13, 0x04, 't', 'e', 's', 't'}, newString("test")},
{[]byte{0x16, 0x04, 't', 'e', 's', 't'}, newString("test")},
+ // Ampersand is allowed in PrintableString due to mistakes by major CAs.
+ {[]byte{0x13, 0x05, 't', 'e', 's', 't', '&'}, newString("test&")},
{[]byte{0x16, 0x04, 't', 'e', 's', 't'}, &RawValue{0, 22, false, []byte("test"), []byte("\x16\x04test")}},
{[]byte{0x04, 0x04, 1, 2, 3, 4}, &RawValue{0, 4, false, []byte{1, 2, 3, 4}, []byte{4, 4, 1, 2, 3, 4}}},
{[]byte{0x30, 0x03, 0x81, 0x01, 0x01}, &TestContextSpecificTags{1}},
@@ -496,6 +499,7 @@
{[]byte{0x30, 0x0b, 0x13, 0x03, 0x66, 0x6f, 0x6f, 0x02, 0x01, 0x22, 0x02, 0x01, 0x33}, &TestElementsAfterString{"foo", 0x22, 0x33}},
{[]byte{0x30, 0x05, 0x02, 0x03, 0x12, 0x34, 0x56}, &TestBigInt{big.NewInt(0x123456)}},
{[]byte{0x30, 0x0b, 0x31, 0x09, 0x02, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x01, 0x03}, &TestSet{Ints: []int{1, 2, 3}}},
+ {[]byte{0x12, 0x0b, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' '}, newString("0123456789 ")},
}
func TestUnmarshal(t *testing.T) {
@@ -1015,7 +1019,7 @@
t.Fatal(err)
}
if !bytes.Equal(NullBytes, marshaled) {
- t.Errorf("Expected Marshal of NullRawValue to yeild %x, got %x", NullBytes, marshaled)
+ t.Errorf("Expected Marshal of NullRawValue to yield %x, got %x", NullBytes, marshaled)
}
unmarshaled := RawValue{}
@@ -1033,3 +1037,60 @@
t.Errorf("Expected Unmarshal of NullBytes to yield %v, got %v", NullRawValue, unmarshaled)
}
}
+
+func TestExplicitTagRawValueStruct(t *testing.T) {
+ type foo struct {
+ A RawValue `asn1:"optional,explicit,tag:5"`
+ B []byte `asn1:"optional,explicit,tag:6"`
+ }
+ before := foo{B: []byte{1, 2, 3}}
+ derBytes, err := Marshal(before)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ var after foo
+ if rest, err := Unmarshal(derBytes, &after); err != nil || len(rest) != 0 {
+ t.Fatal(err)
+ }
+
+ got := fmt.Sprintf("%#v", after)
+ want := fmt.Sprintf("%#v", before)
+ if got != want {
+ t.Errorf("got %s, want %s (DER: %x)", got, want, derBytes)
+ }
+}
+
+func TestTaggedRawValue(t *testing.T) {
+ type taggedRawValue struct {
+ A RawValue `asn1:"tag:5"`
+ }
+ type untaggedRawValue struct {
+ A RawValue
+ }
+ const isCompound = 0x20
+ const tag = 5
+
+ tests := []struct {
+ shouldMatch bool
+ derBytes []byte
+ }{
+ {false, []byte{0x30, 3, TagInteger, 1, 1}},
+ {true, []byte{0x30, 3, (ClassContextSpecific << 6) | tag, 1, 1}},
+ {true, []byte{0x30, 3, (ClassContextSpecific << 6) | tag | isCompound, 1, 1}},
+ {false, []byte{0x30, 3, (ClassApplication << 6) | tag | isCompound, 1, 1}},
+ }
+
+ for i, test := range tests {
+ var tagged taggedRawValue
+ if _, err := Unmarshal(test.derBytes, &tagged); (err == nil) != test.shouldMatch {
+ t.Errorf("#%d: unexpected result parsing %x: %s", i, test.derBytes, err)
+ }
+
+ // An untagged RawValue should accept anything.
+ var untagged untaggedRawValue
+ if _, err := Unmarshal(test.derBytes, &untagged); err != nil {
+ t.Errorf("#%d: unexpected failure parsing %x with untagged RawValue: %s", i, test.derBytes, err)
+ }
+ }
+}
diff --git a/src/encoding/asn1/common.go b/src/encoding/asn1/common.go
index cd93b27..a6589a5 100644
--- a/src/encoding/asn1/common.go
+++ b/src/encoding/asn1/common.go
@@ -30,6 +30,7 @@
TagUTF8String = 12
TagSequence = 16
TagSet = 17
+ TagNumericString = 18
TagPrintableString = 19
TagT61String = 20
TagIA5String = 22
@@ -106,6 +107,8 @@
ret.stringType = TagIA5String
case part == "printable":
ret.stringType = TagPrintableString
+ case part == "numeric":
+ ret.stringType = TagNumericString
case part == "utf8":
ret.stringType = TagUTF8String
case strings.HasPrefix(part, "default:"):
@@ -136,36 +139,38 @@
// Given a reflected Go type, getUniversalType returns the default tag number
// and expected compound flag.
-func getUniversalType(t reflect.Type) (tagNumber int, isCompound, ok bool) {
+func getUniversalType(t reflect.Type) (matchAny bool, tagNumber int, isCompound, ok bool) {
switch t {
+ case rawValueType:
+ return true, -1, false, true
case objectIdentifierType:
- return TagOID, false, true
+ return false, TagOID, false, true
case bitStringType:
- return TagBitString, false, true
+ return false, TagBitString, false, true
case timeType:
- return TagUTCTime, false, true
+ return false, TagUTCTime, false, true
case enumeratedType:
- return TagEnum, false, true
+ return false, TagEnum, false, true
case bigIntType:
- return TagInteger, false, true
+ return false, TagInteger, false, true
}
switch t.Kind() {
case reflect.Bool:
- return TagBoolean, false, true
+ return false, TagBoolean, false, true
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
- return TagInteger, false, true
+ return false, TagInteger, false, true
case reflect.Struct:
- return TagSequence, true, true
+ return false, TagSequence, true, true
case reflect.Slice:
if t.Elem().Kind() == reflect.Uint8 {
- return TagOctetString, false, true
+ return false, TagOctetString, false, true
}
if strings.HasSuffix(t.Name(), "SET") {
- return TagSet, true, true
+ return false, TagSet, true, true
}
- return TagSequence, true, true
+ return false, TagSequence, true, true
case reflect.String:
- return TagPrintableString, false, true
+ return false, TagPrintableString, false, true
}
- return 0, false, false
+ return false, 0, false, false
}
diff --git a/src/encoding/asn1/marshal.go b/src/encoding/asn1/marshal.go
index fdadb39..3e85651 100644
--- a/src/encoding/asn1/marshal.go
+++ b/src/encoding/asn1/marshal.go
@@ -18,7 +18,7 @@
byteFFEncoder encoder = byteEncoder(0xff)
)
-// encoder represents a ASN.1 element that is waiting to be marshaled.
+// encoder represents an ASN.1 element that is waiting to be marshaled.
type encoder interface {
// Len returns the number of bytes needed to marshal this element.
Len() int
@@ -268,7 +268,13 @@
func makePrintableString(s string) (e encoder, err error) {
for i := 0; i < len(s); i++ {
- if !isPrintable(s[i]) {
+ // The asterisk is often used in PrintableString, even though
+ // it is invalid. If a PrintableString was specifically
+ // requested then the asterisk is permitted by this code.
+ // Ampersand is allowed in parsing due to a handful of CA
+ // certificates, however when making new certificates
+ // it is rejected.
+ if !isPrintable(s[i], allowAsterisk, rejectAmpersand) {
return nil, StructuralError{"PrintableString contains invalid character"}
}
}
@@ -286,6 +292,16 @@
return stringEncoder(s), nil
}
+func makeNumericString(s string) (e encoder, err error) {
+ for i := 0; i < len(s); i++ {
+ if !isNumeric(s[i]) {
+ return nil, StructuralError{"NumericString contains invalid character"}
+ }
+ }
+
+ return stringEncoder(s), nil
+}
+
func makeUTF8String(s string) encoder {
return stringEncoder(s)
}
@@ -503,6 +519,8 @@
return makeIA5String(v.String())
case TagPrintableString:
return makePrintableString(v.String())
+ case TagNumericString:
+ return makeNumericString(v.String())
default:
return makeUTF8String(v.String()), nil
}
@@ -556,11 +574,10 @@
return t, nil
}
- tag, isCompound, ok := getUniversalType(v.Type())
- if !ok {
+ matchAny, tag, isCompound, ok := getUniversalType(v.Type())
+ if !ok || matchAny {
return nil, StructuralError{fmt.Sprintf("unknown Go type: %v", v.Type())}
}
- class := ClassUniversal
if params.timeType != 0 && tag != TagUTCTime {
return nil, StructuralError{"explicit time type given to non-time member"}
@@ -577,7 +594,7 @@
// a PrintableString if the character set in the string is
// sufficiently limited, otherwise we'll use a UTF8String.
for _, r := range v.String() {
- if r >= utf8.RuneSelf || !isPrintable(byte(r)) {
+ if r >= utf8.RuneSelf || !isPrintable(byte(r), rejectAsterisk, rejectAmpersand) {
if !utf8.ValidString(v.String()) {
return nil, errors.New("asn1: string not valid UTF-8")
}
@@ -610,27 +627,33 @@
bodyLen := t.body.Len()
- if params.explicit {
- t.tag = bytesEncoder(appendTagAndLength(t.scratch[:0], tagAndLength{class, tag, bodyLen, isCompound}))
-
- tt := new(taggedEncoder)
-
- tt.body = t
-
- tt.tag = bytesEncoder(appendTagAndLength(tt.scratch[:0], tagAndLength{
- class: ClassContextSpecific,
- tag: *params.tag,
- length: bodyLen + t.tag.Len(),
- isCompound: true,
- }))
-
- return tt, nil
- }
-
+ class := ClassUniversal
if params.tag != nil {
+ if params.application {
+ class = ClassApplication
+ } else {
+ class = ClassContextSpecific
+ }
+
+ if params.explicit {
+ t.tag = bytesEncoder(appendTagAndLength(t.scratch[:0], tagAndLength{ClassUniversal, tag, bodyLen, isCompound}))
+
+ tt := new(taggedEncoder)
+
+ tt.body = t
+
+ tt.tag = bytesEncoder(appendTagAndLength(tt.scratch[:0], tagAndLength{
+ class: class,
+ tag: *params.tag,
+ length: bodyLen + t.tag.Len(),
+ isCompound: true,
+ }))
+
+ return tt, nil
+ }
+
// implicit tag.
tag = *params.tag
- class = ClassContextSpecific
}
t.tag = bytesEncoder(appendTagAndLength(t.scratch[:0], tagAndLength{class, tag, bodyLen, isCompound}))
@@ -650,7 +673,13 @@
// utc: causes time.Time to be marshaled as ASN.1, UTCTime values
// generalized: causes time.Time to be marshaled as ASN.1, GeneralizedTime values
func Marshal(val interface{}) ([]byte, error) {
- e, err := makeField(reflect.ValueOf(val), fieldParameters{})
+ return MarshalWithParams(val, "")
+}
+
+// MarshalWithParams allows field parameters to be specified for the
+// top-level element. The form of the params is the same as the field tags.
+func MarshalWithParams(val interface{}, params string) ([]byte, error) {
+ e, err := makeField(reflect.ValueOf(val), parseFieldParameters(params))
if err != nil {
return nil, err
}
diff --git a/src/encoding/asn1/marshal_test.go b/src/encoding/asn1/marshal_test.go
index 10db1aa..4f755a1 100644
--- a/src/encoding/asn1/marshal_test.go
+++ b/src/encoding/asn1/marshal_test.go
@@ -59,6 +59,10 @@
A string `asn1:"printable"`
}
+type genericStringTest struct {
+ A string
+}
+
type optionalRawValueTest struct {
A RawValue `asn1:"optional"`
}
@@ -71,6 +75,15 @@
A int `asn1:"optional,default:1"`
}
+type applicationTest struct {
+ A int `asn1:"application,tag:0"`
+ B int `asn1:"application,tag:1,explicit"`
+}
+
+type numericStringTest struct {
+ A string `asn1:"numeric"`
+}
+
type testSET []int
var PST = time.FixedZone("PST", -8*60*60)
@@ -142,6 +155,9 @@
{optionalRawValueTest{}, "3000"},
{printableStringTest{"test"}, "3006130474657374"},
{printableStringTest{"test*"}, "30071305746573742a"},
+ {genericStringTest{"test"}, "3006130474657374"},
+ {genericStringTest{"test*"}, "30070c05746573742a"},
+ {genericStringTest{"test&"}, "30070c057465737426"},
{rawContentsStruct{nil, 64}, "3003020140"},
{rawContentsStruct{[]byte{0x30, 3, 1, 2, 3}, 64}, "3003010203"},
{RawValue{Tag: 1, Class: 2, IsCompound: false, Bytes: []byte{1, 2, 3}}, "8103010203"},
@@ -152,6 +168,8 @@
{defaultTest{0}, "3003020100"},
{defaultTest{1}, "3000"},
{defaultTest{2}, "3003020102"},
+ {applicationTest{1, 2}, "30084001016103020102"},
+ {numericStringTest{"1 9"}, "30051203312039"},
}
func TestMarshal(t *testing.T) {
@@ -168,6 +186,31 @@
}
}
+type marshalWithParamsTest struct {
+ in interface{}
+ params string
+ out string // hex encoded
+}
+
+var marshalWithParamsTests = []marshalWithParamsTest{
+ {intStruct{10}, "set", "310302010a"},
+ {intStruct{10}, "application", "600302010a"},
+}
+
+func TestMarshalWithParams(t *testing.T) {
+ for i, test := range marshalWithParamsTests {
+ data, err := MarshalWithParams(test.in, test.params)
+ if err != nil {
+ t.Errorf("#%d failed: %s", i, err)
+ }
+ out, _ := hex.DecodeString(test.out)
+ if !bytes.Equal(out, data) {
+ t.Errorf("#%d got: %x want %x\n\t%q\n\t%q", i, data, out, data, out)
+
+ }
+ }
+}
+
type marshalErrTest struct {
in interface{}
err string
@@ -175,6 +218,9 @@
var marshalErrTests = []marshalErrTest{
{bigIntStruct{nil}, "empty integer"},
+ {numericStringTest{"a"}, "invalid character"},
+ {ia5StringTest{"\xb0"}, "invalid character"},
+ {printableStringTest{"!"}, "invalid character"},
}
func TestMarshalError(t *testing.T) {
diff --git a/src/encoding/base32/base32.go b/src/encoding/base32/base32.go
index bf341b5..e72ba74 100644
--- a/src/encoding/base32/base32.go
+++ b/src/encoding/base32/base32.go
@@ -130,8 +130,19 @@
}
// Encode 5-bit blocks using the base32 alphabet
- for i := 0; i < 8; i++ {
- if len(dst) > i {
+ size := len(dst)
+ if size >= 8 {
+ // Common case, unrolled for extra performance
+ dst[0] = enc.encode[b[0]]
+ dst[1] = enc.encode[b[1]]
+ dst[2] = enc.encode[b[2]]
+ dst[3] = enc.encode[b[3]]
+ dst[4] = enc.encode[b[4]]
+ dst[5] = enc.encode[b[5]]
+ dst[6] = enc.encode[b[6]]
+ dst[7] = enc.encode[b[7]]
+ } else {
+ for i := 0; i < size; i++ {
dst[i] = enc.encode[b[i]]
}
}
diff --git a/src/encoding/base64/base64.go b/src/encoding/base64/base64.go
index b208f9e..9a99370 100644
--- a/src/encoding/base64/base64.go
+++ b/src/encoding/base64/base64.go
@@ -6,6 +6,7 @@
package base64
import (
+ "encoding/binary"
"io"
"strconv"
)
@@ -269,121 +270,110 @@
return "illegal base64 data at input byte " + strconv.FormatInt(int64(e), 10)
}
-// decode is like Decode but returns an additional 'end' value, which
-// indicates if end-of-message padding or a partial quantum was encountered
-// and thus any additional data is an error.
-func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
- si := 0
+// decodeQuantum decodes up to 4 base64 bytes. It takes as parameters
+// the destination buffer dst, the source buffer src and an index in the
+// source buffer si.
+// It returns the number of bytes read from src, the number of bytes written
+// to dst, and an error, if any.
+func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err error) {
+ // Decode quantum using the base64 alphabet
+ var dbuf [4]byte
+ dinc, dlen := 3, 4
- for si < len(src) && !end {
- // Decode quantum using the base64 alphabet
- var dbuf [4]byte
- dinc, dlen := 3, 4
-
- for j := 0; j < len(dbuf); j++ {
- if len(src) == si {
- switch {
- case j == 0:
- return n, false, nil
- case j == 1, enc.padChar != NoPadding:
- return n, false, CorruptInputError(si - j)
- }
- dinc, dlen, end = j-1, j, true
- break
+ for j := 0; j < len(dbuf); j++ {
+ if len(src) == si {
+ switch {
+ case j == 0:
+ return si, 0, nil
+ case j == 1, enc.padChar != NoPadding:
+ return si, 0, CorruptInputError(si - j)
}
- in := src[si]
+ dinc, dlen = j-1, j
+ break
+ }
+ in := src[si]
+ si++
+
+ out := enc.decodeMap[in]
+ if out != 0xff {
+ dbuf[j] = out
+ continue
+ }
+
+ if in == '\n' || in == '\r' {
+ j--
+ continue
+ }
+
+ if rune(in) != enc.padChar {
+ return si, 0, CorruptInputError(si - 1)
+ }
+
+ // We've reached the end and there's padding
+ switch j {
+ case 0, 1:
+ // incorrect padding
+ return si, 0, CorruptInputError(si - 1)
+ case 2:
+ // "==" is expected, the first "=" is already consumed.
+ // skip over newlines
+ for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
+ si++
+ }
+ if si == len(src) {
+ // not enough padding
+ return si, 0, CorruptInputError(len(src))
+ }
+ if rune(src[si]) != enc.padChar {
+ // incorrect padding
+ return si, 0, CorruptInputError(si - 1)
+ }
si++
-
- out := enc.decodeMap[in]
- if out != 0xFF {
- dbuf[j] = out
- continue
- }
-
- if in == '\n' || in == '\r' {
- j--
- continue
- }
- if rune(in) == enc.padChar {
- // We've reached the end and there's padding
- switch j {
- case 0, 1:
- // incorrect padding
- return n, false, CorruptInputError(si - 1)
- case 2:
- // "==" is expected, the first "=" is already consumed.
- // skip over newlines
- for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
- si++
- }
- if si == len(src) {
- // not enough padding
- return n, false, CorruptInputError(len(src))
- }
- if rune(src[si]) != enc.padChar {
- // incorrect padding
- return n, false, CorruptInputError(si - 1)
- }
-
- si++
- }
- // skip over newlines
- for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
- si++
- }
- if si < len(src) {
- // trailing garbage
- err = CorruptInputError(si)
- }
- dinc, dlen, end = 3, j, true
- break
- }
- return n, false, CorruptInputError(si - 1)
}
- // Convert 4x 6bit source bytes into 3 bytes
- val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
- dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16)
- switch dlen {
- case 4:
- dst[2] = dbuf[2]
- dbuf[2] = 0
- fallthrough
- case 3:
- dst[1] = dbuf[1]
- if enc.strict && dbuf[2] != 0 {
- return n, end, CorruptInputError(si - 1)
- }
- dbuf[1] = 0
- fallthrough
- case 2:
- dst[0] = dbuf[0]
- if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) {
- return n, end, CorruptInputError(si - 2)
- }
+ // skip over newlines
+ for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
+ si++
}
- dst = dst[dinc:]
- n += dlen - 1
+ if si < len(src) {
+ // trailing garbage
+ err = CorruptInputError(si)
+ }
+ dinc, dlen = 3, j
+ break
}
- return n, end, err
-}
+ // Convert 4x 6bit source bytes into 3 bytes
+ val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
+ dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16)
+ switch dlen {
+ case 4:
+ dst[2] = dbuf[2]
+ dbuf[2] = 0
+ fallthrough
+ case 3:
+ dst[1] = dbuf[1]
+ if enc.strict && dbuf[2] != 0 {
+ return si, 0, CorruptInputError(si - 1)
+ }
+ dbuf[1] = 0
+ fallthrough
+ case 2:
+ dst[0] = dbuf[0]
+ if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) {
+ return si, 0, CorruptInputError(si - 2)
+ }
+ }
+ dst = dst[dinc:]
-// Decode decodes src using the encoding enc. It writes at most
-// DecodedLen(len(src)) bytes to dst and returns the number of bytes
-// written. If src contains invalid base64 data, it will return the
-// number of bytes successfully written and CorruptInputError.
-// New line characters (\r and \n) are ignored.
-func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
- n, _, err = enc.decode(dst, src)
- return
+ return si, dlen - 1, err
}
// DecodeString returns the bytes represented by the base64 string s.
func (enc *Encoding) DecodeString(s string) ([]byte, error) {
dbuf := make([]byte, enc.DecodedLen(len(s)))
- n, _, err := enc.decode(dbuf, []byte(s))
+ n, err := enc.Decode(dbuf, []byte(s))
return dbuf[:n], err
}
@@ -392,7 +382,6 @@
readErr error // error from r.Read
enc *Encoding
r io.Reader
- end bool // saw end of message
buf [1024]byte // leftover input
nbuf int
out []byte // leftover decoded output
@@ -430,9 +419,8 @@
if d.enc.padChar == NoPadding && d.nbuf > 0 {
// Decode final fragment, without padding.
var nw int
- nw, _, d.err = d.enc.decode(d.outbuf[:], d.buf[:d.nbuf])
+ nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:d.nbuf])
d.nbuf = 0
- d.end = true
d.out = d.outbuf[:nw]
n = copy(p, d.out)
d.out = d.out[n:]
@@ -454,18 +442,138 @@
nr := d.nbuf / 4 * 4
nw := d.nbuf / 4 * 3
if nw > len(p) {
- nw, d.end, d.err = d.enc.decode(d.outbuf[:], d.buf[:nr])
+ nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:nr])
d.out = d.outbuf[:nw]
n = copy(p, d.out)
d.out = d.out[n:]
} else {
- n, d.end, d.err = d.enc.decode(p, d.buf[:nr])
+ n, d.err = d.enc.Decode(p, d.buf[:nr])
}
d.nbuf -= nr
copy(d.buf[:d.nbuf], d.buf[nr:])
return n, d.err
}
+// Decode decodes src using the encoding enc. It writes at most
+// DecodedLen(len(src)) bytes to dst and returns the number of bytes
+// written. If src contains invalid base64 data, it will return the
+// number of bytes successfully written and CorruptInputError.
+// New line characters (\r and \n) are ignored.
+func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
+ if len(src) == 0 {
+ return 0, nil
+ }
+
+ si := 0
+ ilen := len(src)
+ olen := len(dst)
+ for strconv.IntSize >= 64 && ilen-si >= 8 && olen-n >= 8 {
+ if ok := enc.decode64(dst[n:], src[si:]); ok {
+ n += 6
+ si += 8
+ } else {
+ var ninc int
+ si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
+ n += ninc
+ if err != nil {
+ return n, err
+ }
+ }
+ }
+
+ for ilen-si >= 4 && olen-n >= 4 {
+ if ok := enc.decode32(dst[n:], src[si:]); ok {
+ n += 3
+ si += 4
+ } else {
+ var ninc int
+ si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
+ n += ninc
+ if err != nil {
+ return n, err
+ }
+ }
+ }
+
+ for si < len(src) {
+ var ninc int
+ si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
+ n += ninc
+ if err != nil {
+ return n, err
+ }
+ }
+ return n, err
+}
+
+// decode32 tries to decode 4 base64 chars into 3 bytes.
+// len(dst) and len(src) must both be >= 4.
+// Returns true if decode succeeded.
+func (enc *Encoding) decode32(dst, src []byte) bool {
+ var dn, n uint32
+ if n = uint32(enc.decodeMap[src[0]]); n == 0xff {
+ return false
+ }
+ dn |= n << 26
+ if n = uint32(enc.decodeMap[src[1]]); n == 0xff {
+ return false
+ }
+ dn |= n << 20
+ if n = uint32(enc.decodeMap[src[2]]); n == 0xff {
+ return false
+ }
+ dn |= n << 14
+ if n = uint32(enc.decodeMap[src[3]]); n == 0xff {
+ return false
+ }
+ dn |= n << 8
+
+ binary.BigEndian.PutUint32(dst, dn)
+ return true
+}
+
+// decode64 tries to decode 8 base64 chars into 6 bytes.
+// len(dst) and len(src) must both be >= 8.
+// Returns true if decode succeeded.
+func (enc *Encoding) decode64(dst, src []byte) bool {
+ var dn, n uint64
+ if n = uint64(enc.decodeMap[src[0]]); n == 0xff {
+ return false
+ }
+ dn |= n << 58
+ if n = uint64(enc.decodeMap[src[1]]); n == 0xff {
+ return false
+ }
+ dn |= n << 52
+ if n = uint64(enc.decodeMap[src[2]]); n == 0xff {
+ return false
+ }
+ dn |= n << 46
+ if n = uint64(enc.decodeMap[src[3]]); n == 0xff {
+ return false
+ }
+ dn |= n << 40
+ if n = uint64(enc.decodeMap[src[4]]); n == 0xff {
+ return false
+ }
+ dn |= n << 34
+ if n = uint64(enc.decodeMap[src[5]]); n == 0xff {
+ return false
+ }
+ dn |= n << 28
+ if n = uint64(enc.decodeMap[src[6]]); n == 0xff {
+ return false
+ }
+ dn |= n << 22
+ if n = uint64(enc.decodeMap[src[7]]); n == 0xff {
+ return false
+ }
+ dn |= n << 16
+
+ binary.BigEndian.PutUint64(dst, dn)
+ return true
+}
+
type newlineFilteringReader struct {
wrapped io.Reader
}
diff --git a/src/encoding/base64/base64_test.go b/src/encoding/base64/base64_test.go
index 05011fb..9f5c493 100644
--- a/src/encoding/base64/base64_test.go
+++ b/src/encoding/base64/base64_test.go
@@ -152,12 +152,9 @@
for _, tt := range encodingTests {
encoded := tt.conv(p.encoded)
dbuf := make([]byte, tt.enc.DecodedLen(len(encoded)))
- count, end, err := tt.enc.decode(dbuf, []byte(encoded))
+ count, err := tt.enc.Decode(dbuf, []byte(encoded))
testEqual(t, "Decode(%q) = error %v, want %v", encoded, err, error(nil))
testEqual(t, "Decode(%q) = length %v, want %v", encoded, count, len(p.decoded))
- if len(encoded) > 0 {
- testEqual(t, "Decode(%q) = end %v, want %v", encoded, end, len(p.decoded)%3 != 0)
- }
testEqual(t, "Decode(%q) = %q, want %q", encoded, string(dbuf[0:count]), p.decoded)
dbuf, err = tt.enc.DecodeString(encoded)
diff --git a/src/encoding/binary/binary_test.go b/src/encoding/binary/binary_test.go
index 0547bee..af40257 100644
--- a/src/encoding/binary/binary_test.go
+++ b/src/encoding/binary/binary_test.go
@@ -109,6 +109,7 @@
var src = []byte{1, 2, 3, 4, 5, 6, 7, 8}
var res = []int32{0x01020304, 0x05060708}
+var putbuf = []byte{0, 0, 0, 0, 0, 0, 0, 0}
func checkResult(t *testing.T, dir string, order ByteOrder, err error, have, want interface{}) {
if err != nil {
@@ -502,25 +503,42 @@
}
func BenchmarkPutUint16(b *testing.B) {
- buf := [2]byte{}
b.SetBytes(2)
for i := 0; i < b.N; i++ {
- BigEndian.PutUint16(buf[:], uint16(i))
+ BigEndian.PutUint16(putbuf[:], uint16(i))
}
}
func BenchmarkPutUint32(b *testing.B) {
- buf := [4]byte{}
b.SetBytes(4)
for i := 0; i < b.N; i++ {
- BigEndian.PutUint32(buf[:], uint32(i))
+ BigEndian.PutUint32(putbuf[:], uint32(i))
}
}
func BenchmarkPutUint64(b *testing.B) {
- buf := [8]byte{}
b.SetBytes(8)
for i := 0; i < b.N; i++ {
- BigEndian.PutUint64(buf[:], uint64(i))
+ BigEndian.PutUint64(putbuf[:], uint64(i))
+ }
+}
+func BenchmarkLittleEndianPutUint16(b *testing.B) {
+ b.SetBytes(2)
+ for i := 0; i < b.N; i++ {
+ LittleEndian.PutUint16(putbuf[:], uint16(i))
+ }
+}
+
+func BenchmarkLittleEndianPutUint32(b *testing.B) {
+ b.SetBytes(4)
+ for i := 0; i < b.N; i++ {
+ LittleEndian.PutUint32(putbuf[:], uint32(i))
+ }
+}
+
+func BenchmarkLittleEndianPutUint64(b *testing.B) {
+ b.SetBytes(8)
+ for i := 0; i < b.N; i++ {
+ LittleEndian.PutUint64(putbuf[:], uint64(i))
}
}
diff --git a/src/encoding/binary/example_test.go b/src/encoding/binary/example_test.go
index a8b8dba..6f892c2 100644
--- a/src/encoding/binary/example_test.go
+++ b/src/encoding/binary/example_test.go
@@ -51,6 +51,32 @@
// Output: 3.141592653589793
}
+func ExampleRead_multi() {
+ b := []byte{0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40, 0xff, 0x01, 0x02, 0x03, 0xbe, 0xef}
+ r := bytes.NewReader(b)
+
+ var data struct {
+ PI float64
+ Uate uint8
+ Mine [3]byte
+ Too uint16
+ }
+
+ if err := binary.Read(r, binary.LittleEndian, &data); err != nil {
+ fmt.Println("binary.Read failed:", err)
+ }
+
+ fmt.Println(data.PI)
+ fmt.Println(data.Uate)
+ fmt.Printf("% x\n", data.Mine)
+ fmt.Println(data.Too)
+ // Output:
+ // 3.141592653589793
+ // 255
+ // 01 02 03
+ // 61374
+}
+
func ExampleByteOrder_put() {
b := make([]byte, 4)
binary.LittleEndian.PutUint16(b[0:], 0x03e8)
diff --git a/src/encoding/csv/reader.go b/src/encoding/csv/reader.go
index a3497c8..2efc7ad 100644
--- a/src/encoding/csv/reader.go
+++ b/src/encoding/csv/reader.go
@@ -58,44 +58,67 @@
"fmt"
"io"
"unicode"
+ "unicode/utf8"
)
// A ParseError is returned for parsing errors.
-// The first line is 1. The first column is 0.
+// Line numbers are 1-indexed and columns are 0-indexed.
type ParseError struct {
- Line int // Line where the error occurred
- Column int // Column (rune index) where the error occurred
- Err error // The actual error
+ StartLine int // Line where the record starts
+ Line int // Line where the error occurred
+ Column int // Column (rune index) where the error occurred
+ Err error // The actual error
}
func (e *ParseError) Error() string {
- return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err)
+ if e.Err == ErrFieldCount {
+ return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
+ }
+ if e.StartLine != e.Line {
+ return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
+ }
+ return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
}
-// These are the errors that can be returned in ParseError.Error
+// These are the errors that can be returned in ParseError.Err.
var (
- ErrTrailingComma = errors.New("extra delimiter at end of line") // no longer used
+ ErrTrailingComma = errors.New("extra delimiter at end of line") // Deprecated: No longer used.
ErrBareQuote = errors.New("bare \" in non-quoted-field")
- ErrQuote = errors.New("extraneous \" in field")
- ErrFieldCount = errors.New("wrong number of fields in line")
+ ErrQuote = errors.New("extraneous or missing \" in quoted-field")
+ ErrFieldCount = errors.New("wrong number of fields")
)
+var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
+
+func validDelim(r rune) bool {
+ return r != 0 && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError
+}
+
// A Reader reads records from a CSV-encoded file.
//
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
// The exported fields can be changed to customize the details before the
// first call to Read or ReadAll.
//
-//
+// The Reader converts all \r\n sequences in its input to plain \n,
+// including in multiline field values, so that the returned data does
+// not depend on which line-ending convention an input file uses.
type Reader struct {
// Comma is the field delimiter.
// It is set to comma (',') by NewReader.
+ // Comma must be a valid rune and must not be \r, \n,
+ // or the Unicode replacement character (0xFFFD).
Comma rune
+
// Comment, if not 0, is the comment character. Lines beginning with the
// Comment character without preceding whitespace are ignored.
// With leading whitespace the Comment character becomes part of the
// field, even if TrimLeadingSpace is true.
+ // Comment must be a valid rune and must not be \r, \n,
+ // or the Unicode replacement character (0xFFFD).
+ // It must also not be equal to Comma.
Comment rune
+
// FieldsPerRecord is the number of expected fields per record.
// If FieldsPerRecord is positive, Read requires each record to
// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
@@ -103,31 +126,41 @@
// have the same field count. If FieldsPerRecord is negative, no check is
// made and records may have a variable number of fields.
FieldsPerRecord int
+
// If LazyQuotes is true, a quote may appear in an unquoted field and a
// non-doubled quote may appear in a quoted field.
- LazyQuotes bool
- TrailingComma bool // ignored; here for backwards compatibility
+ LazyQuotes bool
+
// If TrimLeadingSpace is true, leading white space in a field is ignored.
// This is done even if the field delimiter, Comma, is white space.
TrimLeadingSpace bool
+
// ReuseRecord controls whether calls to Read may return a slice sharing
// the backing array of the previous call's returned slice for performance.
// By default, each call to Read returns newly allocated memory owned by the caller.
ReuseRecord bool
- line int
- column int
- r *bufio.Reader
- // lineBuffer holds the unescaped fields read by readField, one after another.
+ TrailingComma bool // Deprecated: No longer used.
+
+ r *bufio.Reader
+
+ // numLine is the current line being read in the CSV file.
+ numLine int
+
+ // rawBuffer is a line buffer only used by the readLine method.
+ rawBuffer []byte
+
+ // recordBuffer holds the unescaped fields, one after another.
// The fields can be accessed by using the indexes in fieldIndexes.
- // Example: for the row `a,"b","c""d",e` lineBuffer will contain `abc"de` and
- // fieldIndexes will contain the indexes 0, 1, 2, 5.
- lineBuffer bytes.Buffer
- // Indexes of fields inside lineBuffer
- // The i'th field starts at offset fieldIndexes[i] in lineBuffer.
+	// E.g., for the row `a,"b","c""d",e`, recordBuffer will contain `abc"de`
+ // and fieldIndexes will contain the indexes [1, 2, 5, 6].
+ recordBuffer []byte
+
+ // fieldIndexes is an index of fields inside recordBuffer.
+ // The i'th field ends at offset fieldIndexes[i] in recordBuffer.
fieldIndexes []int
- // only used when ReuseRecord == true
+ // lastRecord is a record cache and only used when ReuseRecord == true.
lastRecord []string
}
@@ -139,15 +172,6 @@
}
}
-// error creates a new ParseError based on err.
-func (r *Reader) error(err error) error {
- return &ParseError{
- Line: r.line,
- Column: r.column,
- Err: err,
- }
-}
-
// Read reads one record (a slice of fields) from r.
// If the record has an unexpected number of fields,
// Read returns the record along with the error ErrFieldCount.
@@ -163,7 +187,6 @@
} else {
record, err = r.readRecord(nil)
}
-
return record, err
}
@@ -185,226 +208,192 @@
}
}
-// readRecord reads and parses a single csv record from r.
-// Unlike parseRecord, readRecord handles FieldsPerRecord.
-// If dst has enough capacity it will be used for the returned record.
-func (r *Reader) readRecord(dst []string) (record []string, err error) {
- for {
- record, err = r.parseRecord(dst)
- if record != nil {
- break
+// readLine reads the next line (with the trailing endline).
+// If EOF is hit without a trailing endline, it will be omitted.
+// If some bytes were read, then the error is never io.EOF.
+// The result is only valid until the next call to readLine.
+func (r *Reader) readLine() ([]byte, error) {
+ line, err := r.r.ReadSlice('\n')
+ if err == bufio.ErrBufferFull {
+ r.rawBuffer = append(r.rawBuffer[:0], line...)
+ for err == bufio.ErrBufferFull {
+ line, err = r.r.ReadSlice('\n')
+ r.rawBuffer = append(r.rawBuffer, line...)
}
- if err != nil {
- return nil, err
+ line = r.rawBuffer
+ }
+ if len(line) > 0 && err == io.EOF {
+ err = nil
+ // For backwards compatibility, drop trailing \r before EOF.
+ if line[len(line)-1] == '\r' {
+ line = line[:len(line)-1]
}
}
+ r.numLine++
+ // Normalize \r\n to \n on all input lines.
+ if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
+ line[n-2] = '\n'
+ line = line[:n-1]
+ }
+ return line, err
+}
+// lengthNL reports the number of bytes for the trailing \n.
+func lengthNL(b []byte) int {
+ if len(b) > 0 && b[len(b)-1] == '\n' {
+ return 1
+ }
+ return 0
+}
+
+// nextRune returns the next rune in b or utf8.RuneError.
+func nextRune(b []byte) rune {
+ r, _ := utf8.DecodeRune(b)
+ return r
+}
+
+func (r *Reader) readRecord(dst []string) ([]string, error) {
+ if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
+ return nil, errInvalidDelim
+ }
+
+ // Read line (automatically skipping past empty lines and any comments).
+ var line, fullLine []byte
+ var errRead error
+ for errRead == nil {
+ line, errRead = r.readLine()
+ if r.Comment != 0 && nextRune(line) == r.Comment {
+ line = nil
+ continue // Skip comment lines
+ }
+ if errRead == nil && len(line) == lengthNL(line) {
+ line = nil
+ continue // Skip empty lines
+ }
+ fullLine = line
+ break
+ }
+ if errRead == io.EOF {
+ return nil, errRead
+ }
+
+ // Parse each field in the record.
+ var err error
+ const quoteLen = len(`"`)
+ commaLen := utf8.RuneLen(r.Comma)
+ recLine := r.numLine // Starting line for record
+ r.recordBuffer = r.recordBuffer[:0]
+ r.fieldIndexes = r.fieldIndexes[:0]
+parseField:
+ for {
+ if r.TrimLeadingSpace {
+ line = bytes.TrimLeftFunc(line, unicode.IsSpace)
+ }
+ if len(line) == 0 || line[0] != '"' {
+ // Non-quoted string field
+ i := bytes.IndexRune(line, r.Comma)
+ field := line
+ if i >= 0 {
+ field = field[:i]
+ } else {
+ field = field[:len(field)-lengthNL(field)]
+ }
+ // Check to make sure a quote does not appear in field.
+ if !r.LazyQuotes {
+ if j := bytes.IndexByte(field, '"'); j >= 0 {
+ col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
+ err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
+ break parseField
+ }
+ }
+ r.recordBuffer = append(r.recordBuffer, field...)
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ if i >= 0 {
+ line = line[i+commaLen:]
+ continue parseField
+ }
+ break parseField
+ } else {
+ // Quoted string field
+ line = line[quoteLen:]
+ for {
+ i := bytes.IndexByte(line, '"')
+ if i >= 0 {
+ // Hit next quote.
+ r.recordBuffer = append(r.recordBuffer, line[:i]...)
+ line = line[i+quoteLen:]
+ switch rn := nextRune(line); {
+ case rn == '"':
+ // `""` sequence (append quote).
+ r.recordBuffer = append(r.recordBuffer, '"')
+ line = line[quoteLen:]
+ case rn == r.Comma:
+ // `",` sequence (end of field).
+ line = line[commaLen:]
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ continue parseField
+ case lengthNL(line) == len(line):
+ // `"\n` sequence (end of line).
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ break parseField
+ case r.LazyQuotes:
+ // `"` sequence (bare quote).
+ r.recordBuffer = append(r.recordBuffer, '"')
+ default:
+ // `"*` sequence (invalid non-escaped quote).
+ col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
+ err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
+ break parseField
+ }
+ } else if len(line) > 0 {
+ // Hit end of line (copy all data so far).
+ r.recordBuffer = append(r.recordBuffer, line...)
+ if errRead != nil {
+ break parseField
+ }
+ line, errRead = r.readLine()
+ if errRead == io.EOF {
+ errRead = nil
+ }
+ fullLine = line
+ } else {
+ // Abrupt end of file (EOF or error).
+ if !r.LazyQuotes && errRead == nil {
+ col := utf8.RuneCount(fullLine)
+ err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
+ break parseField
+ }
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ break parseField
+ }
+ }
+ }
+ }
+ if err == nil {
+ err = errRead
+ }
+
+ // Create a single string and create slices out of it.
+ // This pins the memory of the fields together, but allocates once.
+ str := string(r.recordBuffer) // Convert to string once to batch allocations
+ dst = dst[:0]
+ if cap(dst) < len(r.fieldIndexes) {
+ dst = make([]string, len(r.fieldIndexes))
+ }
+ dst = dst[:len(r.fieldIndexes)]
+ var preIdx int
+ for i, idx := range r.fieldIndexes {
+ dst[i] = str[preIdx:idx]
+ preIdx = idx
+ }
+
+ // Check or update the expected fields per record.
if r.FieldsPerRecord > 0 {
- if len(record) != r.FieldsPerRecord {
- r.column = 0 // report at start of record
- return record, r.error(ErrFieldCount)
+ if len(dst) != r.FieldsPerRecord && err == nil {
+ err = &ParseError{StartLine: recLine, Line: recLine, Err: ErrFieldCount}
}
} else if r.FieldsPerRecord == 0 {
- r.FieldsPerRecord = len(record)
+ r.FieldsPerRecord = len(dst)
}
- return record, nil
-}
-
-// readRune reads one rune from r, folding \r\n to \n and keeping track
-// of how far into the line we have read. r.column will point to the start
-// of this rune, not the end of this rune.
-func (r *Reader) readRune() (rune, error) {
- r1, _, err := r.r.ReadRune()
-
- // Handle \r\n here. We make the simplifying assumption that
- // anytime \r is followed by \n that it can be folded to \n.
- // We will not detect files which contain both \r\n and bare \n.
- if r1 == '\r' {
- r1, _, err = r.r.ReadRune()
- if err == nil {
- if r1 != '\n' {
- r.r.UnreadRune()
- r1 = '\r'
- }
- }
- }
- r.column++
- return r1, err
-}
-
-// skip reads runes up to and including the rune delim or until error.
-func (r *Reader) skip(delim rune) error {
- for {
- r1, err := r.readRune()
- if err != nil {
- return err
- }
- if r1 == delim {
- return nil
- }
- }
-}
-
-// parseRecord reads and parses a single csv record from r.
-// If dst has enough capacity it will be used for the returned fields.
-func (r *Reader) parseRecord(dst []string) (fields []string, err error) {
- // Each record starts on a new line. We increment our line
- // number (lines start at 1, not 0) and set column to -1
- // so as we increment in readRune it points to the character we read.
- r.line++
- r.column = -1
-
- // Peek at the first rune. If it is an error we are done.
- // If we support comments and it is the comment character
- // then skip to the end of line.
-
- r1, _, err := r.r.ReadRune()
- if err != nil {
- return nil, err
- }
-
- if r.Comment != 0 && r1 == r.Comment {
- return nil, r.skip('\n')
- }
- r.r.UnreadRune()
-
- r.lineBuffer.Reset()
- r.fieldIndexes = r.fieldIndexes[:0]
-
- // At this point we have at least one field.
- for {
- idx := r.lineBuffer.Len()
-
- haveField, delim, err := r.parseField()
- if haveField {
- r.fieldIndexes = append(r.fieldIndexes, idx)
- }
-
- if delim == '\n' || err == io.EOF {
- if len(r.fieldIndexes) == 0 {
- return nil, err
- }
- break
- }
-
- if err != nil {
- return nil, err
- }
- }
-
- fieldCount := len(r.fieldIndexes)
- // Using this approach (creating a single string and taking slices of it)
- // means that a single reference to any of the fields will retain the whole
- // string. The risk of a nontrivial space leak caused by this is considered
- // minimal and a tradeoff for better performance through the combined
- // allocations.
- line := r.lineBuffer.String()
-
- if cap(dst) >= fieldCount {
- fields = dst[:fieldCount]
- } else {
- fields = make([]string, fieldCount)
- }
-
- for i, idx := range r.fieldIndexes {
- if i == fieldCount-1 {
- fields[i] = line[idx:]
- } else {
- fields[i] = line[idx:r.fieldIndexes[i+1]]
- }
- }
-
- return fields, nil
-}
-
-// parseField parses the next field in the record. The read field is
-// appended to r.lineBuffer. Delim is the first character not part of the field
-// (r.Comma or '\n').
-func (r *Reader) parseField() (haveField bool, delim rune, err error) {
- r1, err := r.readRune()
- for err == nil && r.TrimLeadingSpace && r1 != '\n' && unicode.IsSpace(r1) {
- r1, err = r.readRune()
- }
-
- if err == io.EOF && r.column != 0 {
- return true, 0, err
- }
- if err != nil {
- return false, 0, err
- }
-
- switch r1 {
- case r.Comma:
- // will check below
-
- case '\n':
- // We are a trailing empty field or a blank line
- if r.column == 0 {
- return false, r1, nil
- }
- return true, r1, nil
-
- case '"':
- // quoted field
- Quoted:
- for {
- r1, err = r.readRune()
- if err != nil {
- if err == io.EOF {
- if r.LazyQuotes {
- return true, 0, err
- }
- return false, 0, r.error(ErrQuote)
- }
- return false, 0, err
- }
- switch r1 {
- case '"':
- r1, err = r.readRune()
- if err != nil || r1 == r.Comma {
- break Quoted
- }
- if r1 == '\n' {
- return true, r1, nil
- }
- if r1 != '"' {
- if !r.LazyQuotes {
- r.column--
- return false, 0, r.error(ErrQuote)
- }
- // accept the bare quote
- r.lineBuffer.WriteRune('"')
- }
- case '\n':
- r.line++
- r.column = -1
- }
- r.lineBuffer.WriteRune(r1)
- }
-
- default:
- // unquoted field
- for {
- r.lineBuffer.WriteRune(r1)
- r1, err = r.readRune()
- if err != nil || r1 == r.Comma {
- break
- }
- if r1 == '\n' {
- return true, r1, nil
- }
- if !r.LazyQuotes && r1 == '"' {
- return false, 0, r.error(ErrBareQuote)
- }
- }
- }
-
- if err != nil {
- if err == io.EOF {
- return true, 0, err
- }
- return false, 0, err
- }
-
- return true, r1, nil
+ return dst, err
}
diff --git a/src/encoding/csv/reader_test.go b/src/encoding/csv/reader_test.go
index 5ab1b61..1fc69f9 100644
--- a/src/encoding/csv/reader_test.go
+++ b/src/encoding/csv/reader_test.go
@@ -9,45 +9,38 @@
"reflect"
"strings"
"testing"
+ "unicode/utf8"
)
-var readTests = []struct {
- Name string
- Input string
- Output [][]string
- UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
+func TestRead(t *testing.T) {
+ tests := []struct {
+ Name string
+ Input string
+ Output [][]string
+ Error error
- // These fields are copied into the Reader
- Comma rune
- Comment rune
- FieldsPerRecord int
- LazyQuotes bool
- TrailingComma bool
- TrimLeadingSpace bool
- ReuseRecord bool
-
- Error string
- Line int // Expected error line if != 0
- Column int // Expected error column if line != 0
-}{
- {
+ // These fields are copied into the Reader
+ Comma rune
+ Comment rune
+ UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
+ FieldsPerRecord int
+ LazyQuotes bool
+ TrimLeadingSpace bool
+ ReuseRecord bool
+ }{{
Name: "Simple",
Input: "a,b,c\n",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ }, {
Name: "CRLF",
Input: "a,b\r\nc,d\r\n",
Output: [][]string{{"a", "b"}, {"c", "d"}},
- },
- {
+ }, {
Name: "BareCR",
Input: "a,b\rc,d\r\n",
Output: [][]string{{"a", "b\rc", "d"}},
- },
- {
- Name: "RFC4180test",
- UseFieldsPerRecord: true,
+ }, {
+ Name: "RFC4180test",
Input: `#field1,field2,field3
"aaa","bb
b","ccc"
@@ -60,163 +53,139 @@
{"a,a", `b"bb`, "ccc"},
{"zzz", "yyy", "xxx"},
},
- },
- {
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+ }, {
Name: "NoEOLTest",
Input: "a,b,c",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ }, {
Name: "Semicolon",
- Comma: ';',
Input: "a;b;c\n",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ Comma: ';',
+ }, {
Name: "MultiLine",
Input: `"two
line","one line","three
line
field"`,
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
- },
- {
+ }, {
Name: "BlankLine",
Input: "a,b,c\n\nd,e,f\n\n",
Output: [][]string{
{"a", "b", "c"},
{"d", "e", "f"},
},
- },
- {
- Name: "BlankLineFieldCount",
- Input: "a,b,c\n\nd,e,f\n\n",
- UseFieldsPerRecord: true,
+ }, {
+ Name: "BlankLineFieldCount",
+ Input: "a,b,c\n\nd,e,f\n\n",
Output: [][]string{
{"a", "b", "c"},
{"d", "e", "f"},
},
- },
- {
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+ }, {
Name: "TrimSpace",
Input: " a, b, c\n",
- TrimLeadingSpace: true,
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ TrimLeadingSpace: true,
+ }, {
Name: "LeadingSpace",
Input: " a, b, c\n",
Output: [][]string{{" a", " b", " c"}},
- },
- {
+ }, {
Name: "Comment",
- Comment: '#',
Input: "#1,2,3\na,b,c\n#comment",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ Comment: '#',
+ }, {
Name: "NoComment",
Input: "#1,2,3\na,b,c",
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
- },
- {
+ }, {
Name: "LazyQuotes",
- LazyQuotes: true,
Input: `a "word","1"2",a","b`,
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
- },
- {
- Name: "BareQuotes",
LazyQuotes: true,
+ }, {
+ Name: "BareQuotes",
Input: `a "word","1"2",a"`,
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
- },
- {
- Name: "BareDoubleQuotes",
LazyQuotes: true,
+ }, {
+ Name: "BareDoubleQuotes",
Input: `a""b,c`,
Output: [][]string{{`a""b`, `c`}},
- },
- {
+ LazyQuotes: true,
+ }, {
Name: "BadDoubleQuotes",
Input: `a""b,c`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 1,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
+ }, {
Name: "TrimQuote",
Input: ` "a"," b",c`,
- TrimLeadingSpace: true,
Output: [][]string{{"a", " b", "c"}},
- },
- {
+ TrimLeadingSpace: true,
+ }, {
Name: "BadBareQuote",
Input: `a "word","b"`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 2,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
+ }, {
Name: "BadTrailingQuote",
Input: `"a word",b"`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 10,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote},
+ }, {
Name: "ExtraneousQuote",
Input: `"a "word","b"`,
- Error: `extraneous " in field`, Line: 1, Column: 3,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote},
+ }, {
Name: "BadFieldCount",
- UseFieldsPerRecord: true,
Input: "a,b,c\nd,e",
- Error: "wrong number of fields", Line: 2,
- },
- {
+ Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount},
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+ }, {
Name: "BadFieldCount1",
+ Input: `a,b,c`,
+ Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
UseFieldsPerRecord: true,
FieldsPerRecord: 2,
- Input: `a,b,c`,
- Error: "wrong number of fields", Line: 1,
- },
- {
+ }, {
Name: "FieldCount",
Input: "a,b,c\nd,e",
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
- },
- {
+ }, {
Name: "TrailingCommaEOF",
Input: "a,b,c,",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
+ }, {
Name: "TrailingCommaEOL",
Input: "a,b,c,\n",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
+ }, {
Name: "TrailingCommaSpaceEOF",
- TrimLeadingSpace: true,
Input: "a,b,c, ",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
- Name: "TrailingCommaSpaceEOL",
TrimLeadingSpace: true,
+ }, {
+ Name: "TrailingCommaSpaceEOL",
Input: "a,b,c, \n",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
- Name: "TrailingCommaLine3",
TrimLeadingSpace: true,
+ }, {
+ Name: "TrailingCommaLine3",
Input: "a,b,c\nd,e,f\ng,hi,",
Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
- },
- {
+ TrimLeadingSpace: true,
+ }, {
Name: "NotTrailingComma3",
Input: "a,b,c, \n",
Output: [][]string{{"a", "b", "c", " "}},
- },
- {
- Name: "CommaFieldTest",
- TrailingComma: true,
+ }, {
+ Name: "CommaFieldTest",
Input: `x,y,z,w
x,y,z,
x,y,,
@@ -240,67 +209,201 @@
{"x", "", "", ""},
{"", "", "", ""},
},
- },
- {
- Name: "TrailingCommaIneffective1",
- TrailingComma: true,
- TrimLeadingSpace: true,
- Input: "a,b,\nc,d,e",
+ }, {
+ Name: "TrailingCommaIneffective1",
+ Input: "a,b,\nc,d,e",
Output: [][]string{
{"a", "b", ""},
{"c", "d", "e"},
},
- },
- {
- Name: "TrailingCommaIneffective2",
- TrailingComma: false,
TrimLeadingSpace: true,
- Input: "a,b,\nc,d,e",
- Output: [][]string{
- {"a", "b", ""},
- {"c", "d", "e"},
- },
- },
- {
- Name: "ReadAllReuseRecord",
- ReuseRecord: true,
- Input: "a,b\nc,d",
+ }, {
+ Name: "ReadAllReuseRecord",
+ Input: "a,b\nc,d",
Output: [][]string{
{"a", "b"},
{"c", "d"},
},
- },
-}
+ ReuseRecord: true,
+ }, {
+ Name: "StartLine1", // Issue 19019
+ Input: "a,\"b\nc\"d,e",
+ Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
+ }, {
+ Name: "StartLine2",
+ Input: "a,b\n\"d\n\n,e",
+ Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
+ }, {
+ Name: "CRLFInQuotedField", // Issue 21201
+ Input: "A,\"Hello\r\nHi\",B\r\n",
+ Output: [][]string{
+ {"A", "Hello\nHi", "B"},
+ },
+ }, {
+ Name: "BinaryBlobField", // Issue 19410
+ Input: "x09\x41\xb4\x1c,aktau",
+ Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
+ }, {
+ Name: "TrailingCR",
+ Input: "field1,field2\r",
+ Output: [][]string{{"field1", "field2"}},
+ }, {
+ Name: "QuotedTrailingCR",
+ Input: "\"field\"\r",
+ Output: [][]string{{"field"}},
+ }, {
+ Name: "QuotedTrailingCRCR",
+ Input: "\"field\"\r\r",
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
+ }, {
+ Name: "FieldCR",
+ Input: "field\rfield\r",
+ Output: [][]string{{"field\rfield"}},
+ }, {
+ Name: "FieldCRCR",
+ Input: "field\r\rfield\r\r",
+ Output: [][]string{{"field\r\rfield\r"}},
+ }, {
+ Name: "FieldCRCRLF",
+ Input: "field\r\r\nfield\r\r\n",
+ Output: [][]string{{"field\r"}, {"field\r"}},
+ }, {
+ Name: "FieldCRCRLFCR",
+ Input: "field\r\r\n\rfield\r\r\n\r",
+ Output: [][]string{{"field\r"}, {"\rfield\r"}},
+ }, {
+ Name: "FieldCRCRLFCRCR",
+ Input: "field\r\r\n\r\rfield\r\r\n\r\r",
+ Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
+ }, {
+ Name: "MultiFieldCRCRLFCRCR",
+ Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
+ Output: [][]string{
+ {"field1", "field2\r"},
+ {"\r\rfield1", "field2\r"},
+ {"\r\r", ""},
+ },
+ }, {
+ Name: "NonASCIICommaAndComment",
+ Input: "a£b,c£ \td,e\n€ comment\n",
+ Output: [][]string{{"a", "b,c", "d,e"}},
+ TrimLeadingSpace: true,
+ Comma: '£',
+ Comment: '€',
+ }, {
+ Name: "NonASCIICommaAndCommentWithQuotes",
+ Input: "a€\" b,\"€ c\nλ comment\n",
+ Output: [][]string{{"a", " b,", " c"}},
+ Comma: '€',
+ Comment: 'λ',
+ }, {
+ // λ and θ start with the same byte.
+ // This tests that the parser doesn't confuse such characters.
+ Name: "NonASCIICommaConfusion",
+ Input: "\"abθcd\"λefθgh",
+ Output: [][]string{{"abθcd", "efθgh"}},
+ Comma: 'λ',
+ Comment: '€',
+ }, {
+ Name: "NonASCIICommentConfusion",
+ Input: "λ\nλ\nθ\nλ\n",
+ Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
+ Comment: 'θ',
+ }, {
+ Name: "QuotedFieldMultipleLF",
+ Input: "\"\n\n\n\n\"",
+ Output: [][]string{{"\n\n\n\n"}},
+ }, {
+ Name: "MultipleCRLF",
+ Input: "\r\n\r\n\r\n\r\n",
+ }, {
+ // The implementation may read each line in several chunks if it doesn't fit entirely
+ // in the read buffer, so we should test the code to handle that condition.
+ Name: "HugeLines",
+ Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
+ Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
+ Comment: '#',
+ }, {
+ Name: "QuoteWithTrailingCRLF",
+ Input: "\"foo\"bar\"\r\n",
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
+ }, {
+ Name: "LazyQuoteWithTrailingCRLF",
+ Input: "\"foo\"bar\"\r\n",
+ Output: [][]string{{`foo"bar`}},
+ LazyQuotes: true,
+ }, {
+ Name: "DoubleQuoteWithTrailingCRLF",
+ Input: "\"foo\"\"bar\"\r\n",
+ Output: [][]string{{`foo"bar`}},
+ }, {
+ Name: "EvenQuotes",
+ Input: `""""""""`,
+ Output: [][]string{{`"""`}},
+ }, {
+ Name: "OddQuotes",
+ Input: `"""""""`,
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote},
+ }, {
+ Name: "LazyOddQuotes",
+ Input: `"""""""`,
+ Output: [][]string{{`"""`}},
+ LazyQuotes: true,
+ }, {
+ Name: "BadComma1",
+ Comma: '\n',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComma2",
+ Comma: '\r',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComma3",
+ Comma: utf8.RuneError,
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComment1",
+ Comment: '\n',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComment2",
+ Comment: '\r',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComment3",
+ Comment: utf8.RuneError,
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadCommaComment",
+ Comma: 'X',
+ Comment: 'X',
+ Error: errInvalidDelim,
+ }}
-func TestRead(t *testing.T) {
- for _, tt := range readTests {
- r := NewReader(strings.NewReader(tt.Input))
- r.Comment = tt.Comment
- if tt.UseFieldsPerRecord {
- r.FieldsPerRecord = tt.FieldsPerRecord
- } else {
- r.FieldsPerRecord = -1
- }
- r.LazyQuotes = tt.LazyQuotes
- r.TrailingComma = tt.TrailingComma
- r.TrimLeadingSpace = tt.TrimLeadingSpace
- r.ReuseRecord = tt.ReuseRecord
- if tt.Comma != 0 {
- r.Comma = tt.Comma
- }
- out, err := r.ReadAll()
- perr, _ := err.(*ParseError)
- if tt.Error != "" {
- if err == nil || !strings.Contains(err.Error(), tt.Error) {
- t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
- } else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) {
- t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column)
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ r := NewReader(strings.NewReader(tt.Input))
+
+ if tt.Comma != 0 {
+ r.Comma = tt.Comma
}
- } else if err != nil {
- t.Errorf("%s: unexpected error %v", tt.Name, err)
- } else if !reflect.DeepEqual(out, tt.Output) {
- t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output)
- }
+ r.Comment = tt.Comment
+ if tt.UseFieldsPerRecord {
+ r.FieldsPerRecord = tt.FieldsPerRecord
+ } else {
+ r.FieldsPerRecord = -1
+ }
+ r.LazyQuotes = tt.LazyQuotes
+ r.TrimLeadingSpace = tt.TrimLeadingSpace
+ r.ReuseRecord = tt.ReuseRecord
+
+ out, err := r.ReadAll()
+ if !reflect.DeepEqual(err, tt.Error) {
+ t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error)
+ } else if !reflect.DeepEqual(out, tt.Output) {
+ t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output)
+ }
+ })
}
}
diff --git a/src/encoding/csv/writer.go b/src/encoding/csv/writer.go
index 84b7aa1..ef3594e 100644
--- a/src/encoding/csv/writer.go
+++ b/src/encoding/csv/writer.go
@@ -20,7 +20,7 @@
//
// Comma is the field delimiter.
//
-// If UseCRLF is true, the Writer ends each record with \r\n instead of \n.
+// If UseCRLF is true, the Writer ends each output line with \r\n instead of \n.
type Writer struct {
Comma rune // Field delimiter (set to ',' by NewWriter)
UseCRLF bool // True to use \r\n as the line terminator
@@ -38,6 +38,10 @@
// Writer writes a single CSV record to w along with any necessary quoting.
// A record is a slice of strings with each string being one field.
func (w *Writer) Write(record []string) error {
+ if !validDelim(w.Comma) {
+ return errInvalidDelim
+ }
+
for n, field := range record {
if n > 0 {
if _, err := w.w.WriteRune(w.Comma); err != nil {
diff --git a/src/encoding/gob/codec_test.go b/src/encoding/gob/codec_test.go
index eb9f306..8f7b6f3 100644
--- a/src/encoding/gob/codec_test.go
+++ b/src/encoding/gob/codec_test.go
@@ -1321,6 +1321,7 @@
var singletons = []interface{}{
true,
7,
+ uint(10),
3.2,
"hello",
[3]int{11, 22, 33},
diff --git a/src/encoding/gob/debug.go b/src/encoding/gob/debug.go
index d69d36f..8f93742 100644
--- a/src/encoding/gob/debug.go
+++ b/src/encoding/gob/debug.go
@@ -594,7 +594,7 @@
x := deb.int64()
fmt.Fprintf(os.Stderr, "%s%d\n", indent, x)
case tUint:
- x := deb.int64()
+ x := deb.uint64()
fmt.Fprintf(os.Stderr, "%s%d\n", indent, x)
case tFloat:
x := deb.uint64()
diff --git a/src/encoding/gob/decode.go b/src/encoding/gob/decode.go
index 8dece42..2da913f 100644
--- a/src/encoding/gob/decode.go
+++ b/src/encoding/gob/decode.go
@@ -1038,6 +1038,8 @@
// typeString returns a human-readable description of the type identified by remoteId.
func (dec *Decoder) typeString(remoteId typeId) string {
+ typeLock.Lock()
+ defer typeLock.Unlock()
if t := idToType[remoteId]; t != nil {
// globally known type.
return t.string()
diff --git a/src/encoding/gob/decoder.go b/src/encoding/gob/decoder.go
index 8e0b1dd..5ef0388 100644
--- a/src/encoding/gob/decoder.go
+++ b/src/encoding/gob/decoder.go
@@ -55,7 +55,7 @@
// recvType loads the definition of a type.
func (dec *Decoder) recvType(id typeId) {
- // Have we already seen this type? That's an error
+ // Have we already seen this type? That's an error
if id < firstUserId || dec.wireType[id] != nil {
dec.err = errors.New("gob: duplicate type received")
return
@@ -99,10 +99,8 @@
// Read the data
dec.buf.Size(nbytes)
_, dec.err = io.ReadFull(dec.r, dec.buf.Bytes())
- if dec.err != nil {
- if dec.err == io.EOF {
- dec.err = io.ErrUnexpectedEOF
- }
+ if dec.err == io.EOF {
+ dec.err = io.ErrUnexpectedEOF
}
}
diff --git a/src/encoding/gob/doc.go b/src/encoding/gob/doc.go
index db734ec..fa53431 100644
--- a/src/encoding/gob/doc.go
+++ b/src/encoding/gob/doc.go
@@ -381,7 +381,7 @@
07 // this value is 7 bytes long
ff 82 // the type number, 65 (1 byte (-FF) followed by 65<<1)
01 // add one to field number, yielding field 0
- 2c // encoding of signed "22" (0x22 = 44 = 22<<1); Point.x = 22
+ 2c // encoding of signed "22" (0x2c = 44 = 22<<1); Point.x = 22
01 // add one to field number, yielding field 1
42 // encoding of signed "33" (0x42 = 66 = 33<<1); Point.y = 33
00 // end of structure
diff --git a/src/encoding/gob/type_test.go b/src/encoding/gob/type_test.go
index 14f25d8..934270e 100644
--- a/src/encoding/gob/type_test.go
+++ b/src/encoding/gob/type_test.go
@@ -7,6 +7,7 @@
import (
"bytes"
"reflect"
+ "sync"
"testing"
)
@@ -218,3 +219,44 @@
<-c
}
}
+
+// Issue 23328. Note that this test name is known to cmd/dist/test.go.
+func TestTypeRace(t *testing.T) {
+ c := make(chan bool)
+ var wg sync.WaitGroup
+ for i := 0; i < 2; i++ {
+ wg.Add(1)
+ go func(i int) {
+ defer wg.Done()
+ var buf bytes.Buffer
+ enc := NewEncoder(&buf)
+ dec := NewDecoder(&buf)
+ var x interface{}
+ switch i {
+ case 0:
+ x = &N1{}
+ case 1:
+ x = &N2{}
+ default:
+ t.Errorf("bad i %d", i)
+ return
+ }
+ m := make(map[string]string)
+ <-c
+ if err := enc.Encode(x); err != nil {
+ t.Error(err)
+ return
+ }
+ if err := enc.Encode(x); err != nil {
+ t.Error(err)
+ return
+ }
+ if err := dec.Decode(&m); err == nil {
+ t.Error("decode unexpectedly succeeded")
+ return
+ }
+ }(i)
+ }
+ close(c)
+ wg.Wait()
+}
diff --git a/src/encoding/hex/hex.go b/src/encoding/hex/hex.go
index 2768f1b..e4df6cb 100644
--- a/src/encoding/hex/hex.go
+++ b/src/encoding/hex/hex.go
@@ -31,7 +31,9 @@
return len(src) * 2
}
-// ErrLength results from decoding an odd length slice.
+// ErrLength reports an attempt to decode an odd-length input
+// using Decode or DecodeString.
+// The stream-based Decoder returns io.ErrUnexpectedEOF instead of ErrLength.
var ErrLength = errors.New("encoding/hex: odd length hex string")
// InvalidByteError values describe errors resulting from an invalid byte in a hex string.
@@ -50,24 +52,30 @@
//
// Decode expects that src contain only hexadecimal
// characters and that src should have an even length.
+// If the input is malformed, Decode returns the number
+// of bytes decoded before the error.
func Decode(dst, src []byte) (int, error) {
- if len(src)%2 == 1 {
- return 0, ErrLength
- }
-
- for i := 0; i < len(src)/2; i++ {
+ var i int
+ for i = 0; i < len(src)/2; i++ {
a, ok := fromHexChar(src[i*2])
if !ok {
- return 0, InvalidByteError(src[i*2])
+ return i, InvalidByteError(src[i*2])
}
b, ok := fromHexChar(src[i*2+1])
if !ok {
- return 0, InvalidByteError(src[i*2+1])
+ return i, InvalidByteError(src[i*2+1])
}
dst[i] = (a << 4) | b
}
-
- return len(src) / 2, nil
+ if len(src)%2 == 1 {
+ // Check for invalid char before reporting bad length,
+ // since the invalid char (if present) is an earlier problem.
+ if _, ok := fromHexChar(src[i*2]); !ok {
+ return i, InvalidByteError(src[i*2])
+ }
+ return i, ErrLength
+ }
+ return i, nil
}
// fromHexChar converts a hex character into its value and a success flag.
@@ -92,14 +100,17 @@
}
// DecodeString returns the bytes represented by the hexadecimal string s.
+//
+// DecodeString expects that src contain only hexadecimal
+// characters and that src should have an even length.
+// If the input is malformed, DecodeString returns a string
+// containing the bytes decoded before the error.
func DecodeString(s string) ([]byte, error) {
src := []byte(s)
- dst := make([]byte, DecodedLen(len(src)))
- _, err := Decode(dst, src)
- if err != nil {
- return nil, err
- }
- return dst, nil
+ // We can use the source slice itself as the destination
+ // because the decode loop increments by one and then the 'seen' byte is not used anymore.
+ n, err := Decode(src, src)
+ return src[:n], err
}
// Dump returns a string that contains a hex dump of the given data. The format
@@ -112,6 +123,81 @@
return buf.String()
}
+// bufferSize is the number of hexadecimal characters to buffer in encoder and decoder.
+const bufferSize = 1024
+
+type encoder struct {
+ w io.Writer
+ err error
+ out [bufferSize]byte // output buffer
+}
+
+// NewEncoder returns an io.Writer that writes lowercase hexadecimal characters to w.
+func NewEncoder(w io.Writer) io.Writer {
+ return &encoder{w: w}
+}
+
+func (e *encoder) Write(p []byte) (n int, err error) {
+ for len(p) > 0 && e.err == nil {
+ chunkSize := bufferSize / 2
+ if len(p) < chunkSize {
+ chunkSize = len(p)
+ }
+
+ var written int
+ encoded := Encode(e.out[:], p[:chunkSize])
+ written, e.err = e.w.Write(e.out[:encoded])
+ n += written / 2
+ p = p[chunkSize:]
+ }
+ return n, e.err
+}
+
+type decoder struct {
+ r io.Reader
+ err error
+ in []byte // input buffer (encoded form)
+ arr [bufferSize]byte // backing array for in
+}
+
+// NewDecoder returns an io.Reader that decodes hexadecimal characters from r.
+// NewDecoder expects that r contain only an even number of hexadecimal characters.
+func NewDecoder(r io.Reader) io.Reader {
+ return &decoder{r: r}
+}
+
+func (d *decoder) Read(p []byte) (n int, err error) {
+ // Fill internal buffer with sufficient bytes to decode
+ if len(d.in) < 2 && d.err == nil {
+ var numCopy, numRead int
+ numCopy = copy(d.arr[:], d.in) // Copies either 0 or 1 bytes
+ numRead, d.err = d.r.Read(d.arr[numCopy:])
+ d.in = d.arr[:numCopy+numRead]
+ if d.err == io.EOF && len(d.in)%2 != 0 {
+ if _, ok := fromHexChar(d.in[len(d.in)-1]); !ok {
+ d.err = InvalidByteError(d.in[len(d.in)-1])
+ } else {
+ d.err = io.ErrUnexpectedEOF
+ }
+ }
+ }
+
+ // Decode internal buffer into output buffer
+ if numAvail := len(d.in) / 2; len(p) > numAvail {
+ p = p[:numAvail]
+ }
+ numDec, err := Decode(p, d.in[:len(p)*2])
+ d.in = d.in[2*numDec:]
+ if err != nil {
+ d.in, d.err = nil, err // Decode error; discard input remainder
+ }
+
+ if len(d.in) < 2 {
+ return numDec, d.err // Only expose errors when buffer fully consumed
+ }
+ return numDec, nil
+}
+
// Dumper returns a WriteCloser that writes a hex dump of all written data to
// w. The format of the dump matches the output of `hexdump -C` on the command
// line.
diff --git a/src/encoding/hex/hex_test.go b/src/encoding/hex/hex_test.go
index 64dabbd..b6bab21 100644
--- a/src/encoding/hex/hex_test.go
+++ b/src/encoding/hex/hex_test.go
@@ -7,6 +7,9 @@
import (
"bytes"
"fmt"
+ "io"
+ "io/ioutil"
+ "strings"
"testing"
)
@@ -75,37 +78,86 @@
}
}
-type errTest struct {
+var errTests = []struct {
in string
- err string
+ out string
+ err error
+}{
+ {"", "", nil},
+ {"0", "", ErrLength},
+ {"zd4aa", "", InvalidByteError('z')},
+ {"d4aaz", "\xd4\xaa", InvalidByteError('z')},
+ {"30313", "01", ErrLength},
+ {"0g", "", InvalidByteError('g')},
+ {"00gg", "\x00", InvalidByteError('g')},
+ {"0\x01", "", InvalidByteError('\x01')},
+ {"ffeed", "\xff\xee", ErrLength},
}
-var errTests = []errTest{
- {"0", "encoding/hex: odd length hex string"},
- {"0g", "encoding/hex: invalid byte: U+0067 'g'"},
- {"00gg", "encoding/hex: invalid byte: U+0067 'g'"},
- {"0\x01", "encoding/hex: invalid byte: U+0001"},
-}
-
-func TestInvalidErr(t *testing.T) {
- for i, test := range errTests {
- dst := make([]byte, DecodedLen(len(test.in)))
- _, err := Decode(dst, []byte(test.in))
- if err == nil {
- t.Errorf("#%d: expected error; got none", i)
- } else if err.Error() != test.err {
- t.Errorf("#%d: got: %v want: %v", i, err, test.err)
+func TestDecodeErr(t *testing.T) {
+ for _, tt := range errTests {
+ out := make([]byte, len(tt.in)+10)
+ n, err := Decode(out, []byte(tt.in))
+ if string(out[:n]) != tt.out || err != tt.err {
+ t.Errorf("Decode(%q) = %q, %v, want %q, %v", tt.in, string(out[:n]), err, tt.out, tt.err)
}
}
}
-func TestInvalidStringErr(t *testing.T) {
- for i, test := range errTests {
- _, err := DecodeString(test.in)
- if err == nil {
- t.Errorf("#%d: expected error; got none", i)
- } else if err.Error() != test.err {
- t.Errorf("#%d: got: %v want: %v", i, err, test.err)
+func TestDecodeStringErr(t *testing.T) {
+ for _, tt := range errTests {
+ out, err := DecodeString(tt.in)
+ if string(out) != tt.out || err != tt.err {
+ t.Errorf("DecodeString(%q) = %q, %v, want %q, %v", tt.in, out, err, tt.out, tt.err)
+ }
+ }
+}
+
+func TestEncoderDecoder(t *testing.T) {
+ for _, multiplier := range []int{1, 128, 192} {
+ for _, test := range encDecTests {
+ input := bytes.Repeat(test.dec, multiplier)
+ output := strings.Repeat(test.enc, multiplier)
+
+ var buf bytes.Buffer
+ enc := NewEncoder(&buf)
+ r := struct{ io.Reader }{bytes.NewReader(input)} // io.Reader only; not io.WriterTo
+ if n, err := io.CopyBuffer(enc, r, make([]byte, 7)); n != int64(len(input)) || err != nil {
+ t.Errorf("encoder.Write(%q*%d) = (%d, %v), want (%d, nil)", test.dec, multiplier, n, err, len(input))
+ continue
+ }
+
+ if encDst := buf.String(); encDst != output {
+ t.Errorf("buf(%q*%d) = %v, want %v", test.dec, multiplier, encDst, output)
+ continue
+ }
+
+ dec := NewDecoder(&buf)
+ var decBuf bytes.Buffer
+ w := struct{ io.Writer }{&decBuf} // io.Writer only; not io.ReaderFrom
+ if _, err := io.CopyBuffer(w, dec, make([]byte, 7)); err != nil || decBuf.Len() != len(input) {
+ t.Errorf("decoder.Read(%q*%d) = (%d, %v), want (%d, nil)", test.enc, multiplier, decBuf.Len(), err, len(input))
+ }
+
+ if !bytes.Equal(decBuf.Bytes(), input) {
+ t.Errorf("decBuf(%q*%d) = %v, want %v", test.dec, multiplier, decBuf.Bytes(), input)
+ continue
+ }
+ }
+ }
+}
+
+func TestDecoderErr(t *testing.T) {
+ for _, tt := range errTests {
+ dec := NewDecoder(strings.NewReader(tt.in))
+ out, err := ioutil.ReadAll(dec)
+ wantErr := tt.err
+ // Decoder is reading from stream, so it reports io.ErrUnexpectedEOF instead of ErrLength.
+ if wantErr == ErrLength {
+ wantErr = io.ErrUnexpectedEOF
+ }
+ if string(out) != tt.out || err != wantErr {
+ t.Errorf("NewDecoder(%q) = %q, %v, want %q, %v", tt.in, out, err, tt.out, wantErr)
}
}
}
diff --git a/src/encoding/json/bench_test.go b/src/encoding/json/bench_test.go
index 85d7ae0..42439eb 100644
--- a/src/encoding/json/bench_test.go
+++ b/src/encoding/json/bench_test.go
@@ -133,6 +133,21 @@
b.SetBytes(int64(len(codeJSON)))
}
+func BenchmarkUnicodeDecoder(b *testing.B) {
+ j := []byte(`"\uD83D\uDE01"`)
+ b.SetBytes(int64(len(j)))
+ r := bytes.NewReader(j)
+ dec := NewDecoder(r)
+ var out string
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ if err := dec.Decode(&out); err != nil {
+ b.Fatal("Decode:", err)
+ }
+ r.Seek(0, 0)
+ }
+}
+
func BenchmarkDecoderStream(b *testing.B) {
b.StopTimer()
var buf bytes.Buffer
diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go
index 420a07e..536f25d 100644
--- a/src/encoding/json/decode.go
+++ b/src/encoding/json/decode.go
@@ -44,8 +44,9 @@
//
// To unmarshal JSON into a struct, Unmarshal matches incoming object
// keys to the keys used by Marshal (either the struct field name or its tag),
-// preferring an exact match but also accepting a case-insensitive match.
-// Unmarshal will only set exported fields of the struct.
+// preferring an exact match but also accepting a case-insensitive match. By
+// default, object keys which don't have a corresponding struct field are
+// ignored (see Decoder.DisallowUnknownFields for an alternative).
//
// To unmarshal JSON into an interface value,
// Unmarshal stores one of these in the interface value:
@@ -138,7 +139,8 @@
// An UnmarshalFieldError describes a JSON object key that
// led to an unexported (and therefore unwritable) struct field.
-// (No longer used; kept for compatibility.)
+//
+// Deprecated: No longer used; kept for compatibility.
type UnmarshalFieldError struct {
Key string
Type reflect.Type
@@ -274,8 +276,9 @@
Struct string
Field string
}
- savedError error
- useNumber bool
+ savedError error
+ useNumber bool
+ disallowUnknownFields bool
}
// errPhase is used for errors that should not happen unless
@@ -508,7 +511,7 @@
switch v.Kind() {
case reflect.Interface:
if v.NumMethod() == 0 {
- // Decoding into nil interface? Switch to non-reflect code.
+ // Decoding into nil interface? Switch to non-reflect code.
v.Set(reflect.ValueOf(d.arrayInterface()))
return
}
@@ -612,7 +615,7 @@
}
v = pv
- // Decoding into nil interface? Switch to non-reflect code.
+ // Decoding into nil interface? Switch to non-reflect code.
if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
v.Set(reflect.ValueOf(d.objectInterface()))
return
@@ -704,6 +707,19 @@
for _, i := range f.index {
if subv.Kind() == reflect.Ptr {
if subv.IsNil() {
+ // If a struct embeds a pointer to an unexported type,
+ // it is not possible to set a newly allocated value
+ // since the field is unexported.
+ //
+ // See https://golang.org/issue/21357
+ if !subv.CanSet() {
+ d.saveError(fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", subv.Type().Elem()))
+ // Invalidate subv to ensure d.value(subv) skips over
+ // the JSON value without assigning it to subv.
+ subv = reflect.Value{}
+ destring = false
+ break
+ }
subv.Set(reflect.New(subv.Type().Elem()))
}
subv = subv.Elem()
@@ -712,6 +728,8 @@
}
d.errorContext.Field = f.name
d.errorContext.Struct = v.Type().Name()
+ } else if d.disallowUnknownFields {
+ d.saveError(fmt.Errorf("json: unknown field %q", key))
}
}
@@ -1143,11 +1161,21 @@
if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
return -1
}
- r, err := strconv.ParseUint(string(s[2:6]), 16, 64)
- if err != nil {
- return -1
+ var r rune
+ for _, c := range s[2:6] {
+ switch {
+ case '0' <= c && c <= '9':
+ c = c - '0'
+ case 'a' <= c && c <= 'f':
+ c = c - 'a' + 10
+ case 'A' <= c && c <= 'F':
+ c = c - 'A' + 10
+ default:
+ return -1
+ }
+ r = r*16 + rune(c)
}
- return rune(r)
+ return r
}
// unquote converts a quoted JSON string literal s into an actual string t.
@@ -1190,7 +1218,7 @@
b := make([]byte, len(s)+2*utf8.UTFMax)
w := copy(b, s[0:r])
for r < len(s) {
- // Out of room? Can only happen if s is full of
+ // Out of room? Can only happen if s is full of
// malformed UTF-8 and we're replacing each
// byte with RuneError.
if w >= len(b)-2*utf8.UTFMax {
diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go
index bd38ddd..34b7ec6 100644
--- a/src/encoding/json/decode_test.go
+++ b/src/encoding/json/decode_test.go
@@ -88,7 +88,7 @@
}
func (u *unmarshalerText) UnmarshalText(b []byte) error {
- pos := bytes.Index(b, []byte(":"))
+ pos := bytes.IndexByte(b, ':')
if pos == -1 {
return errors.New("missing separator")
}
@@ -372,12 +372,13 @@
}
type unmarshalTest struct {
- in string
- ptr interface{}
- out interface{}
- err error
- useNumber bool
- golden bool
+ in string
+ ptr interface{}
+ out interface{}
+ err error
+ useNumber bool
+ golden bool
+ disallowUnknownFields bool
}
type B struct {
@@ -401,6 +402,7 @@
{in: "null", ptr: new(interface{}), out: nil},
{in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &UnmarshalTypeError{"array", reflect.TypeOf(""), 7, "T", "X"}},
{in: `{"x": 1}`, ptr: new(tx), out: tx{}},
+ {in: `{"x": 1}`, ptr: new(tx), err: fmt.Errorf("json: unknown field \"x\""), disallowUnknownFields: true},
{in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: float64(1), F2: int32(2), F3: Number("3")}},
{in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: Number("1"), F2: int32(2), F3: Number("3")}, useNumber: true},
{in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(interface{}), out: ifaceNumAsFloat64},
@@ -415,10 +417,13 @@
// Z has a "-" tag.
{in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}},
+ {in: `{"Y": 1, "Z": 2}`, ptr: new(T), err: fmt.Errorf("json: unknown field \"Z\""), disallowUnknownFields: true},
{in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), out: U{Alphabet: "abc"}},
+ {in: `{"alpha": "abc", "alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true},
{in: `{"alpha": "abc"}`, ptr: new(U), out: U{Alphabet: "abc"}},
{in: `{"alphabet": "xyz"}`, ptr: new(U), out: U{}},
+ {in: `{"alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true},
// syntax errors
{in: `{"X": "foo", "Y"}`, err: &SyntaxError{"invalid character '}' after object key", 17}},
@@ -611,9 +616,21 @@
},
{
in: `{"X": 1,"Y":2}`,
+ ptr: new(S5),
+ err: fmt.Errorf("json: unknown field \"X\""),
+ disallowUnknownFields: true,
+ },
+ {
+ in: `{"X": 1,"Y":2}`,
ptr: new(S10),
out: S10{S13: S13{S8: S8{S9: S9{Y: 2}}}},
},
+ {
+ in: `{"X": 1,"Y":2}`,
+ ptr: new(S10),
+ err: fmt.Errorf("json: unknown field \"X\""),
+ disallowUnknownFields: true,
+ },
// invalid UTF-8 is coerced to valid UTF-8.
{
@@ -793,6 +810,62 @@
{in: `{"B": "False"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "False" into bool`)},
{in: `{"B": "null"}`, ptr: new(B), out: B{false}},
{in: `{"B": "nul"}`, ptr: new(B), err: errors.New(`json: invalid use of ,string struct tag, trying to unmarshal "nul" into bool`)},
+
+ // additional tests for disallowUnknownFields
+ {
+ in: `{
+ "Level0": 1,
+ "Level1b": 2,
+ "Level1c": 3,
+ "x": 4,
+ "Level1a": 5,
+ "LEVEL1B": 6,
+ "e": {
+ "Level1a": 8,
+ "Level1b": 9,
+ "Level1c": 10,
+ "Level1d": 11,
+ "x": 12
+ },
+ "Loop1": 13,
+ "Loop2": 14,
+ "X": 15,
+ "Y": 16,
+ "Z": 17,
+ "Q": 18,
+ "extra": true
+ }`,
+ ptr: new(Top),
+ err: fmt.Errorf("json: unknown field \"extra\""),
+ disallowUnknownFields: true,
+ },
+ {
+ in: `{
+ "Level0": 1,
+ "Level1b": 2,
+ "Level1c": 3,
+ "x": 4,
+ "Level1a": 5,
+ "LEVEL1B": 6,
+ "e": {
+ "Level1a": 8,
+ "Level1b": 9,
+ "Level1c": 10,
+ "Level1d": 11,
+ "x": 12,
+ "extra": null
+ },
+ "Loop1": 13,
+ "Loop2": 14,
+ "X": 15,
+ "Y": 16,
+ "Z": 17,
+ "Q": 18
+ }`,
+ ptr: new(Top),
+ err: fmt.Errorf("json: unknown field \"extra\""),
+ disallowUnknownFields: true,
+ },
}
func TestMarshal(t *testing.T) {
@@ -911,6 +984,9 @@
if tt.useNumber {
dec.UseNumber()
}
+ if tt.disallowUnknownFields {
+ dec.DisallowUnknownFields()
+ }
if err := dec.Decode(v.Interface()); !reflect.DeepEqual(err, tt.err) {
t.Errorf("#%d: %v, want %v", i, err, tt.err)
continue
@@ -1117,7 +1193,8 @@
Foo string `json:"bar"`
Foo2 string `json:"bar2,dummyopt"`
- IntStr int64 `json:",string"`
+ IntStr int64 `json:",string"`
+ UintptrStr uintptr `json:",string"`
PBool *bool
PInt *int
@@ -1171,24 +1248,25 @@
}
var allValue = All{
- Bool: true,
- Int: 2,
- Int8: 3,
- Int16: 4,
- Int32: 5,
- Int64: 6,
- Uint: 7,
- Uint8: 8,
- Uint16: 9,
- Uint32: 10,
- Uint64: 11,
- Uintptr: 12,
- Float32: 14.1,
- Float64: 15.1,
- Foo: "foo",
- Foo2: "foo2",
- IntStr: 42,
- String: "16",
+ Bool: true,
+ Int: 2,
+ Int8: 3,
+ Int16: 4,
+ Int32: 5,
+ Int64: 6,
+ Uint: 7,
+ Uint8: 8,
+ Uint16: 9,
+ Uint32: 10,
+ Uint64: 11,
+ Uintptr: 12,
+ Float32: 14.1,
+ Float64: 15.1,
+ Foo: "foo",
+ Foo2: "foo2",
+ IntStr: 42,
+ UintptrStr: 44,
+ String: "16",
Map: map[string]Small{
"17": {Tag: "tag17"},
"18": {Tag: "tag18"},
@@ -1250,6 +1328,7 @@
"bar": "foo",
"bar2": "foo2",
"IntStr": "42",
+ "UintptrStr": "44",
"PBool": null,
"PInt": null,
"PInt8": null,
@@ -1342,6 +1421,7 @@
"bar": "",
"bar2": "",
"IntStr": "0",
+ "UintptrStr": "0",
"PBool": true,
"PInt": 2,
"PInt8": 3,
@@ -2008,3 +2088,81 @@
t.Fatalf("Unmarshal: %v", err)
}
}
+
+// Test unmarshal behavior with regards to embedded pointers to unexported structs.
+// If unallocated, this returns an error because unmarshal cannot set the field.
+// Issue 21357.
+func TestUnmarshalEmbeddedPointerUnexported(t *testing.T) {
+ type (
+ embed1 struct{ Q int }
+ embed2 struct{ Q int }
+ embed3 struct {
+ Q int64 `json:",string"`
+ }
+ S1 struct {
+ *embed1
+ R int
+ }
+ S2 struct {
+ *embed1
+ Q int
+ }
+ S3 struct {
+ embed1
+ R int
+ }
+ S4 struct {
+ *embed1
+ embed2
+ }
+ S5 struct {
+ *embed3
+ R int
+ }
+ )
+
+ tests := []struct {
+ in string
+ ptr interface{}
+ out interface{}
+ err error
+ }{{
+ // Error since we cannot set S1.embed1, but still able to set S1.R.
+ in: `{"R":2,"Q":1}`,
+ ptr: new(S1),
+ out: &S1{R: 2},
+ err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed1"),
+ }, {
+ // The top level Q field takes precedence.
+ in: `{"Q":1}`,
+ ptr: new(S2),
+ out: &S2{Q: 1},
+ }, {
+ // No issue with non-pointer variant.
+ in: `{"R":2,"Q":1}`,
+ ptr: new(S3),
+ out: &S3{embed1: embed1{Q: 1}, R: 2},
+ }, {
+ // No error since both embedded structs have field R, which annihilate each other.
+ // Thus, no attempt is made at setting S4.embed1.
+ in: `{"R":2}`,
+ ptr: new(S4),
+ out: new(S4),
+ }, {
+ // Error since we cannot set S5.embed1, but still able to set S5.R.
+ in: `{"R":2,"Q":1}`,
+ ptr: new(S5),
+ out: &S5{R: 2},
+ err: fmt.Errorf("json: cannot set embedded pointer to unexported struct: json.embed3"),
+ }}
+
+ for i, tt := range tests {
+ err := Unmarshal([]byte(tt.in), tt.ptr)
+ if !reflect.DeepEqual(err, tt.err) {
+ t.Errorf("#%d: %v, want %v", i, err, tt.err)
+ }
+ if !reflect.DeepEqual(tt.ptr, tt.out) {
+ t.Errorf("#%d: mismatch\ngot: %#+v\nwant: %#+v", i, tt.ptr, tt.out)
+ }
+ }
+}
diff --git a/src/encoding/json/encode.go b/src/encoding/json/encode.go
index 0371f0a..1e45e44 100644
--- a/src/encoding/json/encode.go
+++ b/src/encoding/json/encode.go
@@ -3,7 +3,7 @@
// license that can be found in the LICENSE file.
// Package json implements encoding and decoding of JSON as defined in
-// RFC 4627. The mapping between JSON and Go values is described
+// RFC 7159. The mapping between JSON and Go values is described
// in the documentation for the Marshal and Unmarshal functions.
//
// See "JSON and Go" for an introduction to this package:
@@ -166,6 +166,8 @@
}
// MarshalIndent is like Marshal but applies Indent to format the output.
+// Each JSON element in the output will begin on a new line beginning with prefix
+// followed by one or more copies of indent according to the indentation nesting.
func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) {
b, err := Marshal(v)
if err != nil {
@@ -243,8 +245,8 @@
// attempting to encode a string value with invalid UTF-8 sequences.
// As of Go 1.2, Marshal instead coerces the string to valid UTF-8 by
// replacing invalid bytes with the Unicode replacement rune U+FFFD.
-// This error is no longer generated but is kept for backwards compatibility
-// with programs that might mention it.
+//
+// Deprecated: No longer used; kept for compatibility.
type InvalidUTF8Error struct {
S string // the whole string value that caused the error
}
@@ -871,8 +873,7 @@
}
// NOTE: keep in sync with stringBytes below.
-func (e *encodeState) string(s string, escapeHTML bool) int {
- len0 := e.Len()
+func (e *encodeState) string(s string, escapeHTML bool) {
e.WriteByte('"')
start := 0
for i := 0; i < len(s); {
@@ -944,12 +945,10 @@
e.WriteString(s[start:])
}
e.WriteByte('"')
- return e.Len() - len0
}
// NOTE: keep in sync with string above.
-func (e *encodeState) stringBytes(s []byte, escapeHTML bool) int {
- len0 := e.Len()
+func (e *encodeState) stringBytes(s []byte, escapeHTML bool) {
e.WriteByte('"')
start := 0
for i := 0; i < len(s); {
@@ -1021,7 +1020,6 @@
e.Write(s[start:])
}
e.WriteByte('"')
- return e.Len() - len0
}
// A field represents a single field found in a struct.
@@ -1093,21 +1091,19 @@
// Scan f.typ for fields to include.
for i := 0; i < f.typ.NumField(); i++ {
sf := f.typ.Field(i)
+ isUnexported := sf.PkgPath != ""
if sf.Anonymous {
t := sf.Type
if t.Kind() == reflect.Ptr {
t = t.Elem()
}
- // If embedded, StructField.PkgPath is not a reliable
- // indicator of whether the field is exported.
- // See https://golang.org/issue/21122
- if !isExported(t.Name()) && t.Kind() != reflect.Struct {
+ if isUnexported && t.Kind() != reflect.Struct {
// Ignore embedded fields of unexported non-struct types.
- // Do not ignore embedded fields of unexported struct types
- // since they may have exported fields.
continue
}
- } else if sf.PkgPath != "" {
+ // Do not ignore embedded fields of unexported struct types
+ // since they may have exported fields.
+ } else if isUnexported {
// Ignore unexported non-embedded fields.
continue
}
@@ -1135,7 +1131,7 @@
switch ft.Kind() {
case reflect.Bool,
reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
- reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
+ reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
reflect.Float32, reflect.Float64,
reflect.String:
quoted = true
@@ -1226,12 +1222,6 @@
return fields
}
-// isExported reports whether the identifier is exported.
-func isExported(id string) bool {
- r, _ := utf8.DecodeRuneInString(id)
- return unicode.IsUpper(r)
-}
-
// dominantField looks through the fields, all of which are known to
// have the same name, to find the single field that dominates the
// others using Go's embedding rules, modified by the presence of
diff --git a/src/encoding/json/encode_test.go b/src/encoding/json/encode_test.go
index 3fda6a0..0f194e1 100644
--- a/src/encoding/json/encode_test.go
+++ b/src/encoding/json/encode_test.go
@@ -71,14 +71,16 @@
}
type StringTag struct {
- BoolStr bool `json:",string"`
- IntStr int64 `json:",string"`
- StrStr string `json:",string"`
+ BoolStr bool `json:",string"`
+ IntStr int64 `json:",string"`
+ UintptrStr uintptr `json:",string"`
+ StrStr string `json:",string"`
}
var stringTagExpected = `{
"BoolStr": "true",
"IntStr": "42",
+ "UintptrStr": "44",
"StrStr": "\"xzbit\""
}`
@@ -86,6 +88,7 @@
var s StringTag
s.BoolStr = true
s.IntStr = 42
+ s.UintptrStr = 44
s.StrStr = "xzbit"
got, err := MarshalIndent(&s, "", " ")
if err != nil {
@@ -943,7 +946,7 @@
//
// The tests below marked with Issue6458 used to generate "ImZvbyI=" instead "foo".
// This behavior was intentionally changed in Go 1.8.
- // See https://github.com/golang/go/issues/14493#issuecomment-255857318
+ // See https://golang.org/issues/14493#issuecomment-255857318
{rawText, `"foo"`, true}, // Issue6458
{&rawText, `"foo"`, true},
{[]interface{}{rawText}, `["foo"]`, true}, // Issue6458
diff --git a/src/encoding/json/example_test.go b/src/encoding/json/example_test.go
index fa1846b..39b3231 100644
--- a/src/encoding/json/example_test.go
+++ b/src/encoding/json/example_test.go
@@ -36,9 +36,9 @@
func ExampleUnmarshal() {
var jsonBlob = []byte(`[
- {"Name": "Platypus", "Order": "Monotremata"},
- {"Name": "Quoll", "Order": "Dasyuromorphia"}
- ]`)
+ {"Name": "Platypus", "Order": "Monotremata"},
+ {"Name": "Quoll", "Order": "Dasyuromorphia"}
+]`)
type Animal struct {
Name string
Order string
@@ -56,12 +56,12 @@
// This example uses a Decoder to decode a stream of distinct JSON values.
func ExampleDecoder() {
const jsonStream = `
- {"Name": "Ed", "Text": "Knock knock."}
- {"Name": "Sam", "Text": "Who's there?"}
- {"Name": "Ed", "Text": "Go fmt."}
- {"Name": "Sam", "Text": "Go fmt who?"}
- {"Name": "Ed", "Text": "Go fmt yourself!"}
- `
+ {"Name": "Ed", "Text": "Knock knock."}
+ {"Name": "Sam", "Text": "Who's there?"}
+ {"Name": "Ed", "Text": "Go fmt."}
+ {"Name": "Sam", "Text": "Go fmt who?"}
+ {"Name": "Ed", "Text": "Go fmt yourself!"}
+`
type Message struct {
Name, Text string
}
@@ -86,8 +86,8 @@
// This example uses a Decoder to decode a stream of distinct JSON values.
func ExampleDecoder_Token() {
const jsonStream = `
- {"Message": "Hello", "Array": [1, 2, 3], "Null": null, "Number": 1.234}
- `
+ {"Message": "Hello", "Array": [1, 2, 3], "Null": null, "Number": 1.234}
+`
dec := json.NewDecoder(strings.NewReader(jsonStream))
for {
t, err := dec.Token()
@@ -191,9 +191,9 @@
}
var j = []byte(`[
- {"Space": "YCbCr", "Point": {"Y": 255, "Cb": 0, "Cr": -10}},
- {"Space": "RGB", "Point": {"R": 98, "G": 218, "B": 255}}
- ]`)
+ {"Space": "YCbCr", "Point": {"Y": 255, "Cb": 0, "Cr": -10}},
+ {"Space": "RGB", "Point": {"R": 98, "G": 218, "B": 255}}
+]`)
var colors []Color
err := json.Unmarshal(j, &colors)
if err != nil {
@@ -273,3 +273,22 @@
// = }
// =]
}
+
+func ExampleMarshalIndent() {
+ data := map[string]int{
+ "a": 1,
+ "b": 2,
+ }
+
+ json, err := json.MarshalIndent(data, "<prefix>", "<indent>")
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ fmt.Println(string(json))
+ // Output:
+ // {
+ // <prefix><indent>"a": 1,
+ // <prefix><indent>"b": 2
+ // <prefix>}
+}
diff --git a/src/encoding/json/stream.go b/src/encoding/json/stream.go
index 95e30ce..75a4270 100644
--- a/src/encoding/json/stream.go
+++ b/src/encoding/json/stream.go
@@ -12,12 +12,13 @@
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
- r io.Reader
- buf []byte
- d decodeState
- scanp int // start of unread data in buf
- scan scanner
- err error
+ r io.Reader
+ buf []byte
+ d decodeState
+ scanp int // start of unread data in buf
+ scanned int64 // amount of data already scanned
+ scan scanner
+ err error
tokenState int
tokenStack []int
@@ -35,6 +36,11 @@
// Number instead of as a float64.
func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
+// DisallowUnknownFields causes the Decoder to return an error when the destination
+// is a struct and the input contains object keys which do not match any
+// non-ignored, exported fields in the destination.
+func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
+
// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
//
@@ -50,7 +56,7 @@
}
if !dec.tokenValueAllowed() {
- return &SyntaxError{msg: "not at beginning of value"}
+ return &SyntaxError{msg: "not at beginning of value", Offset: dec.offset()}
}
// Read whole value into buffer.
@@ -135,6 +141,7 @@
// Make room to read more into the buffer.
// First slide down data already consumed.
if dec.scanp > 0 {
+ dec.scanned += int64(dec.scanp)
n := copy(dec.buf, dec.buf[dec.scanp:])
dec.buf = dec.buf[:n]
dec.scanp = 0
@@ -301,7 +308,7 @@
return err
}
if c != ',' {
- return &SyntaxError{"expected comma after array element", 0}
+ return &SyntaxError{"expected comma after array element", dec.offset()}
}
dec.scanp++
dec.tokenState = tokenArrayValue
@@ -311,7 +318,7 @@
return err
}
if c != ':' {
- return &SyntaxError{"expected colon after object key", 0}
+ return &SyntaxError{"expected colon after object key", dec.offset()}
}
dec.scanp++
dec.tokenState = tokenObjectValue
@@ -428,7 +435,6 @@
err := dec.Decode(&x)
dec.tokenState = old
if err != nil {
- clearOffset(err)
return nil, err
}
dec.tokenState = tokenObjectColon
@@ -442,7 +448,6 @@
}
var x interface{}
if err := dec.Decode(&x); err != nil {
- clearOffset(err)
return nil, err
}
return x, nil
@@ -450,12 +455,6 @@
}
}
-func clearOffset(err error) {
- if s, ok := err.(*SyntaxError); ok {
- s.Offset = 0
- }
-}
-
func (dec *Decoder) tokenError(c byte) (Token, error) {
var context string
switch dec.tokenState {
@@ -472,7 +471,7 @@
case tokenObjectComma:
context = " after object key:value pair"
}
- return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
+ return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, dec.offset()}
}
// More reports whether there is another element in the
@@ -501,19 +500,6 @@
}
}
-/*
-TODO
-
-// EncodeToken writes the given JSON token to the stream.
-// It returns an error if the delimiters [ ] { } are not properly used.
-//
-// EncodeToken does not call Flush, because usually it is part of
-// a larger operation such as Encode, and those will call Flush when finished.
-// Callers that create an Encoder and then invoke EncodeToken directly,
-// without using Encode, need to call Flush when finished to ensure that
-// the JSON is written to the underlying writer.
-func (e *Encoder) EncodeToken(t Token) error {
- ...
+func (dec *Decoder) offset() int64 {
+ return dec.scanned + int64(dec.scanp)
}
-
-*/
diff --git a/src/encoding/json/stream_test.go b/src/encoding/json/stream_test.go
index d0b3ffb..83c01d1 100644
--- a/src/encoding/json/stream_test.go
+++ b/src/encoding/json/stream_test.go
@@ -342,11 +342,18 @@
{json: ` [{"a": 1} {"a": 2}] `, expTokens: []interface{}{
Delim('['),
decodeThis{map[string]interface{}{"a": float64(1)}},
- decodeThis{&SyntaxError{"expected comma after array element", 0}},
+ decodeThis{&SyntaxError{"expected comma after array element", 11}},
}},
- {json: `{ "a" 1 }`, expTokens: []interface{}{
- Delim('{'), "a",
- decodeThis{&SyntaxError{"expected colon after object key", 0}},
+ {json: `{ "` + strings.Repeat("a", 513) + `" 1 }`, expTokens: []interface{}{
+ Delim('{'), strings.Repeat("a", 513),
+ decodeThis{&SyntaxError{"expected colon after object key", 518}},
+ }},
+ {json: `{ "\a" }`, expTokens: []interface{}{
+ Delim('{'),
+ &SyntaxError{"invalid character 'a' in string escape code", 3},
+ }},
+ {json: ` \a`, expTokens: []interface{}{
+ &SyntaxError{"invalid character '\\\\' looking for beginning of value", 1},
}},
}
@@ -367,15 +374,15 @@
tk, err = dec.Token()
}
if experr, ok := etk.(error); ok {
- if err == nil || err.Error() != experr.Error() {
- t.Errorf("case %v: Expected error %v in %q, but was %v", ci, experr, tcase.json, err)
+ if err == nil || !reflect.DeepEqual(err, experr) {
+ t.Errorf("case %v: Expected error %#v in %q, but was %#v", ci, experr, tcase.json, err)
}
break
} else if err == io.EOF {
t.Errorf("case %v: Unexpected EOF in %q", ci, tcase.json)
break
} else if err != nil {
- t.Errorf("case %v: Unexpected error '%v' in %q", ci, err, tcase.json)
+ t.Errorf("case %v: Unexpected error '%#v' in %q", ci, err, tcase.json)
break
}
if !reflect.DeepEqual(tk, etk) {
diff --git a/src/encoding/pem/example_test.go b/src/encoding/pem/example_test.go
index 900b31c..806e7bb 100644
--- a/src/encoding/pem/example_test.go
+++ b/src/encoding/pem/example_test.go
@@ -9,6 +9,7 @@
"encoding/pem"
"fmt"
"log"
+ "os"
)
func ExampleDecode() {
@@ -42,3 +43,23 @@
fmt.Printf("Got a %T, with remaining data: %q", pub, rest)
// Output: Got a *rsa.PublicKey, with remaining data: "and some more"
}
+
+func ExampleEncode() {
+ block := &pem.Block{
+ Type: "MESSAGE",
+ Headers: map[string]string{
+ "Animal": "Gopher",
+ },
+ Bytes: []byte("test"),
+ }
+
+ if err := pem.Encode(os.Stdout, block); err != nil {
+ log.Fatal(err)
+ }
+ // Output:
+ // -----BEGIN MESSAGE-----
+ // Animal: Gopher
+ //
+ // dGVzdA==
+ // -----END MESSAGE-----
+}
diff --git a/src/encoding/pem/pem.go b/src/encoding/pem/pem.go
index 5e1ab90..35058c3 100644
--- a/src/encoding/pem/pem.go
+++ b/src/encoding/pem/pem.go
@@ -36,7 +36,7 @@
// bytes) is also returned and this will always be smaller than the original
// argument.
func getLine(data []byte) (line, rest []byte) {
- i := bytes.Index(data, []byte{'\n'})
+ i := bytes.IndexByte(data, '\n')
var j int
if i < 0 {
i = len(data)
@@ -106,7 +106,7 @@
}
line, next := getLine(rest)
- i := bytes.Index(line, []byte{':'})
+ i := bytes.IndexByte(line, ':')
if i == -1 {
break
}
@@ -252,7 +252,18 @@
return err
}
+// Encode writes the PEM encoding of b to out.
func Encode(out io.Writer, b *Block) error {
+ // Check for invalid block before writing any output.
+ for k := range b.Headers {
+ if strings.Contains(k, ":") {
+ return errors.New("pem: cannot encode a header key that contains a colon")
+ }
+ }
+
+ // All errors below are relayed from underlying io.Writer,
+ // so it is now safe to write data.
+
if _, err := out.Write(pemStart[1:]); err != nil {
return err
}
@@ -281,9 +292,6 @@
// For consistency of output, write other headers sorted by key.
sort.Strings(h)
for _, k := range h {
- if strings.Contains(k, ":") {
- return errors.New("pem: cannot encode a header key that contains a colon")
- }
if err := writeHeader(out, k, b.Headers[k]); err != nil {
return err
}
@@ -310,8 +318,15 @@
return err
}
+// EncodeToMemory returns the PEM encoding of b.
+//
+// If b has invalid headers and cannot be encoded,
+// EncodeToMemory returns nil. If it is important to
+// report details about this error case, use Encode instead.
func EncodeToMemory(b *Block) []byte {
var buf bytes.Buffer
- Encode(&buf, b)
+ if err := Encode(&buf, b); err != nil {
+ return nil
+ }
return buf.Bytes()
}
diff --git a/src/encoding/pem/pem_test.go b/src/encoding/pem/pem_test.go
index 1a1250a..6a17516 100644
--- a/src/encoding/pem/pem_test.go
+++ b/src/encoding/pem/pem_test.go
@@ -590,3 +590,17 @@
tJQLJRZ+ItT5Irl4owSEBNLahC1j3fhQavbj9WVAfKk=
-----END RSA PRIVATE KEY-----
`
+
+func TestBadEncode(t *testing.T) {
+ b := &Block{Type: "BAD", Headers: map[string]string{"X:Y": "Z"}}
+ var buf bytes.Buffer
+ if err := Encode(&buf, b); err == nil {
+ t.Fatalf("Encode did not report invalid header")
+ }
+ if buf.Len() != 0 {
+ t.Fatalf("Encode wrote data before reporting invalid header")
+ }
+ if data := EncodeToMemory(b); data != nil {
+ t.Fatalf("EncodeToMemory returned non-nil data")
+ }
+}
diff --git a/src/encoding/xml/atom_test.go b/src/encoding/xml/atom_test.go
index a712843..f394dab 100644
--- a/src/encoding/xml/atom_test.go
+++ b/src/encoding/xml/atom_test.go
@@ -12,20 +12,20 @@
Link: []Link{{Href: "http://example.org/"}},
Updated: ParseTime("2003-12-13T18:30:02Z"),
Author: Person{Name: "John Doe"},
- Id: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6",
+ ID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6",
Entry: []Entry{
{
Title: "Atom-Powered Robots Run Amok",
Link: []Link{{Href: "http://example.org/2003/12/13/atom03"}},
- Id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
+ ID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
Updated: ParseTime("2003-12-13T18:30:02Z"),
Summary: NewText("Some text."),
},
},
}
-var atomXml = `` +
+var atomXML = `` +
`<feed xmlns="http://www.w3.org/2005/Atom" updated="2003-12-13T18:30:02Z">` +
`<title>Example Feed</title>` +
`<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>` +
diff --git a/src/encoding/xml/marshal.go b/src/encoding/xml/marshal.go
index 4c6ba8c..d393d06 100644
--- a/src/encoding/xml/marshal.go
+++ b/src/encoding/xml/marshal.go
@@ -16,7 +16,7 @@
)
const (
- // A generic XML header suitable for use with the output of Marshal.
+ // Header is a generic XML header suitable for use with the output of Marshal.
// This is not automatically added to any output of this package,
// it is provided as a convenience.
Header = `<?xml version="1.0" encoding="UTF-8"?>` + "\n"
@@ -66,6 +66,9 @@
// parent elements a and b. Fields that appear next to each other that name
// the same parent will be enclosed in one XML element.
//
+// If the XML name for a struct field is defined by both the field tag and the
+// struct's XMLName field, the names must match.
+//
// See MarshalIndent for an example.
//
// Marshal will return an error if asked to marshal a channel, function, or map.
@@ -320,7 +323,7 @@
// (The "http://www.w3.org/2000/xmlns/" name space is also predefined as "xmlns",
// but users should not be trying to use that one directly - that's our job.)
if url == xmlURL {
- return "xml"
+ return xmlPrefix
}
// Need to define a new name space.
@@ -1011,7 +1014,7 @@
return nil
}
-// A MarshalXMLError is returned when Marshal encounters a type
+// UnsupportedTypeError is returned when Marshal encounters a type
// that cannot be converted into XML.
type UnsupportedTypeError struct {
Type reflect.Type
diff --git a/src/encoding/xml/marshal_test.go b/src/encoding/xml/marshal_test.go
index 674c6b5..a0ccf44 100644
--- a/src/encoding/xml/marshal_test.go
+++ b/src/encoding/xml/marshal_test.go
@@ -583,16 +583,6 @@
ExpectXML: `<PresenceTest></PresenceTest>`,
},
- // A pointer to struct{} may be used to test for an element's presence.
- {
- Value: &PresenceTest{new(struct{})},
- ExpectXML: `<PresenceTest><Exists></Exists></PresenceTest>`,
- },
- {
- Value: &PresenceTest{},
- ExpectXML: `<PresenceTest></PresenceTest>`,
- },
-
// A []byte field is only nil if the element was not found.
{
Value: &Data{},
@@ -646,7 +636,7 @@
{Value: &Universe{Visible: 9.3e13}, ExpectXML: `<universe>9.3e+13</universe>`},
{Value: &Particle{HasMass: true}, ExpectXML: `<particle>true</particle>`},
{Value: &Departure{When: ParseTime("2013-01-09T00:15:00-09:00")}, ExpectXML: `<departure>2013-01-09T00:15:00-09:00</departure>`},
- {Value: atomValue, ExpectXML: atomXml},
+ {Value: atomValue, ExpectXML: atomXML},
{
Value: &Ship{
Name: "Heart of Gold",
@@ -1910,7 +1900,7 @@
func BenchmarkUnmarshal(b *testing.B) {
b.ReportAllocs()
- xml := []byte(atomXml)
+ xml := []byte(atomXML)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
Unmarshal(xml, &Feed{})
@@ -2441,3 +2431,22 @@
t.Errorf("Unmarshal: expected error, got nil")
}
}
+
+// Issue 20953. Crash on invalid XMLName attribute.
+
+type InvalidXMLName struct {
+ XMLName Name `xml:"error"`
+ Type struct {
+ XMLName Name `xml:"type,attr"`
+ }
+}
+
+func TestInvalidXMLName(t *testing.T) {
+ var buf bytes.Buffer
+ enc := NewEncoder(&buf)
+ if err := enc.Encode(InvalidXMLName{}); err == nil {
+ t.Error("unexpected success")
+ } else if want := "invalid tag"; !strings.Contains(err.Error(), want) {
+ t.Errorf("error %q does not contain %q", err, want)
+ }
+}
diff --git a/src/encoding/xml/read.go b/src/encoding/xml/read.go
index 000d9fb..36c7ba6 100644
--- a/src/encoding/xml/read.go
+++ b/src/encoding/xml/read.go
@@ -107,12 +107,13 @@
// to the newly created value.
//
// Unmarshal maps an XML element or attribute value to a bool by
-// setting it to the boolean value represented by the string.
+// setting it to the boolean value represented by the string. Whitespace
+// is trimmed and ignored.
//
// Unmarshal maps an XML element or attribute value to an integer or
// floating-point field by setting the field to the result of
// interpreting the string value in decimal. There is no check for
-// overflow.
+// overflow. Whitespace is trimmed and ignored.
//
// Unmarshal maps an XML element to a Name by recording the element
// name.
@@ -160,7 +161,7 @@
// UnmarshalXML must consume exactly one XML element.
// One common implementation strategy is to unmarshal into
// a separate value with a layout matching the expected XML
-// using d.DecodeElement, and then to copy the data from
+// using d.DecodeElement, and then to copy the data from
// that value into the receiver.
// Another common strategy is to use d.Token to process the
// XML object one token at a time.
@@ -192,19 +193,19 @@
// unmarshalInterface unmarshals a single XML element into val.
// start is the opening tag of the element.
-func (p *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error {
+func (d *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error {
// Record that decoder must stop at end tag corresponding to start.
- p.pushEOF()
+ d.pushEOF()
- p.unmarshalDepth++
- err := val.UnmarshalXML(p, *start)
- p.unmarshalDepth--
+ d.unmarshalDepth++
+ err := val.UnmarshalXML(d, *start)
+ d.unmarshalDepth--
if err != nil {
- p.popEOF()
+ d.popEOF()
return err
}
- if !p.popEOF() {
+ if !d.popEOF() {
return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local)
}
@@ -214,11 +215,11 @@
// unmarshalTextInterface unmarshals a single XML element into val.
// The chardata contained in the element (but not its children)
// is passed to the text unmarshaler.
-func (p *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error {
+func (d *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error {
var buf []byte
depth := 1
for depth > 0 {
- t, err := p.Token()
+ t, err := d.Token()
if err != nil {
return err
}
@@ -237,7 +238,7 @@
}
// unmarshalAttr unmarshals a single XML attribute into val.
-func (p *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error {
+func (d *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error {
if val.Kind() == reflect.Ptr {
if val.IsNil() {
val.Set(reflect.New(val.Type().Elem()))
@@ -276,7 +277,7 @@
val.Set(reflect.Append(val, reflect.Zero(val.Type().Elem())))
// Recur to read element into slice.
- if err := p.unmarshalAttr(val.Index(n), attr); err != nil {
+ if err := d.unmarshalAttr(val.Index(n), attr); err != nil {
val.SetLen(n)
return err
}
@@ -299,11 +300,11 @@
)
// Unmarshal a single XML element into val.
-func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error {
+func (d *Decoder) unmarshal(val reflect.Value, start *StartElement) error {
// Find start element if we need it.
if start == nil {
for {
- tok, err := p.Token()
+ tok, err := d.Token()
if err != nil {
return err
}
@@ -333,24 +334,24 @@
if val.CanInterface() && val.Type().Implements(unmarshalerType) {
// This is an unmarshaler with a non-pointer receiver,
// so it's likely to be incorrect, but we do what we're told.
- return p.unmarshalInterface(val.Interface().(Unmarshaler), start)
+ return d.unmarshalInterface(val.Interface().(Unmarshaler), start)
}
if val.CanAddr() {
pv := val.Addr()
if pv.CanInterface() && pv.Type().Implements(unmarshalerType) {
- return p.unmarshalInterface(pv.Interface().(Unmarshaler), start)
+ return d.unmarshalInterface(pv.Interface().(Unmarshaler), start)
}
}
if val.CanInterface() && val.Type().Implements(textUnmarshalerType) {
- return p.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler))
+ return d.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler))
}
if val.CanAddr() {
pv := val.Addr()
if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) {
- return p.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler))
+ return d.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler))
}
}
@@ -376,7 +377,7 @@
// TODO: For now, simply ignore the field. In the near
// future we may choose to unmarshal the start
// element on it, if not nil.
- return p.Skip()
+ return d.Skip()
case reflect.Slice:
typ := v.Type()
@@ -392,7 +393,7 @@
v.Set(reflect.Append(val, reflect.Zero(v.Type().Elem())))
// Recur to read element into slice.
- if err := p.unmarshal(v.Index(n), start); err != nil {
+ if err := d.unmarshal(v.Index(n), start); err != nil {
v.SetLen(n)
return err
}
@@ -445,7 +446,7 @@
case fAttr:
strv := finfo.value(sv)
if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) {
- if err := p.unmarshalAttr(strv, a); err != nil {
+ if err := d.unmarshalAttr(strv, a); err != nil {
return err
}
handled = true
@@ -460,7 +461,7 @@
if !handled && any >= 0 {
finfo := &tinfo.fields[any]
strv := finfo.value(sv)
- if err := p.unmarshalAttr(strv, a); err != nil {
+ if err := d.unmarshalAttr(strv, a); err != nil {
return err
}
}
@@ -488,11 +489,11 @@
case fInnerXml:
if !saveXML.IsValid() {
saveXML = finfo.value(sv)
- if p.saved == nil {
+ if d.saved == nil {
saveXMLIndex = 0
- p.saved = new(bytes.Buffer)
+ d.saved = new(bytes.Buffer)
} else {
- saveXMLIndex = p.savedOffset()
+ saveXMLIndex = d.savedOffset()
}
}
}
@@ -505,9 +506,9 @@
for {
var savedOffset int
if saveXML.IsValid() {
- savedOffset = p.savedOffset()
+ savedOffset = d.savedOffset()
}
- tok, err := p.Token()
+ tok, err := d.Token()
if err != nil {
return err
}
@@ -515,28 +516,28 @@
case StartElement:
consumed := false
if sv.IsValid() {
- consumed, err = p.unmarshalPath(tinfo, sv, nil, &t)
+ consumed, err = d.unmarshalPath(tinfo, sv, nil, &t)
if err != nil {
return err
}
if !consumed && saveAny.IsValid() {
consumed = true
- if err := p.unmarshal(saveAny, &t); err != nil {
+ if err := d.unmarshal(saveAny, &t); err != nil {
return err
}
}
}
if !consumed {
- if err := p.Skip(); err != nil {
+ if err := d.Skip(); err != nil {
return err
}
}
case EndElement:
if saveXML.IsValid() {
- saveXMLData = p.saved.Bytes()[saveXMLIndex:savedOffset]
+ saveXMLData = d.saved.Bytes()[saveXMLIndex:savedOffset]
if saveXMLIndex == 0 {
- p.saved = nil
+ d.saved = nil
}
}
break Loop
@@ -614,7 +615,7 @@
dst.SetInt(0)
return nil
}
- itmp, err := strconv.ParseInt(string(src), 10, dst.Type().Bits())
+ itmp, err := strconv.ParseInt(strings.TrimSpace(string(src)), 10, dst.Type().Bits())
if err != nil {
return err
}
@@ -624,7 +625,7 @@
dst.SetUint(0)
return nil
}
- utmp, err := strconv.ParseUint(string(src), 10, dst.Type().Bits())
+ utmp, err := strconv.ParseUint(strings.TrimSpace(string(src)), 10, dst.Type().Bits())
if err != nil {
return err
}
@@ -634,7 +635,7 @@
dst.SetFloat(0)
return nil
}
- ftmp, err := strconv.ParseFloat(string(src), dst.Type().Bits())
+ ftmp, err := strconv.ParseFloat(strings.TrimSpace(string(src)), dst.Type().Bits())
if err != nil {
return err
}
@@ -666,7 +667,7 @@
// The consumed result tells whether XML elements have been consumed
// from the Decoder until start's matching end element, or if it's
// still untouched because start is uninteresting for sv's fields.
-func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) {
+func (d *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) {
recurse := false
Loop:
for i := range tinfo.fields {
@@ -681,7 +682,7 @@
}
if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local {
// It's a perfect match, unmarshal the field.
- return true, p.unmarshal(finfo.value(sv), start)
+ return true, d.unmarshal(finfo.value(sv), start)
}
if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local {
// It's a prefix for the field. Break and recurse
@@ -704,18 +705,18 @@
// prefix. Recurse and attempt to match these.
for {
var tok Token
- tok, err = p.Token()
+ tok, err = d.Token()
if err != nil {
return true, err
}
switch t := tok.(type) {
case StartElement:
- consumed2, err := p.unmarshalPath(tinfo, sv, parents, &t)
+ consumed2, err := d.unmarshalPath(tinfo, sv, parents, &t)
if err != nil {
return true, err
}
if !consumed2 {
- if err := p.Skip(); err != nil {
+ if err := d.Skip(); err != nil {
return true, err
}
}
diff --git a/src/encoding/xml/read_test.go b/src/encoding/xml/read_test.go
index a1eb516..8c2e70f 100644
--- a/src/encoding/xml/read_test.go
+++ b/src/encoding/xml/read_test.go
@@ -83,7 +83,7 @@
type Feed struct {
XMLName Name `xml:"http://www.w3.org/2005/Atom feed"`
Title string `xml:"title"`
- Id string `xml:"id"`
+ ID string `xml:"id"`
Link []Link `xml:"link"`
Updated time.Time `xml:"updated,attr"`
Author Person `xml:"author"`
@@ -92,7 +92,7 @@
type Entry struct {
Title string `xml:"title"`
- Id string `xml:"id"`
+ ID string `xml:"id"`
Link []Link `xml:"link"`
Updated time.Time `xml:"updated"`
Author Person `xml:"author"`
@@ -123,7 +123,7 @@
{Rel: "alternate", Href: "http://codereview.appspot.com/"},
{Rel: "self", Href: "http://codereview.appspot.com/rss/mine/rsc"},
},
- Id: "http://codereview.appspot.com/",
+ ID: "http://codereview.appspot.com/",
Updated: ParseTime("2009-10-04T01:35:58+00:00"),
Author: Person{
Name: "rietveld<>",
@@ -140,7 +140,7 @@
Name: "email-address-removed",
InnerXML: "<name>email-address-removed</name>",
},
- Id: "urn:md5:134d9179c41f806be79b3a5f7877d19a",
+ ID: "urn:md5:134d9179c41f806be79b3a5f7877d19a",
Summary: Text{
Type: "html",
Body: `
@@ -187,7 +187,7 @@
Name: "email-address-removed",
InnerXML: "<name>email-address-removed</name>",
},
- Id: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a",
+ ID: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a",
Summary: Text{
Type: "html",
Body: `
@@ -819,7 +819,7 @@
`
)
-// github.com/golang/go/issues/13417
+// golang.org/issues/13417
func TestUnmarshalEmptyValues(t *testing.T) {
// Test first with a zero-valued dst.
v := new(Parent)
@@ -908,3 +908,174 @@
t.Fatalf("populated: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want)
}
}
+
+type WhitespaceValuesParent struct {
+ BFalse bool
+ BTrue bool
+ I int
+ INeg int
+ I8 int8
+ I8Neg int8
+ I16 int16
+ I16Neg int16
+ I32 int32
+ I32Neg int32
+ I64 int64
+ I64Neg int64
+ UI uint
+ UI8 uint8
+ UI16 uint16
+ UI32 uint32
+ UI64 uint64
+ F32 float32
+ F32Neg float32
+ F64 float64
+ F64Neg float64
+}
+
+const whitespaceValuesXML = `
+<WhitespaceValuesParent>
+ <BFalse> false </BFalse>
+ <BTrue> true </BTrue>
+ <I> 266703 </I>
+ <INeg> -266703 </INeg>
+ <I8> 112 </I8>
+ <I8Neg> -112 </I8Neg>
+ <I16> 6703 </I16>
+ <I16Neg> -6703 </I16Neg>
+ <I32> 266703 </I32>
+ <I32Neg> -266703 </I32Neg>
+ <I64> 266703 </I64>
+ <I64Neg> -266703 </I64Neg>
+ <UI> 266703 </UI>
+ <UI8> 112 </UI8>
+ <UI16> 6703 </UI16>
+ <UI32> 266703 </UI32>
+ <UI64> 266703 </UI64>
+ <F32> 266.703 </F32>
+ <F32Neg> -266.703 </F32Neg>
+ <F64> 266.703 </F64>
+ <F64Neg> -266.703 </F64Neg>
+</WhitespaceValuesParent>
+`
+
+// golang.org/issues/22146
+func TestUnmarshalWhitespaceValues(t *testing.T) {
+ v := WhitespaceValuesParent{}
+ if err := Unmarshal([]byte(whitespaceValuesXML), &v); err != nil {
+ t.Fatalf("whitespace values: Unmarshal failed: got %v", err)
+ }
+
+ want := WhitespaceValuesParent{
+ BFalse: false,
+ BTrue: true,
+ I: 266703,
+ INeg: -266703,
+ I8: 112,
+ I8Neg: -112,
+ I16: 6703,
+ I16Neg: -6703,
+ I32: 266703,
+ I32Neg: -266703,
+ I64: 266703,
+ I64Neg: -266703,
+ UI: 266703,
+ UI8: 112,
+ UI16: 6703,
+ UI32: 266703,
+ UI64: 266703,
+ F32: 266.703,
+ F32Neg: -266.703,
+ F64: 266.703,
+ F64Neg: -266.703,
+ }
+ if v != want {
+ t.Fatalf("whitespace values: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want)
+ }
+}
+
+type WhitespaceAttrsParent struct {
+ BFalse bool `xml:",attr"`
+ BTrue bool `xml:",attr"`
+ I int `xml:",attr"`
+ INeg int `xml:",attr"`
+ I8 int8 `xml:",attr"`
+ I8Neg int8 `xml:",attr"`
+ I16 int16 `xml:",attr"`
+ I16Neg int16 `xml:",attr"`
+ I32 int32 `xml:",attr"`
+ I32Neg int32 `xml:",attr"`
+ I64 int64 `xml:",attr"`
+ I64Neg int64 `xml:",attr"`
+ UI uint `xml:",attr"`
+ UI8 uint8 `xml:",attr"`
+ UI16 uint16 `xml:",attr"`
+ UI32 uint32 `xml:",attr"`
+ UI64 uint64 `xml:",attr"`
+ F32 float32 `xml:",attr"`
+ F32Neg float32 `xml:",attr"`
+ F64 float64 `xml:",attr"`
+ F64Neg float64 `xml:",attr"`
+}
+
+const whitespaceAttrsXML = `
+<WhitespaceAttrsParent
+ BFalse=" false "
+ BTrue=" true "
+ I=" 266703 "
+ INeg=" -266703 "
+ I8=" 112 "
+ I8Neg=" -112 "
+ I16=" 6703 "
+ I16Neg=" -6703 "
+ I32=" 266703 "
+ I32Neg=" -266703 "
+ I64=" 266703 "
+ I64Neg=" -266703 "
+ UI=" 266703 "
+ UI8=" 112 "
+ UI16=" 6703 "
+ UI32=" 266703 "
+ UI64=" 266703 "
+ F32=" 266.703 "
+ F32Neg=" -266.703 "
+ F64=" 266.703 "
+ F64Neg=" -266.703 "
+>
+</WhitespaceAttrsParent>
+`
+
+// golang.org/issues/22146
+func TestUnmarshalWhitespaceAttrs(t *testing.T) {
+ v := WhitespaceAttrsParent{}
+ if err := Unmarshal([]byte(whitespaceAttrsXML), &v); err != nil {
+ t.Fatalf("whitespace attrs: Unmarshal failed: got %v", err)
+ }
+
+ want := WhitespaceAttrsParent{
+ BFalse: false,
+ BTrue: true,
+ I: 266703,
+ INeg: -266703,
+ I8: 112,
+ I8Neg: -112,
+ I16: 6703,
+ I16Neg: -6703,
+ I32: 266703,
+ I32Neg: -266703,
+ I64: 266703,
+ I64Neg: -266703,
+ UI: 266703,
+ UI8: 112,
+ UI16: 6703,
+ UI32: 266703,
+ UI64: 266703,
+ F32: 266.703,
+ F32Neg: -266.703,
+ F64: 266.703,
+ F64Neg: -266.703,
+ }
+ if v != want {
+ t.Fatalf("whitespace attrs: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want)
+ }
+}
diff --git a/src/encoding/xml/typeinfo.go b/src/encoding/xml/typeinfo.go
index 751caa9..48de3d7 100644
--- a/src/encoding/xml/typeinfo.go
+++ b/src/encoding/xml/typeinfo.go
@@ -40,6 +40,8 @@
fOmitEmpty
fMode = fElement | fAttr | fCDATA | fCharData | fInnerXml | fComment | fAny
+
+ xmlName = "XMLName"
)
var tinfoMap sync.Map // map[reflect.Type]*typeInfo
@@ -91,7 +93,7 @@
return nil, err
}
- if f.Name == "XMLName" {
+ if f.Name == xmlName {
tinfo.xmlname = finfo
continue
}
@@ -148,7 +150,7 @@
case 0:
finfo.flags |= fElement
case fAttr, fCDATA, fCharData, fInnerXml, fComment, fAny, fAny | fAttr:
- if f.Name == "XMLName" || tag != "" && mode != fAttr {
+ if f.Name == xmlName || tag != "" && mode != fAttr {
valid = false
}
default:
@@ -173,7 +175,7 @@
f.Name, typ, f.Tag.Get("xml"))
}
- if f.Name == "XMLName" {
+ if f.Name == xmlName {
// The XMLName field records the XML element name. Don't
// process it as usual because its name should default to
// empty rather than to the field name.
@@ -235,11 +237,11 @@
}
for i, n := 0, typ.NumField(); i < n; i++ {
f := typ.Field(i)
- if f.Name != "XMLName" {
+ if f.Name != xmlName {
continue
}
finfo, err := structFieldInfo(typ, &f)
- if finfo.name != "" && err == nil {
+ if err == nil && finfo.name != "" {
return finfo
}
// Also consider errors as a non-existent field tag
diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go
index 9a3b792..f408623 100644
--- a/src/encoding/xml/xml.go
+++ b/src/encoding/xml/xml.go
@@ -60,6 +60,7 @@
Attr []Attr
}
+// Copy creates a new copy of StartElement.
func (e StartElement) Copy() StartElement {
attrs := make([]Attr, len(e.Attr))
copy(attrs, e.Attr)
@@ -88,12 +89,14 @@
return b1
}
+// Copy creates a new copy of CharData.
func (c CharData) Copy() CharData { return CharData(makeCopy(c)) }
// A Comment represents an XML comment of the form <!--comment-->.
// The bytes do not include the <!-- and --> comment markers.
type Comment []byte
+// Copy creates a new copy of Comment.
func (c Comment) Copy() Comment { return Comment(makeCopy(c)) }
// A ProcInst represents an XML processing instruction of the form <?target inst?>
@@ -102,6 +105,7 @@
Inst []byte
}
+// Copy creates a new copy of ProcInst.
func (p ProcInst) Copy() ProcInst {
p.Inst = makeCopy(p.Inst)
return p
@@ -111,6 +115,7 @@
// The bytes do not include the <! and > markers.
type Directive []byte
+// Copy creates a new copy of Directive.
func (d Directive) Copy() Directive { return Directive(makeCopy(d)) }
// CopyToken returns a copy of a Token.
@@ -130,6 +135,23 @@
return t
}
+// A TokenReader is anything that can decode a stream of XML tokens, including a
+// Decoder.
+//
+// When Token encounters an error or end-of-file condition after successfully
+// reading a token, it returns the token. It may return the (non-nil) error from
+// the same call or return the error (and a nil token) from a subsequent call.
+// An instance of this general case is that a TokenReader returning a non-nil
+// token at the end of the token stream may return either io.EOF or a nil error.
+// The next Read should return nil, io.EOF.
+//
+// Implementations of Token are discouraged from returning a nil token with a
+// nil error. Callers should treat a return of nil, nil as indicating that
+// nothing happened; in particular it does not indicate EOF.
+type TokenReader interface {
+ Token() (Token, error)
+}
+
// A Decoder represents an XML parser reading a particular input stream.
// The parser assumes that its input is encoded in UTF-8.
type Decoder struct {
@@ -185,6 +207,7 @@
DefaultSpace string
r io.ByteReader
+ t TokenReader
buf bytes.Buffer
saved *bytes.Buffer
stk *stack
@@ -214,6 +237,22 @@
return d
}
+// NewTokenDecoder creates a new XML parser using an underlying token stream.
+func NewTokenDecoder(t TokenReader) *Decoder {
+ // Is it already a Decoder?
+ if d, ok := t.(*Decoder); ok {
+ return d
+ }
+ d := &Decoder{
+ ns: make(map[string]string),
+ t: t,
+ nextByte: -1,
+ line: 1,
+ Strict: true,
+ }
+ return d
+}
+
// Token returns the next XML token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
@@ -266,12 +305,12 @@
// to the other attribute names, so process
// the translations first.
for _, a := range t1.Attr {
- if a.Name.Space == "xmlns" {
+ if a.Name.Space == xmlnsPrefix {
v, ok := d.ns[a.Name.Local]
d.pushNs(a.Name.Local, v, ok)
d.ns[a.Name.Local] = a.Value
}
- if a.Name.Space == "" && a.Name.Local == "xmlns" {
+ if a.Name.Space == "" && a.Name.Local == xmlnsPrefix {
// Default space for untagged names
v, ok := d.ns[""]
d.pushNs("", v, ok)
@@ -296,20 +335,24 @@
return t, err
}
-const xmlURL = "http://www.w3.org/XML/1998/namespace"
+const (
+ xmlURL = "http://www.w3.org/XML/1998/namespace"
+ xmlnsPrefix = "xmlns"
+ xmlPrefix = "xml"
+)
// Apply name space translation to name n.
// The default name space (for Space=="")
// applies only to element names, not to attribute names.
func (d *Decoder) translate(n *Name, isElementName bool) {
switch {
- case n.Space == "xmlns":
+ case n.Space == xmlnsPrefix:
return
case n.Space == "" && !isElementName:
return
- case n.Space == "xml":
+ case n.Space == xmlPrefix:
n.Space = xmlURL
- case n.Space == "" && n.Local == "xmlns":
+ case n.Space == "" && n.Local == xmlnsPrefix:
return
}
if v, ok := d.ns[n.Space]; ok {
@@ -503,6 +546,9 @@
}
func (d *Decoder) rawToken() (Token, error) {
+ if d.t != nil {
+ return d.t.Token()
+ }
if d.err != nil {
return nil, d.err
}
@@ -786,10 +832,9 @@
if d.Strict {
d.err = d.syntaxError("attribute name without = in element")
return nil, d.err
- } else {
- d.ungetc(b)
- a.Value = a.Name.Local
}
+ d.ungetc(b)
+ a.Value = a.Name.Local
} else {
d.space()
data := d.attrval()
@@ -1027,7 +1072,6 @@
if d.err != nil {
return nil
}
- ok = false
}
if b, ok = d.mustgetc(); !ok {
return nil
@@ -1837,15 +1881,15 @@
}
var (
- esc_quot = []byte(""") // shorter than """
- esc_apos = []byte("'") // shorter than "'"
- esc_amp = []byte("&")
- esc_lt = []byte("<")
- esc_gt = []byte(">")
- esc_tab = []byte("	")
- esc_nl = []byte("
")
- esc_cr = []byte("
")
- esc_fffd = []byte("\uFFFD") // Unicode replacement character
+ escQuot = []byte(""") // shorter than """
+ escApos = []byte("'") // shorter than "'"
+ escAmp = []byte("&")
+ escLT = []byte("<")
+ escGT = []byte(">")
+ escTab = []byte("	")
+ escNL = []byte("
")
+ escCR = []byte("
")
+ escFFFD = []byte("\uFFFD") // Unicode replacement character
)
// EscapeText writes to w the properly escaped XML equivalent
@@ -1865,27 +1909,27 @@
i += width
switch r {
case '"':
- esc = esc_quot
+ esc = escQuot
case '\'':
- esc = esc_apos
+ esc = escApos
case '&':
- esc = esc_amp
+ esc = escAmp
case '<':
- esc = esc_lt
+ esc = escLT
case '>':
- esc = esc_gt
+ esc = escGT
case '\t':
- esc = esc_tab
+ esc = escTab
case '\n':
if !escapeNewline {
continue
}
- esc = esc_nl
+ esc = escNL
case '\r':
- esc = esc_cr
+ esc = escCR
default:
if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
- esc = esc_fffd
+ esc = escFFFD
break
}
continue
@@ -1914,24 +1958,24 @@
i += width
switch r {
case '"':
- esc = esc_quot
+ esc = escQuot
case '\'':
- esc = esc_apos
+ esc = escApos
case '&':
- esc = esc_amp
+ esc = escAmp
case '<':
- esc = esc_lt
+ esc = escLT
case '>':
- esc = esc_gt
+ esc = escGT
case '\t':
- esc = esc_tab
+ esc = escTab
case '\n':
- esc = esc_nl
+ esc = escNL
case '\r':
- esc = esc_cr
+ esc = escCR
default:
if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
- esc = esc_fffd
+ esc = escFFFD
break
}
continue
diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go
index dad6ed9..7a3511d 100644
--- a/src/encoding/xml/xml_test.go
+++ b/src/encoding/xml/xml_test.go
@@ -479,15 +479,15 @@
}
type item struct {
- Field_a string
+ FieldA string
}
func TestIssue569(t *testing.T) {
- data := `<item><Field_a>abcd</Field_a></item>`
+ data := `<item><FieldA>abcd</FieldA></item>`
var i item
err := Unmarshal([]byte(data), &i)
- if err != nil || i.Field_a != "abcd" {
+ if err != nil || i.FieldA != "abcd" {
t.Fatal("Expecting abcd")
}
}
@@ -797,3 +797,90 @@
}
}
}
+
+func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
+ return func(src TokenReader) TokenReader {
+ return mapper{
+ t: src,
+ f: mapping,
+ }
+ }
+}
+
+type mapper struct {
+ t TokenReader
+ f func(Token) Token
+}
+
+func (m mapper) Token() (Token, error) {
+ tok, err := m.t.Token()
+ if err != nil {
+ return nil, err
+ }
+ return m.f(tok), nil
+}
+
+func TestNewTokenDecoderIdempotent(t *testing.T) {
+ d := NewDecoder(strings.NewReader(`<br/>`))
+ d2 := NewTokenDecoder(d)
+ if d != d2 {
+ t.Error("NewTokenDecoder did not detect underlying Decoder")
+ }
+}
+
+func TestWrapDecoder(t *testing.T) {
+ d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
+ m := tokenMap(func(t Token) Token {
+ switch tok := t.(type) {
+ case StartElement:
+ if tok.Name.Local == "quote" {
+ tok.Name.Local = "blocking"
+ return tok
+ }
+ case EndElement:
+ if tok.Name.Local == "quote" {
+ tok.Name.Local = "blocking"
+ return tok
+ }
+ }
+ return t
+ })
+
+ d = NewTokenDecoder(m(d))
+
+ o := struct {
+ XMLName Name `xml:"blocking"`
+ Chardata string `xml:",chardata"`
+ }{}
+
+ if err := d.Decode(&o); err != nil {
+ t.Fatal("Got unexpected error while decoding:", err)
+ }
+
+ if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
+ t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
+ }
+}
+
+type tokReader struct{}
+
+func (tokReader) Token() (Token, error) {
+ return StartElement{}, nil
+}
+
+type Failure struct{}
+
+func (Failure) UnmarshalXML(*Decoder, StartElement) error {
+ return nil
+}
+
+func TestTokenUnmarshaler(t *testing.T) {
+ defer func() {
+ if r := recover(); r != nil {
+ t.Error("Unexpected panic using custom token unmarshaler")
+ }
+ }()
+
+ d := NewTokenDecoder(tokReader{})
+ d.Decode(&Failure{})
+}