Update prebuilts to go1.10 ab/4625579
Test: m -j blueprint_tools
Change-Id: I12d0286a2978fcbafa50880625700ba69c4581d8
diff --git a/src/encoding/csv/reader.go b/src/encoding/csv/reader.go
index a3497c8..2efc7ad 100644
--- a/src/encoding/csv/reader.go
+++ b/src/encoding/csv/reader.go
@@ -58,44 +58,67 @@
"fmt"
"io"
"unicode"
+ "unicode/utf8"
)
// A ParseError is returned for parsing errors.
-// The first line is 1. The first column is 0.
+// Line numbers are 1-indexed and columns are 0-indexed.
type ParseError struct {
- Line int // Line where the error occurred
- Column int // Column (rune index) where the error occurred
- Err error // The actual error
+ StartLine int // Line where the record starts
+ Line int // Line where the error occurred
+ Column int // Column (rune index) where the error occurred
+ Err error // The actual error
}
func (e *ParseError) Error() string {
- return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err)
+ if e.Err == ErrFieldCount {
+ return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
+ }
+ if e.StartLine != e.Line {
+ return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
+ }
+ return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
}
-// These are the errors that can be returned in ParseError.Error
+// These are the errors that can be returned in ParseError.Err.
var (
- ErrTrailingComma = errors.New("extra delimiter at end of line") // no longer used
+ ErrTrailingComma = errors.New("extra delimiter at end of line") // Deprecated: No longer used.
ErrBareQuote = errors.New("bare \" in non-quoted-field")
- ErrQuote = errors.New("extraneous \" in field")
- ErrFieldCount = errors.New("wrong number of fields in line")
+ ErrQuote = errors.New("extraneous or missing \" in quoted-field")
+ ErrFieldCount = errors.New("wrong number of fields")
)
+var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
+
+func validDelim(r rune) bool {
+ return r != 0 && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError
+}
+
// A Reader reads records from a CSV-encoded file.
//
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
// The exported fields can be changed to customize the details before the
// first call to Read or ReadAll.
//
-//
+// The Reader converts all \r\n sequences in its input to plain \n,
+// including in multiline field values, so that the returned data does
+// not depend on which line-ending convention an input file uses.
type Reader struct {
// Comma is the field delimiter.
// It is set to comma (',') by NewReader.
+ // Comma must be a valid rune and must not be \r, \n,
+ // or the Unicode replacement character (0xFFFD).
Comma rune
+
// Comment, if not 0, is the comment character. Lines beginning with the
// Comment character without preceding whitespace are ignored.
// With leading whitespace the Comment character becomes part of the
// field, even if TrimLeadingSpace is true.
+ // Comment must be a valid rune and must not be \r, \n,
+ // or the Unicode replacement character (0xFFFD).
+ // It must also not be equal to Comma.
Comment rune
+
// FieldsPerRecord is the number of expected fields per record.
// If FieldsPerRecord is positive, Read requires each record to
// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
@@ -103,31 +126,41 @@
// have the same field count. If FieldsPerRecord is negative, no check is
// made and records may have a variable number of fields.
FieldsPerRecord int
+
// If LazyQuotes is true, a quote may appear in an unquoted field and a
// non-doubled quote may appear in a quoted field.
- LazyQuotes bool
- TrailingComma bool // ignored; here for backwards compatibility
+ LazyQuotes bool
+
// If TrimLeadingSpace is true, leading white space in a field is ignored.
// This is done even if the field delimiter, Comma, is white space.
TrimLeadingSpace bool
+
// ReuseRecord controls whether calls to Read may return a slice sharing
// the backing array of the previous call's returned slice for performance.
// By default, each call to Read returns newly allocated memory owned by the caller.
ReuseRecord bool
- line int
- column int
- r *bufio.Reader
- // lineBuffer holds the unescaped fields read by readField, one after another.
+ TrailingComma bool // Deprecated: No longer used.
+
+ r *bufio.Reader
+
+ // numLine is the current line being read in the CSV file.
+ numLine int
+
+ // rawBuffer is a line buffer only used by the readLine method.
+ rawBuffer []byte
+
+ // recordBuffer holds the unescaped fields, one after another.
// The fields can be accessed by using the indexes in fieldIndexes.
- // Example: for the row `a,"b","c""d",e` lineBuffer will contain `abc"de` and
- // fieldIndexes will contain the indexes 0, 1, 2, 5.
- lineBuffer bytes.Buffer
- // Indexes of fields inside lineBuffer
- // The i'th field starts at offset fieldIndexes[i] in lineBuffer.
+ // E.g., for the row `a,"b","c""d",e`, recordBuffer will contain `abc"de`
+ // and fieldIndexes will contain the indexes [1, 2, 5, 6].
+ recordBuffer []byte
+
+ // fieldIndexes is an index of fields inside recordBuffer.
+ // The i'th field ends at offset fieldIndexes[i] in recordBuffer.
fieldIndexes []int
- // only used when ReuseRecord == true
+ // lastRecord is a record cache and only used when ReuseRecord == true.
lastRecord []string
}
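
Not part of the patch: the ReuseRecord field documented above trades allocation for aliasing, so fields must be copied before the next Read if they are kept. A minimal sketch of that caveat against the go1.10 API:

    package main

    import (
        "encoding/csv"
        "fmt"
        "strings"
    )

    func main() {
        r := csv.NewReader(strings.NewReader("a,b\nc,d\n"))
        r.ReuseRecord = true

        rec1, _ := r.Read()
        first := rec1[0] // copy any field you still need before the next Read...

        rec2, _ := r.Read()
        // ...because rec1 and rec2 may now share the same backing array.
        fmt.Println(first, rec1, rec2) // a [c d] [c d]
    }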
@@ -139,15 +172,6 @@
}
}
-// error creates a new ParseError based on err.
-func (r *Reader) error(err error) error {
- return &ParseError{
- Line: r.line,
- Column: r.column,
- Err: err,
- }
-}
-
// Read reads one record (a slice of fields) from r.
// If the record has an unexpected number of fields,
// Read returns the record along with the error ErrFieldCount.
@@ -163,7 +187,6 @@
} else {
record, err = r.readRecord(nil)
}
-
return record, err
}
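
Not part of the patch: a minimal sketch of the Read loop described above, against the go1.10 API. When a record has the wrong number of fields, Read still returns the record alongside an ErrFieldCount parse error, so callers can decide whether to keep or skip it.

    package main

    import (
        "encoding/csv"
        "fmt"
        "io"
        "strings"
    )

    func main() {
        r := csv.NewReader(strings.NewReader("a,b,c\nd,e\n"))
        r.FieldsPerRecord = 3 // require exactly three fields per record
        for {
            record, err := r.Read()
            if err == io.EOF {
                break
            }
            if pe, ok := err.(*csv.ParseError); ok && pe.Err == csv.ErrFieldCount {
                // The short record is still returned alongside the error.
                fmt.Println("short record:", record, "-", err)
                continue
            }
            if err != nil {
                fmt.Println("read failed:", err)
                return
            }
            fmt.Println(record)
        }
    }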
@@ -185,226 +208,192 @@
}
}
-// readRecord reads and parses a single csv record from r.
-// Unlike parseRecord, readRecord handles FieldsPerRecord.
-// If dst has enough capacity it will be used for the returned record.
-func (r *Reader) readRecord(dst []string) (record []string, err error) {
- for {
- record, err = r.parseRecord(dst)
- if record != nil {
- break
+// readLine reads the next line (with the trailing endline).
+// If EOF is hit without a trailing endline, it will be omitted.
+// If some bytes were read, then the error is never io.EOF.
+// The result is only valid until the next call to readLine.
+func (r *Reader) readLine() ([]byte, error) {
+ line, err := r.r.ReadSlice('\n')
+ if err == bufio.ErrBufferFull {
+ r.rawBuffer = append(r.rawBuffer[:0], line...)
+ for err == bufio.ErrBufferFull {
+ line, err = r.r.ReadSlice('\n')
+ r.rawBuffer = append(r.rawBuffer, line...)
}
- if err != nil {
- return nil, err
+ line = r.rawBuffer
+ }
+ if len(line) > 0 && err == io.EOF {
+ err = nil
+ // For backwards compatibility, drop trailing \r before EOF.
+ if line[len(line)-1] == '\r' {
+ line = line[:len(line)-1]
}
}
+ r.numLine++
+ // Normalize \r\n to \n on all input lines.
+ if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
+ line[n-2] = '\n'
+ line = line[:n-1]
+ }
+ return line, err
+}
+// lengthNL reports the number of bytes for the trailing \n.
+func lengthNL(b []byte) int {
+ if len(b) > 0 && b[len(b)-1] == '\n' {
+ return 1
+ }
+ return 0
+}
+
+// nextRune returns the next rune in b or utf8.RuneError.
+func nextRune(b []byte) rune {
+ r, _ := utf8.DecodeRune(b)
+ return r
+}
+
+func (r *Reader) readRecord(dst []string) ([]string, error) {
+ if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
+ return nil, errInvalidDelim
+ }
+
+ // Read line (automatically skipping past empty lines and any comments).
+ var line, fullLine []byte
+ var errRead error
+ for errRead == nil {
+ line, errRead = r.readLine()
+ if r.Comment != 0 && nextRune(line) == r.Comment {
+ line = nil
+ continue // Skip comment lines
+ }
+ if errRead == nil && len(line) == lengthNL(line) {
+ line = nil
+ continue // Skip empty lines
+ }
+ fullLine = line
+ break
+ }
+ if errRead == io.EOF {
+ return nil, errRead
+ }
+
+ // Parse each field in the record.
+ var err error
+ const quoteLen = len(`"`)
+ commaLen := utf8.RuneLen(r.Comma)
+ recLine := r.numLine // Starting line for record
+ r.recordBuffer = r.recordBuffer[:0]
+ r.fieldIndexes = r.fieldIndexes[:0]
+parseField:
+ for {
+ if r.TrimLeadingSpace {
+ line = bytes.TrimLeftFunc(line, unicode.IsSpace)
+ }
+ if len(line) == 0 || line[0] != '"' {
+ // Non-quoted string field
+ i := bytes.IndexRune(line, r.Comma)
+ field := line
+ if i >= 0 {
+ field = field[:i]
+ } else {
+ field = field[:len(field)-lengthNL(field)]
+ }
+ // Check to make sure a quote does not appear in field.
+ if !r.LazyQuotes {
+ if j := bytes.IndexByte(field, '"'); j >= 0 {
+ col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
+ err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
+ break parseField
+ }
+ }
+ r.recordBuffer = append(r.recordBuffer, field...)
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ if i >= 0 {
+ line = line[i+commaLen:]
+ continue parseField
+ }
+ break parseField
+ } else {
+ // Quoted string field
+ line = line[quoteLen:]
+ for {
+ i := bytes.IndexByte(line, '"')
+ if i >= 0 {
+ // Hit next quote.
+ r.recordBuffer = append(r.recordBuffer, line[:i]...)
+ line = line[i+quoteLen:]
+ switch rn := nextRune(line); {
+ case rn == '"':
+ // `""` sequence (append quote).
+ r.recordBuffer = append(r.recordBuffer, '"')
+ line = line[quoteLen:]
+ case rn == r.Comma:
+ // `",` sequence (end of field).
+ line = line[commaLen:]
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ continue parseField
+ case lengthNL(line) == len(line):
+ // `"\n` sequence (end of line).
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ break parseField
+ case r.LazyQuotes:
+ // `"` sequence (bare quote).
+ r.recordBuffer = append(r.recordBuffer, '"')
+ default:
+ // `"*` sequence (invalid non-escaped quote).
+ col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
+ err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
+ break parseField
+ }
+ } else if len(line) > 0 {
+ // Hit end of line (copy all data so far).
+ r.recordBuffer = append(r.recordBuffer, line...)
+ if errRead != nil {
+ break parseField
+ }
+ line, errRead = r.readLine()
+ if errRead == io.EOF {
+ errRead = nil
+ }
+ fullLine = line
+ } else {
+ // Abrupt end of file (EOF or error).
+ if !r.LazyQuotes && errRead == nil {
+ col := utf8.RuneCount(fullLine)
+ err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
+ break parseField
+ }
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ break parseField
+ }
+ }
+ }
+ }
+ if err == nil {
+ err = errRead
+ }
+
+ // Create a single string and create slices out of it.
+ // This pins the memory of the fields together, but allocates once.
+ str := string(r.recordBuffer) // Convert to string once to batch allocations
+ dst = dst[:0]
+ if cap(dst) < len(r.fieldIndexes) {
+ dst = make([]string, len(r.fieldIndexes))
+ }
+ dst = dst[:len(r.fieldIndexes)]
+ var preIdx int
+ for i, idx := range r.fieldIndexes {
+ dst[i] = str[preIdx:idx]
+ preIdx = idx
+ }
+
+ // Check or update the expected fields per record.
if r.FieldsPerRecord > 0 {
- if len(record) != r.FieldsPerRecord {
- r.column = 0 // report at start of record
- return record, r.error(ErrFieldCount)
+ if len(dst) != r.FieldsPerRecord && err == nil {
+ err = &ParseError{StartLine: recLine, Line: recLine, Err: ErrFieldCount}
}
} else if r.FieldsPerRecord == 0 {
- r.FieldsPerRecord = len(record)
+ r.FieldsPerRecord = len(dst)
}
- return record, nil
-}
-
-// readRune reads one rune from r, folding \r\n to \n and keeping track
-// of how far into the line we have read. r.column will point to the start
-// of this rune, not the end of this rune.
-func (r *Reader) readRune() (rune, error) {
- r1, _, err := r.r.ReadRune()
-
- // Handle \r\n here. We make the simplifying assumption that
- // anytime \r is followed by \n that it can be folded to \n.
- // We will not detect files which contain both \r\n and bare \n.
- if r1 == '\r' {
- r1, _, err = r.r.ReadRune()
- if err == nil {
- if r1 != '\n' {
- r.r.UnreadRune()
- r1 = '\r'
- }
- }
- }
- r.column++
- return r1, err
-}
-
-// skip reads runes up to and including the rune delim or until error.
-func (r *Reader) skip(delim rune) error {
- for {
- r1, err := r.readRune()
- if err != nil {
- return err
- }
- if r1 == delim {
- return nil
- }
- }
-}
-
-// parseRecord reads and parses a single csv record from r.
-// If dst has enough capacity it will be used for the returned fields.
-func (r *Reader) parseRecord(dst []string) (fields []string, err error) {
- // Each record starts on a new line. We increment our line
- // number (lines start at 1, not 0) and set column to -1
- // so as we increment in readRune it points to the character we read.
- r.line++
- r.column = -1
-
- // Peek at the first rune. If it is an error we are done.
- // If we support comments and it is the comment character
- // then skip to the end of line.
-
- r1, _, err := r.r.ReadRune()
- if err != nil {
- return nil, err
- }
-
- if r.Comment != 0 && r1 == r.Comment {
- return nil, r.skip('\n')
- }
- r.r.UnreadRune()
-
- r.lineBuffer.Reset()
- r.fieldIndexes = r.fieldIndexes[:0]
-
- // At this point we have at least one field.
- for {
- idx := r.lineBuffer.Len()
-
- haveField, delim, err := r.parseField()
- if haveField {
- r.fieldIndexes = append(r.fieldIndexes, idx)
- }
-
- if delim == '\n' || err == io.EOF {
- if len(r.fieldIndexes) == 0 {
- return nil, err
- }
- break
- }
-
- if err != nil {
- return nil, err
- }
- }
-
- fieldCount := len(r.fieldIndexes)
- // Using this approach (creating a single string and taking slices of it)
- // means that a single reference to any of the fields will retain the whole
- // string. The risk of a nontrivial space leak caused by this is considered
- // minimal and a tradeoff for better performance through the combined
- // allocations.
- line := r.lineBuffer.String()
-
- if cap(dst) >= fieldCount {
- fields = dst[:fieldCount]
- } else {
- fields = make([]string, fieldCount)
- }
-
- for i, idx := range r.fieldIndexes {
- if i == fieldCount-1 {
- fields[i] = line[idx:]
- } else {
- fields[i] = line[idx:r.fieldIndexes[i+1]]
- }
- }
-
- return fields, nil
-}
-
-// parseField parses the next field in the record. The read field is
-// appended to r.lineBuffer. Delim is the first character not part of the field
-// (r.Comma or '\n').
-func (r *Reader) parseField() (haveField bool, delim rune, err error) {
- r1, err := r.readRune()
- for err == nil && r.TrimLeadingSpace && r1 != '\n' && unicode.IsSpace(r1) {
- r1, err = r.readRune()
- }
-
- if err == io.EOF && r.column != 0 {
- return true, 0, err
- }
- if err != nil {
- return false, 0, err
- }
-
- switch r1 {
- case r.Comma:
- // will check below
-
- case '\n':
- // We are a trailing empty field or a blank line
- if r.column == 0 {
- return false, r1, nil
- }
- return true, r1, nil
-
- case '"':
- // quoted field
- Quoted:
- for {
- r1, err = r.readRune()
- if err != nil {
- if err == io.EOF {
- if r.LazyQuotes {
- return true, 0, err
- }
- return false, 0, r.error(ErrQuote)
- }
- return false, 0, err
- }
- switch r1 {
- case '"':
- r1, err = r.readRune()
- if err != nil || r1 == r.Comma {
- break Quoted
- }
- if r1 == '\n' {
- return true, r1, nil
- }
- if r1 != '"' {
- if !r.LazyQuotes {
- r.column--
- return false, 0, r.error(ErrQuote)
- }
- // accept the bare quote
- r.lineBuffer.WriteRune('"')
- }
- case '\n':
- r.line++
- r.column = -1
- }
- r.lineBuffer.WriteRune(r1)
- }
-
- default:
- // unquoted field
- for {
- r.lineBuffer.WriteRune(r1)
- r1, err = r.readRune()
- if err != nil || r1 == r.Comma {
- break
- }
- if r1 == '\n' {
- return true, r1, nil
- }
- if !r.LazyQuotes && r1 == '"' {
- return false, 0, r.error(ErrBareQuote)
- }
- }
- }
-
- if err != nil {
- if err == io.EOF {
- return true, 0, err
- }
- return false, 0, err
- }
-
- return true, r1, nil
+ return dst, err
}
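
Not part of the patch: a short sketch of behaviors guaranteed by the rewritten reader above, using the go1.10 API. \r\n sequences inside quoted fields are normalized to \n, ParseError carries a StartLine for records that span multiple lines, and invalid delimiters are rejected up front. The expected values in the comments match the CRLFInQuotedField, StartLine1, and BadComma1 cases in the test diff below.

    package main

    import (
        "encoding/csv"
        "fmt"
        "strings"
    )

    func main() {
        // \r\n inside a quoted field is folded to plain \n.
        r := csv.NewReader(strings.NewReader("A,\"Hello\r\nHi\",B\r\n"))
        rec, err := r.Read()
        fmt.Printf("%q %v\n", rec, err) // ["A" "Hello\nHi" "B"] <nil>

        // A quote error in a record spanning several lines reports both the line
        // where the record started (StartLine) and the line of the error itself.
        r = csv.NewReader(strings.NewReader("a,\"b\nc\"d,e"))
        _, err = r.Read()
        if pe, ok := err.(*csv.ParseError); ok {
            // 1 2 1 extraneous or missing " in quoted-field
            fmt.Println(pe.StartLine, pe.Line, pe.Column, pe.Err)
        }

        // Reader (and Writer) now reject invalid delimiters up front.
        r = csv.NewReader(strings.NewReader("a,b"))
        r.Comma = '\n'
        _, err = r.Read()
        fmt.Println(err) // csv: invalid field or comment delimiter
    }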
diff --git a/src/encoding/csv/reader_test.go b/src/encoding/csv/reader_test.go
index 5ab1b61..1fc69f9 100644
--- a/src/encoding/csv/reader_test.go
+++ b/src/encoding/csv/reader_test.go
@@ -9,45 +9,38 @@
"reflect"
"strings"
"testing"
+ "unicode/utf8"
)
-var readTests = []struct {
- Name string
- Input string
- Output [][]string
- UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
+func TestRead(t *testing.T) {
+ tests := []struct {
+ Name string
+ Input string
+ Output [][]string
+ Error error
- // These fields are copied into the Reader
- Comma rune
- Comment rune
- FieldsPerRecord int
- LazyQuotes bool
- TrailingComma bool
- TrimLeadingSpace bool
- ReuseRecord bool
-
- Error string
- Line int // Expected error line if != 0
- Column int // Expected error column if line != 0
-}{
- {
+ // These fields are copied into the Reader
+ Comma rune
+ Comment rune
+ UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
+ FieldsPerRecord int
+ LazyQuotes bool
+ TrimLeadingSpace bool
+ ReuseRecord bool
+ }{{
Name: "Simple",
Input: "a,b,c\n",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ }, {
Name: "CRLF",
Input: "a,b\r\nc,d\r\n",
Output: [][]string{{"a", "b"}, {"c", "d"}},
- },
- {
+ }, {
Name: "BareCR",
Input: "a,b\rc,d\r\n",
Output: [][]string{{"a", "b\rc", "d"}},
- },
- {
- Name: "RFC4180test",
- UseFieldsPerRecord: true,
+ }, {
+ Name: "RFC4180test",
Input: `#field1,field2,field3
"aaa","bb
b","ccc"
@@ -60,163 +53,139 @@
{"a,a", `b"bb`, "ccc"},
{"zzz", "yyy", "xxx"},
},
- },
- {
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+ }, {
Name: "NoEOLTest",
Input: "a,b,c",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ }, {
Name: "Semicolon",
- Comma: ';',
Input: "a;b;c\n",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ Comma: ';',
+ }, {
Name: "MultiLine",
Input: `"two
line","one line","three
line
field"`,
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
- },
- {
+ }, {
Name: "BlankLine",
Input: "a,b,c\n\nd,e,f\n\n",
Output: [][]string{
{"a", "b", "c"},
{"d", "e", "f"},
},
- },
- {
- Name: "BlankLineFieldCount",
- Input: "a,b,c\n\nd,e,f\n\n",
- UseFieldsPerRecord: true,
+ }, {
+ Name: "BlankLineFieldCount",
+ Input: "a,b,c\n\nd,e,f\n\n",
Output: [][]string{
{"a", "b", "c"},
{"d", "e", "f"},
},
- },
- {
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+ }, {
Name: "TrimSpace",
Input: " a, b, c\n",
- TrimLeadingSpace: true,
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ TrimLeadingSpace: true,
+ }, {
Name: "LeadingSpace",
Input: " a, b, c\n",
Output: [][]string{{" a", " b", " c"}},
- },
- {
+ }, {
Name: "Comment",
- Comment: '#',
Input: "#1,2,3\na,b,c\n#comment",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ Comment: '#',
+ }, {
Name: "NoComment",
Input: "#1,2,3\na,b,c",
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
- },
- {
+ }, {
Name: "LazyQuotes",
- LazyQuotes: true,
Input: `a "word","1"2",a","b`,
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
- },
- {
- Name: "BareQuotes",
LazyQuotes: true,
+ }, {
+ Name: "BareQuotes",
Input: `a "word","1"2",a"`,
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
- },
- {
- Name: "BareDoubleQuotes",
LazyQuotes: true,
+ }, {
+ Name: "BareDoubleQuotes",
Input: `a""b,c`,
Output: [][]string{{`a""b`, `c`}},
- },
- {
+ LazyQuotes: true,
+ }, {
Name: "BadDoubleQuotes",
Input: `a""b,c`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 1,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
+ }, {
Name: "TrimQuote",
Input: ` "a"," b",c`,
- TrimLeadingSpace: true,
Output: [][]string{{"a", " b", "c"}},
- },
- {
+ TrimLeadingSpace: true,
+ }, {
Name: "BadBareQuote",
Input: `a "word","b"`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 2,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
+ }, {
Name: "BadTrailingQuote",
Input: `"a word",b"`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 10,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote},
+ }, {
Name: "ExtraneousQuote",
Input: `"a "word","b"`,
- Error: `extraneous " in field`, Line: 1, Column: 3,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote},
+ }, {
Name: "BadFieldCount",
- UseFieldsPerRecord: true,
Input: "a,b,c\nd,e",
- Error: "wrong number of fields", Line: 2,
- },
- {
+ Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount},
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+ }, {
Name: "BadFieldCount1",
+ Input: `a,b,c`,
+ Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
UseFieldsPerRecord: true,
FieldsPerRecord: 2,
- Input: `a,b,c`,
- Error: "wrong number of fields", Line: 1,
- },
- {
+ }, {
Name: "FieldCount",
Input: "a,b,c\nd,e",
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
- },
- {
+ }, {
Name: "TrailingCommaEOF",
Input: "a,b,c,",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
+ }, {
Name: "TrailingCommaEOL",
Input: "a,b,c,\n",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
+ }, {
Name: "TrailingCommaSpaceEOF",
- TrimLeadingSpace: true,
Input: "a,b,c, ",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
- Name: "TrailingCommaSpaceEOL",
TrimLeadingSpace: true,
+ }, {
+ Name: "TrailingCommaSpaceEOL",
Input: "a,b,c, \n",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
- Name: "TrailingCommaLine3",
TrimLeadingSpace: true,
+ }, {
+ Name: "TrailingCommaLine3",
Input: "a,b,c\nd,e,f\ng,hi,",
Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
- },
- {
+ TrimLeadingSpace: true,
+ }, {
Name: "NotTrailingComma3",
Input: "a,b,c, \n",
Output: [][]string{{"a", "b", "c", " "}},
- },
- {
- Name: "CommaFieldTest",
- TrailingComma: true,
+ }, {
+ Name: "CommaFieldTest",
Input: `x,y,z,w
x,y,z,
x,y,,
@@ -240,67 +209,201 @@
{"x", "", "", ""},
{"", "", "", ""},
},
- },
- {
- Name: "TrailingCommaIneffective1",
- TrailingComma: true,
- TrimLeadingSpace: true,
- Input: "a,b,\nc,d,e",
+ }, {
+ Name: "TrailingCommaIneffective1",
+ Input: "a,b,\nc,d,e",
Output: [][]string{
{"a", "b", ""},
{"c", "d", "e"},
},
- },
- {
- Name: "TrailingCommaIneffective2",
- TrailingComma: false,
TrimLeadingSpace: true,
- Input: "a,b,\nc,d,e",
- Output: [][]string{
- {"a", "b", ""},
- {"c", "d", "e"},
- },
- },
- {
- Name: "ReadAllReuseRecord",
- ReuseRecord: true,
- Input: "a,b\nc,d",
+ }, {
+ Name: "ReadAllReuseRecord",
+ Input: "a,b\nc,d",
Output: [][]string{
{"a", "b"},
{"c", "d"},
},
- },
-}
+ ReuseRecord: true,
+ }, {
+ Name: "StartLine1", // Issue 19019
+ Input: "a,\"b\nc\"d,e",
+ Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
+ }, {
+ Name: "StartLine2",
+ Input: "a,b\n\"d\n\n,e",
+ Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
+ }, {
+ Name: "CRLFInQuotedField", // Issue 21201
+ Input: "A,\"Hello\r\nHi\",B\r\n",
+ Output: [][]string{
+ {"A", "Hello\nHi", "B"},
+ },
+ }, {
+ Name: "BinaryBlobField", // Issue 19410
+ Input: "x09\x41\xb4\x1c,aktau",
+ Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
+ }, {
+ Name: "TrailingCR",
+ Input: "field1,field2\r",
+ Output: [][]string{{"field1", "field2"}},
+ }, {
+ Name: "QuotedTrailingCR",
+ Input: "\"field\"\r",
+ Output: [][]string{{"field"}},
+ }, {
+ Name: "QuotedTrailingCRCR",
+ Input: "\"field\"\r\r",
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
+ }, {
+ Name: "FieldCR",
+ Input: "field\rfield\r",
+ Output: [][]string{{"field\rfield"}},
+ }, {
+ Name: "FieldCRCR",
+ Input: "field\r\rfield\r\r",
+ Output: [][]string{{"field\r\rfield\r"}},
+ }, {
+ Name: "FieldCRCRLF",
+ Input: "field\r\r\nfield\r\r\n",
+ Output: [][]string{{"field\r"}, {"field\r"}},
+ }, {
+ Name: "FieldCRCRLFCR",
+ Input: "field\r\r\n\rfield\r\r\n\r",
+ Output: [][]string{{"field\r"}, {"\rfield\r"}},
+ }, {
+ Name: "FieldCRCRLFCRCR",
+ Input: "field\r\r\n\r\rfield\r\r\n\r\r",
+ Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
+ }, {
+ Name: "MultiFieldCRCRLFCRCR",
+ Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
+ Output: [][]string{
+ {"field1", "field2\r"},
+ {"\r\rfield1", "field2\r"},
+ {"\r\r", ""},
+ },
+ }, {
+ Name: "NonASCIICommaAndComment",
+ Input: "a£b,c£ \td,e\n€ comment\n",
+ Output: [][]string{{"a", "b,c", "d,e"}},
+ TrimLeadingSpace: true,
+ Comma: '£',
+ Comment: '€',
+ }, {
+ Name: "NonASCIICommaAndCommentWithQuotes",
+ Input: "a€\" b,\"€ c\nλ comment\n",
+ Output: [][]string{{"a", " b,", " c"}},
+ Comma: '€',
+ Comment: 'λ',
+ }, {
+ // λ and θ start with the same byte.
+ // This tests that the parser doesn't confuse such characters.
+ Name: "NonASCIICommaConfusion",
+ Input: "\"abθcd\"λefθgh",
+ Output: [][]string{{"abθcd", "efθgh"}},
+ Comma: 'λ',
+ Comment: '€',
+ }, {
+ Name: "NonASCIICommentConfusion",
+ Input: "λ\nλ\nθ\nλ\n",
+ Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
+ Comment: 'θ',
+ }, {
+ Name: "QuotedFieldMultipleLF",
+ Input: "\"\n\n\n\n\"",
+ Output: [][]string{{"\n\n\n\n"}},
+ }, {
+ Name: "MultipleCRLF",
+ Input: "\r\n\r\n\r\n\r\n",
+ }, {
+ // The implementation may read each line in several chunks if it doesn't fit entirely
+ // in the read buffer, so we should test the code to handle that condition.
+ Name: "HugeLines",
+ Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
+ Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
+ Comment: '#',
+ }, {
+ Name: "QuoteWithTrailingCRLF",
+ Input: "\"foo\"bar\"\r\n",
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
+ }, {
+ Name: "LazyQuoteWithTrailingCRLF",
+ Input: "\"foo\"bar\"\r\n",
+ Output: [][]string{{`foo"bar`}},
+ LazyQuotes: true,
+ }, {
+ Name: "DoubleQuoteWithTrailingCRLF",
+ Input: "\"foo\"\"bar\"\r\n",
+ Output: [][]string{{`foo"bar`}},
+ }, {
+ Name: "EvenQuotes",
+ Input: `""""""""`,
+ Output: [][]string{{`"""`}},
+ }, {
+ Name: "OddQuotes",
+ Input: `"""""""`,
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote},
+ }, {
+ Name: "LazyOddQuotes",
+ Input: `"""""""`,
+ Output: [][]string{{`"""`}},
+ LazyQuotes: true,
+ }, {
+ Name: "BadComma1",
+ Comma: '\n',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComma2",
+ Comma: '\r',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComma3",
+ Comma: utf8.RuneError,
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComment1",
+ Comment: '\n',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComment2",
+ Comment: '\r',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComment3",
+ Comment: utf8.RuneError,
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadCommaComment",
+ Comma: 'X',
+ Comment: 'X',
+ Error: errInvalidDelim,
+ }}
-func TestRead(t *testing.T) {
- for _, tt := range readTests {
- r := NewReader(strings.NewReader(tt.Input))
- r.Comment = tt.Comment
- if tt.UseFieldsPerRecord {
- r.FieldsPerRecord = tt.FieldsPerRecord
- } else {
- r.FieldsPerRecord = -1
- }
- r.LazyQuotes = tt.LazyQuotes
- r.TrailingComma = tt.TrailingComma
- r.TrimLeadingSpace = tt.TrimLeadingSpace
- r.ReuseRecord = tt.ReuseRecord
- if tt.Comma != 0 {
- r.Comma = tt.Comma
- }
- out, err := r.ReadAll()
- perr, _ := err.(*ParseError)
- if tt.Error != "" {
- if err == nil || !strings.Contains(err.Error(), tt.Error) {
- t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
- } else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) {
- t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column)
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ r := NewReader(strings.NewReader(tt.Input))
+
+ if tt.Comma != 0 {
+ r.Comma = tt.Comma
}
- } else if err != nil {
- t.Errorf("%s: unexpected error %v", tt.Name, err)
- } else if !reflect.DeepEqual(out, tt.Output) {
- t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output)
- }
+ r.Comment = tt.Comment
+ if tt.UseFieldsPerRecord {
+ r.FieldsPerRecord = tt.FieldsPerRecord
+ } else {
+ r.FieldsPerRecord = -1
+ }
+ r.LazyQuotes = tt.LazyQuotes
+ r.TrimLeadingSpace = tt.TrimLeadingSpace
+ r.ReuseRecord = tt.ReuseRecord
+
+ out, err := r.ReadAll()
+ if !reflect.DeepEqual(err, tt.Error) {
+ t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error)
+ } else if !reflect.DeepEqual(out, tt.Output) {
+ t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output)
+ }
+ })
}
}
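
Not part of the patch: the refactored test above switches to t.Run subtests and compares errors structurally with reflect.DeepEqual rather than by message substring. A minimal sketch of the same pattern applied from outside the package (package name hypothetical); the expected errors mirror the ExtraneousQuote and BadBareQuote cases above.

    package csvcheck

    import (
        "encoding/csv"
        "reflect"
        "strings"
        "testing"
    )

    func TestQuoteErrors(t *testing.T) {
        tests := []struct {
            name  string
            input string
            want  error
        }{
            {"ExtraneousQuote", `"a "word","b"`, &csv.ParseError{StartLine: 1, Line: 1, Column: 3, Err: csv.ErrQuote}},
            {"BadBareQuote", `a "word","b"`, &csv.ParseError{StartLine: 1, Line: 1, Column: 2, Err: csv.ErrBareQuote}},
        }
        for _, tt := range tests {
            t.Run(tt.name, func(t *testing.T) {
                _, err := csv.NewReader(strings.NewReader(tt.input)).ReadAll()
                if !reflect.DeepEqual(err, tt.want) {
                    t.Errorf("got %v, want %v", err, tt.want)
                }
            })
        }
    }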
diff --git a/src/encoding/csv/writer.go b/src/encoding/csv/writer.go
index 84b7aa1..ef3594e 100644
--- a/src/encoding/csv/writer.go
+++ b/src/encoding/csv/writer.go
@@ -20,7 +20,7 @@
//
// Comma is the field delimiter.
//
-// If UseCRLF is true, the Writer ends each record with \r\n instead of \n.
+// If UseCRLF is true, the Writer ends each output line with \r\n instead of \n.
type Writer struct {
Comma rune // Field delimiter (set to ',' by NewWriter)
UseCRLF bool // True to use \r\n as the line terminator
@@ -38,6 +38,10 @@
// Writer writes a single CSV record to w along with any necessary quoting.
// A record is a slice of strings with each string being one field.
func (w *Writer) Write(record []string) error {
+ if !validDelim(w.Comma) {
+ return errInvalidDelim
+ }
+
for n, field := range record {
if n > 0 {
if _, err := w.w.WriteRune(w.Comma); err != nil {