Update prebuilts to go1.10 ab/4625579
Test: m -j blueprint_tools
Change-Id: I12d0286a2978fcbafa50880625700ba69c4581d8
diff --git a/src/encoding/csv/reader.go b/src/encoding/csv/reader.go
index a3497c8..2efc7ad 100644
--- a/src/encoding/csv/reader.go
+++ b/src/encoding/csv/reader.go
@@ -58,44 +58,67 @@
"fmt"
"io"
"unicode"
+ "unicode/utf8"
)
// A ParseError is returned for parsing errors.
-// The first line is 1. The first column is 0.
+// Line numbers are 1-indexed and columns are 0-indexed.
type ParseError struct {
- Line int // Line where the error occurred
- Column int // Column (rune index) where the error occurred
- Err error // The actual error
+ StartLine int // Line where the record starts
+ Line int // Line where the error occurred
+ Column int // Column (rune index) where the error occurred
+ Err error // The actual error
}
func (e *ParseError) Error() string {
- return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Err)
+ if e.Err == ErrFieldCount {
+ return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
+ }
+ if e.StartLine != e.Line {
+ return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
+ }
+ return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
}
-// These are the errors that can be returned in ParseError.Error
+// These are the errors that can be returned in ParseError.Err.
var (
- ErrTrailingComma = errors.New("extra delimiter at end of line") // no longer used
+ ErrTrailingComma = errors.New("extra delimiter at end of line") // Deprecated: No longer used.
ErrBareQuote = errors.New("bare \" in non-quoted-field")
- ErrQuote = errors.New("extraneous \" in field")
- ErrFieldCount = errors.New("wrong number of fields in line")
+ ErrQuote = errors.New("extraneous or missing \" in quoted-field")
+ ErrFieldCount = errors.New("wrong number of fields")
)
+var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
+
+func validDelim(r rune) bool {
+ return r != 0 && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError
+}
+
// A Reader reads records from a CSV-encoded file.
//
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
// The exported fields can be changed to customize the details before the
// first call to Read or ReadAll.
//
-//
+// The Reader converts all \r\n sequences in its input to plain \n,
+// including in multiline field values, so that the returned data does
+// not depend on which line-ending convention an input file uses.
type Reader struct {
// Comma is the field delimiter.
// It is set to comma (',') by NewReader.
+ // Comma must be a valid rune and must not be \r, \n,
+ // or the Unicode replacement character (0xFFFD).
Comma rune
+
// Comment, if not 0, is the comment character. Lines beginning with the
// Comment character without preceding whitespace are ignored.
// With leading whitespace the Comment character becomes part of the
// field, even if TrimLeadingSpace is true.
+ // Comment must be a valid rune and must not be \r, \n,
+ // or the Unicode replacement character (0xFFFD).
+ // It must also not be equal to Comma.
Comment rune
+
// FieldsPerRecord is the number of expected fields per record.
// If FieldsPerRecord is positive, Read requires each record to
// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
@@ -103,31 +126,41 @@
// have the same field count. If FieldsPerRecord is negative, no check is
// made and records may have a variable number of fields.
FieldsPerRecord int
+
// If LazyQuotes is true, a quote may appear in an unquoted field and a
// non-doubled quote may appear in a quoted field.
- LazyQuotes bool
- TrailingComma bool // ignored; here for backwards compatibility
+ LazyQuotes bool
+
// If TrimLeadingSpace is true, leading white space in a field is ignored.
// This is done even if the field delimiter, Comma, is white space.
TrimLeadingSpace bool
+
// ReuseRecord controls whether calls to Read may return a slice sharing
// the backing array of the previous call's returned slice for performance.
// By default, each call to Read returns newly allocated memory owned by the caller.
ReuseRecord bool
- line int
- column int
- r *bufio.Reader
- // lineBuffer holds the unescaped fields read by readField, one after another.
+ TrailingComma bool // Deprecated: No longer used.
+
+ r *bufio.Reader
+
+ // numLine is the current line being read in the CSV file.
+ numLine int
+
+ // rawBuffer is a line buffer only used by the readLine method.
+ rawBuffer []byte
+
+ // recordBuffer holds the unescaped fields, one after another.
// The fields can be accessed by using the indexes in fieldIndexes.
- // Example: for the row `a,"b","c""d",e` lineBuffer will contain `abc"de` and
- // fieldIndexes will contain the indexes 0, 1, 2, 5.
- lineBuffer bytes.Buffer
- // Indexes of fields inside lineBuffer
- // The i'th field starts at offset fieldIndexes[i] in lineBuffer.
+ // E.g., for the row `a,"b","c""d",e`, recordBuffer will contain `abc"de`
+ // and fieldIndexes will contain the indexes [1, 2, 5, 6].
+ recordBuffer []byte
+
+ // fieldIndexes is an index of fields inside recordBuffer.
+ // The i'th field ends at offset fieldIndexes[i] in recordBuffer.
fieldIndexes []int
- // only used when ReuseRecord == true
+ // lastRecord is a record cache and only used when ReuseRecord == true.
lastRecord []string
}
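
Not part of the patch: the ReuseRecord field documented above trades allocation for aliasing, so fields must be copied before the next Read if they are kept. A minimal sketch of that caveat against the go1.10 API:

    package main

    import (
        "encoding/csv"
        "fmt"
        "strings"
    )

    func main() {
        r := csv.NewReader(strings.NewReader("a,b\nc,d\n"))
        r.ReuseRecord = true

        rec1, _ := r.Read()
        first := rec1[0] // copy any field you still need before the next Read...

        rec2, _ := r.Read()
        // ...because rec1 and rec2 may now share the same backing array.
        fmt.Println(first, rec1, rec2) // a [c d] [c d]
    }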
@@ -139,15 +172,6 @@
}
}
-// error creates a new ParseError based on err.
-func (r *Reader) error(err error) error {
- return &ParseError{
- Line: r.line,
- Column: r.column,
- Err: err,
- }
-}
-
// Read reads one record (a slice of fields) from r.
// If the record has an unexpected number of fields,
// Read returns the record along with the error ErrFieldCount.
@@ -163,7 +187,6 @@
} else {
record, err = r.readRecord(nil)
}
-
return record, err
}
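
Not part of the patch: a minimal sketch of the Read loop described above, against the go1.10 API. When a record has the wrong number of fields, Read still returns the record alongside an ErrFieldCount parse error, so callers can decide whether to keep or skip it.

    package main

    import (
        "encoding/csv"
        "fmt"
        "io"
        "strings"
    )

    func main() {
        r := csv.NewReader(strings.NewReader("a,b,c\nd,e\n"))
        r.FieldsPerRecord = 3 // require exactly three fields per record
        for {
            record, err := r.Read()
            if err == io.EOF {
                break
            }
            if pe, ok := err.(*csv.ParseError); ok && pe.Err == csv.ErrFieldCount {
                // The short record is still returned alongside the error.
                fmt.Println("short record:", record, "-", err)
                continue
            }
            if err != nil {
                fmt.Println("read failed:", err)
                return
            }
            fmt.Println(record)
        }
    }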
@@ -185,226 +208,192 @@
}
}
-// readRecord reads and parses a single csv record from r.
-// Unlike parseRecord, readRecord handles FieldsPerRecord.
-// If dst has enough capacity it will be used for the returned record.
-func (r *Reader) readRecord(dst []string) (record []string, err error) {
- for {
- record, err = r.parseRecord(dst)
- if record != nil {
- break
+// readLine reads the next line (with the trailing endline).
+// If EOF is hit without a trailing endline, it will be omitted.
+// If some bytes were read, then the error is never io.EOF.
+// The result is only valid until the next call to readLine.
+func (r *Reader) readLine() ([]byte, error) {
+ line, err := r.r.ReadSlice('\n')
+ if err == bufio.ErrBufferFull {
+ r.rawBuffer = append(r.rawBuffer[:0], line...)
+ for err == bufio.ErrBufferFull {
+ line, err = r.r.ReadSlice('\n')
+ r.rawBuffer = append(r.rawBuffer, line...)
}
- if err != nil {
- return nil, err
+ line = r.rawBuffer
+ }
+ if len(line) > 0 && err == io.EOF {
+ err = nil
+ // For backwards compatibility, drop trailing \r before EOF.
+ if line[len(line)-1] == '\r' {
+ line = line[:len(line)-1]
}
}
+ r.numLine++
+ // Normalize \r\n to \n on all input lines.
+ if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
+ line[n-2] = '\n'
+ line = line[:n-1]
+ }
+ return line, err
+}
+// lengthNL reports the number of bytes for the trailing \n.
+func lengthNL(b []byte) int {
+ if len(b) > 0 && b[len(b)-1] == '\n' {
+ return 1
+ }
+ return 0
+}
+
+// nextRune returns the next rune in b or utf8.RuneError.
+func nextRune(b []byte) rune {
+ r, _ := utf8.DecodeRune(b)
+ return r
+}
+
+func (r *Reader) readRecord(dst []string) ([]string, error) {
+ if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
+ return nil, errInvalidDelim
+ }
+
+ // Read line (automatically skipping past empty lines and any comments).
+ var line, fullLine []byte
+ var errRead error
+ for errRead == nil {
+ line, errRead = r.readLine()
+ if r.Comment != 0 && nextRune(line) == r.Comment {
+ line = nil
+ continue // Skip comment lines
+ }
+ if errRead == nil && len(line) == lengthNL(line) {
+ line = nil
+ continue // Skip empty lines
+ }
+ fullLine = line
+ break
+ }
+ if errRead == io.EOF {
+ return nil, errRead
+ }
+
+ // Parse each field in the record.
+ var err error
+ const quoteLen = len(`"`)
+ commaLen := utf8.RuneLen(r.Comma)
+ recLine := r.numLine // Starting line for record
+ r.recordBuffer = r.recordBuffer[:0]
+ r.fieldIndexes = r.fieldIndexes[:0]
+parseField:
+ for {
+ if r.TrimLeadingSpace {
+ line = bytes.TrimLeftFunc(line, unicode.IsSpace)
+ }
+ if len(line) == 0 || line[0] != '"' {
+ // Non-quoted string field
+ i := bytes.IndexRune(line, r.Comma)
+ field := line
+ if i >= 0 {
+ field = field[:i]
+ } else {
+ field = field[:len(field)-lengthNL(field)]
+ }
+ // Check to make sure a quote does not appear in field.
+ if !r.LazyQuotes {
+ if j := bytes.IndexByte(field, '"'); j >= 0 {
+ col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
+ err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
+ break parseField
+ }
+ }
+ r.recordBuffer = append(r.recordBuffer, field...)
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ if i >= 0 {
+ line = line[i+commaLen:]
+ continue parseField
+ }
+ break parseField
+ } else {
+ // Quoted string field
+ line = line[quoteLen:]
+ for {
+ i := bytes.IndexByte(line, '"')
+ if i >= 0 {
+ // Hit next quote.
+ r.recordBuffer = append(r.recordBuffer, line[:i]...)
+ line = line[i+quoteLen:]
+ switch rn := nextRune(line); {
+ case rn == '"':
+ // `""` sequence (append quote).
+ r.recordBuffer = append(r.recordBuffer, '"')
+ line = line[quoteLen:]
+ case rn == r.Comma:
+ // `",` sequence (end of field).
+ line = line[commaLen:]
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ continue parseField
+ case lengthNL(line) == len(line):
+ // `"\n` sequence (end of line).
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ break parseField
+ case r.LazyQuotes:
+ // `"` sequence (bare quote).
+ r.recordBuffer = append(r.recordBuffer, '"')
+ default:
+ // `"*` sequence (invalid non-escaped quote).
+ col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
+ err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
+ break parseField
+ }
+ } else if len(line) > 0 {
+ // Hit end of line (copy all data so far).
+ r.recordBuffer = append(r.recordBuffer, line...)
+ if errRead != nil {
+ break parseField
+ }
+ line, errRead = r.readLine()
+ if errRead == io.EOF {
+ errRead = nil
+ }
+ fullLine = line
+ } else {
+ // Abrupt end of file (EOF or error).
+ if !r.LazyQuotes && errRead == nil {
+ col := utf8.RuneCount(fullLine)
+ err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
+ break parseField
+ }
+ r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
+ break parseField
+ }
+ }
+ }
+ }
+ if err == nil {
+ err = errRead
+ }
+
+ // Create a single string and create slices out of it.
+ // This pins the memory of the fields together, but allocates once.
+ str := string(r.recordBuffer) // Convert to string once to batch allocations
+ dst = dst[:0]
+ if cap(dst) < len(r.fieldIndexes) {
+ dst = make([]string, len(r.fieldIndexes))
+ }
+ dst = dst[:len(r.fieldIndexes)]
+ var preIdx int
+ for i, idx := range r.fieldIndexes {
+ dst[i] = str[preIdx:idx]
+ preIdx = idx
+ }
+
+ // Check or update the expected fields per record.
if r.FieldsPerRecord > 0 {
- if len(record) != r.FieldsPerRecord {
- r.column = 0 // report at start of record
- return record, r.error(ErrFieldCount)
+ if len(dst) != r.FieldsPerRecord && err == nil {
+ err = &ParseError{StartLine: recLine, Line: recLine, Err: ErrFieldCount}
}
} else if r.FieldsPerRecord == 0 {
- r.FieldsPerRecord = len(record)
+ r.FieldsPerRecord = len(dst)
}
- return record, nil
-}
-
-// readRune reads one rune from r, folding \r\n to \n and keeping track
-// of how far into the line we have read. r.column will point to the start
-// of this rune, not the end of this rune.
-func (r *Reader) readRune() (rune, error) {
- r1, _, err := r.r.ReadRune()
-
- // Handle \r\n here. We make the simplifying assumption that
- // anytime \r is followed by \n that it can be folded to \n.
- // We will not detect files which contain both \r\n and bare \n.
- if r1 == '\r' {
- r1, _, err = r.r.ReadRune()
- if err == nil {
- if r1 != '\n' {
- r.r.UnreadRune()
- r1 = '\r'
- }
- }
- }
- r.column++
- return r1, err
-}
-
-// skip reads runes up to and including the rune delim or until error.
-func (r *Reader) skip(delim rune) error {
- for {
- r1, err := r.readRune()
- if err != nil {
- return err
- }
- if r1 == delim {
- return nil
- }
- }
-}
-
-// parseRecord reads and parses a single csv record from r.
-// If dst has enough capacity it will be used for the returned fields.
-func (r *Reader) parseRecord(dst []string) (fields []string, err error) {
- // Each record starts on a new line. We increment our line
- // number (lines start at 1, not 0) and set column to -1
- // so as we increment in readRune it points to the character we read.
- r.line++
- r.column = -1
-
- // Peek at the first rune. If it is an error we are done.
- // If we support comments and it is the comment character
- // then skip to the end of line.
-
- r1, _, err := r.r.ReadRune()
- if err != nil {
- return nil, err
- }
-
- if r.Comment != 0 && r1 == r.Comment {
- return nil, r.skip('\n')
- }
- r.r.UnreadRune()
-
- r.lineBuffer.Reset()
- r.fieldIndexes = r.fieldIndexes[:0]
-
- // At this point we have at least one field.
- for {
- idx := r.lineBuffer.Len()
-
- haveField, delim, err := r.parseField()
- if haveField {
- r.fieldIndexes = append(r.fieldIndexes, idx)
- }
-
- if delim == '\n' || err == io.EOF {
- if len(r.fieldIndexes) == 0 {
- return nil, err
- }
- break
- }
-
- if err != nil {
- return nil, err
- }
- }
-
- fieldCount := len(r.fieldIndexes)
- // Using this approach (creating a single string and taking slices of it)
- // means that a single reference to any of the fields will retain the whole
- // string. The risk of a nontrivial space leak caused by this is considered
- // minimal and a tradeoff for better performance through the combined
- // allocations.
- line := r.lineBuffer.String()
-
- if cap(dst) >= fieldCount {
- fields = dst[:fieldCount]
- } else {
- fields = make([]string, fieldCount)
- }
-
- for i, idx := range r.fieldIndexes {
- if i == fieldCount-1 {
- fields[i] = line[idx:]
- } else {
- fields[i] = line[idx:r.fieldIndexes[i+1]]
- }
- }
-
- return fields, nil
-}
-
-// parseField parses the next field in the record. The read field is
-// appended to r.lineBuffer. Delim is the first character not part of the field
-// (r.Comma or '\n').
-func (r *Reader) parseField() (haveField bool, delim rune, err error) {
- r1, err := r.readRune()
- for err == nil && r.TrimLeadingSpace && r1 != '\n' && unicode.IsSpace(r1) {
- r1, err = r.readRune()
- }
-
- if err == io.EOF && r.column != 0 {
- return true, 0, err
- }
- if err != nil {
- return false, 0, err
- }
-
- switch r1 {
- case r.Comma:
- // will check below
-
- case '\n':
- // We are a trailing empty field or a blank line
- if r.column == 0 {
- return false, r1, nil
- }
- return true, r1, nil
-
- case '"':
- // quoted field
- Quoted:
- for {
- r1, err = r.readRune()
- if err != nil {
- if err == io.EOF {
- if r.LazyQuotes {
- return true, 0, err
- }
- return false, 0, r.error(ErrQuote)
- }
- return false, 0, err
- }
- switch r1 {
- case '"':
- r1, err = r.readRune()
- if err != nil || r1 == r.Comma {
- break Quoted
- }
- if r1 == '\n' {
- return true, r1, nil
- }
- if r1 != '"' {
- if !r.LazyQuotes {
- r.column--
- return false, 0, r.error(ErrQuote)
- }
- // accept the bare quote
- r.lineBuffer.WriteRune('"')
- }
- case '\n':
- r.line++
- r.column = -1
- }
- r.lineBuffer.WriteRune(r1)
- }
-
- default:
- // unquoted field
- for {
- r.lineBuffer.WriteRune(r1)
- r1, err = r.readRune()
- if err != nil || r1 == r.Comma {
- break
- }
- if r1 == '\n' {
- return true, r1, nil
- }
- if !r.LazyQuotes && r1 == '"' {
- return false, 0, r.error(ErrBareQuote)
- }
- }
- }
-
- if err != nil {
- if err == io.EOF {
- return true, 0, err
- }
- return false, 0, err
- }
-
- return true, r1, nil
+ return dst, err
}
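
Not part of the patch: a short sketch of behaviors guaranteed by the rewritten reader above, using the go1.10 API. \r\n sequences inside quoted fields are normalized to \n, ParseError carries a StartLine for records that span multiple lines, and invalid delimiters are rejected up front. The expected values in the comments match the CRLFInQuotedField, StartLine1, and BadComma1 cases in the test diff below.

    package main

    import (
        "encoding/csv"
        "fmt"
        "strings"
    )

    func main() {
        // \r\n inside a quoted field is folded to plain \n.
        r := csv.NewReader(strings.NewReader("A,\"Hello\r\nHi\",B\r\n"))
        rec, err := r.Read()
        fmt.Printf("%q %v\n", rec, err) // ["A" "Hello\nHi" "B"] <nil>

        // A quote error in a record spanning several lines reports both the line
        // where the record started (StartLine) and the line of the error itself.
        r = csv.NewReader(strings.NewReader("a,\"b\nc\"d,e"))
        _, err = r.Read()
        if pe, ok := err.(*csv.ParseError); ok {
            // 1 2 1 extraneous or missing " in quoted-field
            fmt.Println(pe.StartLine, pe.Line, pe.Column, pe.Err)
        }

        // Reader (and Writer) now reject invalid delimiters up front.
        r = csv.NewReader(strings.NewReader("a,b"))
        r.Comma = '\n'
        _, err = r.Read()
        fmt.Println(err) // csv: invalid field or comment delimiter
    }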
diff --git a/src/encoding/csv/reader_test.go b/src/encoding/csv/reader_test.go
index 5ab1b61..1fc69f9 100644
--- a/src/encoding/csv/reader_test.go
+++ b/src/encoding/csv/reader_test.go
@@ -9,45 +9,38 @@
"reflect"
"strings"
"testing"
+ "unicode/utf8"
)
-var readTests = []struct {
- Name string
- Input string
- Output [][]string
- UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
+func TestRead(t *testing.T) {
+ tests := []struct {
+ Name string
+ Input string
+ Output [][]string
+ Error error
- // These fields are copied into the Reader
- Comma rune
- Comment rune
- FieldsPerRecord int
- LazyQuotes bool
- TrailingComma bool
- TrimLeadingSpace bool
- ReuseRecord bool
-
- Error string
- Line int // Expected error line if != 0
- Column int // Expected error column if line != 0
-}{
- {
+ // These fields are copied into the Reader
+ Comma rune
+ Comment rune
+ UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
+ FieldsPerRecord int
+ LazyQuotes bool
+ TrimLeadingSpace bool
+ ReuseRecord bool
+ }{{
Name: "Simple",
Input: "a,b,c\n",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ }, {
Name: "CRLF",
Input: "a,b\r\nc,d\r\n",
Output: [][]string{{"a", "b"}, {"c", "d"}},
- },
- {
+ }, {
Name: "BareCR",
Input: "a,b\rc,d\r\n",
Output: [][]string{{"a", "b\rc", "d"}},
- },
- {
- Name: "RFC4180test",
- UseFieldsPerRecord: true,
+ }, {
+ Name: "RFC4180test",
Input: `#field1,field2,field3
"aaa","bb
b","ccc"
@@ -60,163 +53,139 @@
{"a,a", `b"bb`, "ccc"},
{"zzz", "yyy", "xxx"},
},
- },
- {
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+ }, {
Name: "NoEOLTest",
Input: "a,b,c",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ }, {
Name: "Semicolon",
- Comma: ';',
Input: "a;b;c\n",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ Comma: ';',
+ }, {
Name: "MultiLine",
Input: `"two
line","one line","three
line
field"`,
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
- },
- {
+ }, {
Name: "BlankLine",
Input: "a,b,c\n\nd,e,f\n\n",
Output: [][]string{
{"a", "b", "c"},
{"d", "e", "f"},
},
- },
- {
- Name: "BlankLineFieldCount",
- Input: "a,b,c\n\nd,e,f\n\n",
- UseFieldsPerRecord: true,
+ }, {
+ Name: "BlankLineFieldCount",
+ Input: "a,b,c\n\nd,e,f\n\n",
Output: [][]string{
{"a", "b", "c"},
{"d", "e", "f"},
},
- },
- {
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+ }, {
Name: "TrimSpace",
Input: " a, b, c\n",
- TrimLeadingSpace: true,
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ TrimLeadingSpace: true,
+ }, {
Name: "LeadingSpace",
Input: " a, b, c\n",
Output: [][]string{{" a", " b", " c"}},
- },
- {
+ }, {
Name: "Comment",
- Comment: '#',
Input: "#1,2,3\na,b,c\n#comment",
Output: [][]string{{"a", "b", "c"}},
- },
- {
+ Comment: '#',
+ }, {
Name: "NoComment",
Input: "#1,2,3\na,b,c",
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
- },
- {
+ }, {
Name: "LazyQuotes",
- LazyQuotes: true,
Input: `a "word","1"2",a","b`,
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
- },
- {
- Name: "BareQuotes",
LazyQuotes: true,
+ }, {
+ Name: "BareQuotes",
Input: `a "word","1"2",a"`,
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
- },
- {
- Name: "BareDoubleQuotes",
LazyQuotes: true,
+ }, {
+ Name: "BareDoubleQuotes",
Input: `a""b,c`,
Output: [][]string{{`a""b`, `c`}},
- },
- {
+ LazyQuotes: true,
+ }, {
Name: "BadDoubleQuotes",
Input: `a""b,c`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 1,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
+ }, {
Name: "TrimQuote",
Input: ` "a"," b",c`,
- TrimLeadingSpace: true,
Output: [][]string{{"a", " b", "c"}},
- },
- {
+ TrimLeadingSpace: true,
+ }, {
Name: "BadBareQuote",
Input: `a "word","b"`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 2,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
+ }, {
Name: "BadTrailingQuote",
Input: `"a word",b"`,
- Error: `bare " in non-quoted-field`, Line: 1, Column: 10,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote},
+ }, {
Name: "ExtraneousQuote",
Input: `"a "word","b"`,
- Error: `extraneous " in field`, Line: 1, Column: 3,
- },
- {
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote},
+ }, {
Name: "BadFieldCount",
- UseFieldsPerRecord: true,
Input: "a,b,c\nd,e",
- Error: "wrong number of fields", Line: 2,
- },
- {
+ Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount},
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0,
+ }, {
Name: "BadFieldCount1",
+ Input: `a,b,c`,
+ Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
UseFieldsPerRecord: true,
FieldsPerRecord: 2,
- Input: `a,b,c`,
- Error: "wrong number of fields", Line: 1,
- },
- {
+ }, {
Name: "FieldCount",
Input: "a,b,c\nd,e",
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
- },
- {
+ }, {
Name: "TrailingCommaEOF",
Input: "a,b,c,",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
+ }, {
Name: "TrailingCommaEOL",
Input: "a,b,c,\n",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
+ }, {
Name: "TrailingCommaSpaceEOF",
- TrimLeadingSpace: true,
Input: "a,b,c, ",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
- Name: "TrailingCommaSpaceEOL",
TrimLeadingSpace: true,
+ }, {
+ Name: "TrailingCommaSpaceEOL",
Input: "a,b,c, \n",
Output: [][]string{{"a", "b", "c", ""}},
- },
- {
- Name: "TrailingCommaLine3",
TrimLeadingSpace: true,
+ }, {
+ Name: "TrailingCommaLine3",
Input: "a,b,c\nd,e,f\ng,hi,",
Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
- },
- {
+ TrimLeadingSpace: true,
+ }, {
Name: "NotTrailingComma3",
Input: "a,b,c, \n",
Output: [][]string{{"a", "b", "c", " "}},
- },
- {
- Name: "CommaFieldTest",
- TrailingComma: true,
+ }, {
+ Name: "CommaFieldTest",
Input: `x,y,z,w
x,y,z,
x,y,,
@@ -240,67 +209,201 @@
{"x", "", "", ""},
{"", "", "", ""},
},
- },
- {
- Name: "TrailingCommaIneffective1",
- TrailingComma: true,
- TrimLeadingSpace: true,
- Input: "a,b,\nc,d,e",
+ }, {
+ Name: "TrailingCommaIneffective1",
+ Input: "a,b,\nc,d,e",
Output: [][]string{
{"a", "b", ""},
{"c", "d", "e"},
},
- },
- {
- Name: "TrailingCommaIneffective2",
- TrailingComma: false,
TrimLeadingSpace: true,
- Input: "a,b,\nc,d,e",
- Output: [][]string{
- {"a", "b", ""},
- {"c", "d", "e"},
- },
- },
- {
- Name: "ReadAllReuseRecord",
- ReuseRecord: true,
- Input: "a,b\nc,d",
+ }, {
+ Name: "ReadAllReuseRecord",
+ Input: "a,b\nc,d",
Output: [][]string{
{"a", "b"},
{"c", "d"},
},
- },
-}
+ ReuseRecord: true,
+ }, {
+ Name: "StartLine1", // Issue 19019
+ Input: "a,\"b\nc\"d,e",
+ Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
+ }, {
+ Name: "StartLine2",
+ Input: "a,b\n\"d\n\n,e",
+ Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
+ }, {
+ Name: "CRLFInQuotedField", // Issue 21201
+ Input: "A,\"Hello\r\nHi\",B\r\n",
+ Output: [][]string{
+ {"A", "Hello\nHi", "B"},
+ },
+ }, {
+ Name: "BinaryBlobField", // Issue 19410
+ Input: "x09\x41\xb4\x1c,aktau",
+ Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
+ }, {
+ Name: "TrailingCR",
+ Input: "field1,field2\r",
+ Output: [][]string{{"field1", "field2"}},
+ }, {
+ Name: "QuotedTrailingCR",
+ Input: "\"field\"\r",
+ Output: [][]string{{"field"}},
+ }, {
+ Name: "QuotedTrailingCRCR",
+ Input: "\"field\"\r\r",
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
+ }, {
+ Name: "FieldCR",
+ Input: "field\rfield\r",
+ Output: [][]string{{"field\rfield"}},
+ }, {
+ Name: "FieldCRCR",
+ Input: "field\r\rfield\r\r",
+ Output: [][]string{{"field\r\rfield\r"}},
+ }, {
+ Name: "FieldCRCRLF",
+ Input: "field\r\r\nfield\r\r\n",
+ Output: [][]string{{"field\r"}, {"field\r"}},
+ }, {
+ Name: "FieldCRCRLFCR",
+ Input: "field\r\r\n\rfield\r\r\n\r",
+ Output: [][]string{{"field\r"}, {"\rfield\r"}},
+ }, {
+ Name: "FieldCRCRLFCRCR",
+ Input: "field\r\r\n\r\rfield\r\r\n\r\r",
+ Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
+ }, {
+ Name: "MultiFieldCRCRLFCRCR",
+ Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
+ Output: [][]string{
+ {"field1", "field2\r"},
+ {"\r\rfield1", "field2\r"},
+ {"\r\r", ""},
+ },
+ }, {
+ Name: "NonASCIICommaAndComment",
+ Input: "a£b,c£ \td,e\n€ comment\n",
+ Output: [][]string{{"a", "b,c", "d,e"}},
+ TrimLeadingSpace: true,
+ Comma: '£',
+ Comment: '€',
+ }, {
+ Name: "NonASCIICommaAndCommentWithQuotes",
+ Input: "a€\" b,\"€ c\nλ comment\n",
+ Output: [][]string{{"a", " b,", " c"}},
+ Comma: '€',
+ Comment: 'λ',
+ }, {
+ // λ and θ start with the same byte.
+ // This tests that the parser doesn't confuse such characters.
+ Name: "NonASCIICommaConfusion",
+ Input: "\"abθcd\"λefθgh",
+ Output: [][]string{{"abθcd", "efθgh"}},
+ Comma: 'λ',
+ Comment: '€',
+ }, {
+ Name: "NonASCIICommentConfusion",
+ Input: "λ\nλ\nθ\nλ\n",
+ Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
+ Comment: 'θ',
+ }, {
+ Name: "QuotedFieldMultipleLF",
+ Input: "\"\n\n\n\n\"",
+ Output: [][]string{{"\n\n\n\n"}},
+ }, {
+ Name: "MultipleCRLF",
+ Input: "\r\n\r\n\r\n\r\n",
+ }, {
+ // The implementation may read each line in several chunks if it doesn't fit entirely
+ // in the read buffer, so we should test the code to handle that condition.
+ Name: "HugeLines",
+ Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
+ Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
+ Comment: '#',
+ }, {
+ Name: "QuoteWithTrailingCRLF",
+ Input: "\"foo\"bar\"\r\n",
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
+ }, {
+ Name: "LazyQuoteWithTrailingCRLF",
+ Input: "\"foo\"bar\"\r\n",
+ Output: [][]string{{`foo"bar`}},
+ LazyQuotes: true,
+ }, {
+ Name: "DoubleQuoteWithTrailingCRLF",
+ Input: "\"foo\"\"bar\"\r\n",
+ Output: [][]string{{`foo"bar`}},
+ }, {
+ Name: "EvenQuotes",
+ Input: `""""""""`,
+ Output: [][]string{{`"""`}},
+ }, {
+ Name: "OddQuotes",
+ Input: `"""""""`,
+ Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote},
+ }, {
+ Name: "LazyOddQuotes",
+ Input: `"""""""`,
+ Output: [][]string{{`"""`}},
+ LazyQuotes: true,
+ }, {
+ Name: "BadComma1",
+ Comma: '\n',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComma2",
+ Comma: '\r',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComma3",
+ Comma: utf8.RuneError,
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComment1",
+ Comment: '\n',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComment2",
+ Comment: '\r',
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadComment3",
+ Comment: utf8.RuneError,
+ Error: errInvalidDelim,
+ }, {
+ Name: "BadCommaComment",
+ Comma: 'X',
+ Comment: 'X',
+ Error: errInvalidDelim,
+ }}
-func TestRead(t *testing.T) {
- for _, tt := range readTests {
- r := NewReader(strings.NewReader(tt.Input))
- r.Comment = tt.Comment
- if tt.UseFieldsPerRecord {
- r.FieldsPerRecord = tt.FieldsPerRecord
- } else {
- r.FieldsPerRecord = -1
- }
- r.LazyQuotes = tt.LazyQuotes
- r.TrailingComma = tt.TrailingComma
- r.TrimLeadingSpace = tt.TrimLeadingSpace
- r.ReuseRecord = tt.ReuseRecord
- if tt.Comma != 0 {
- r.Comma = tt.Comma
- }
- out, err := r.ReadAll()
- perr, _ := err.(*ParseError)
- if tt.Error != "" {
- if err == nil || !strings.Contains(err.Error(), tt.Error) {
- t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
- } else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) {
- t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column)
+ for _, tt := range tests {
+ t.Run(tt.Name, func(t *testing.T) {
+ r := NewReader(strings.NewReader(tt.Input))
+
+ if tt.Comma != 0 {
+ r.Comma = tt.Comma
}
- } else if err != nil {
- t.Errorf("%s: unexpected error %v", tt.Name, err)
- } else if !reflect.DeepEqual(out, tt.Output) {
- t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output)
- }
+ r.Comment = tt.Comment
+ if tt.UseFieldsPerRecord {
+ r.FieldsPerRecord = tt.FieldsPerRecord
+ } else {
+ r.FieldsPerRecord = -1
+ }
+ r.LazyQuotes = tt.LazyQuotes
+ r.TrimLeadingSpace = tt.TrimLeadingSpace
+ r.ReuseRecord = tt.ReuseRecord
+
+ out, err := r.ReadAll()
+ if !reflect.DeepEqual(err, tt.Error) {
+ t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error)
+ } else if !reflect.DeepEqual(out, tt.Output) {
+ t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output)
+ }
+ })
}
}
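
Not part of the patch: the refactored test above switches to t.Run subtests and compares errors structurally with reflect.DeepEqual rather than by message substring. A minimal sketch of the same pattern applied from outside the package (package name hypothetical); the expected errors mirror the ExtraneousQuote and BadBareQuote cases above.

    package csvcheck

    import (
        "encoding/csv"
        "reflect"
        "strings"
        "testing"
    )

    func TestQuoteErrors(t *testing.T) {
        tests := []struct {
            name  string
            input string
            want  error
        }{
            {"ExtraneousQuote", `"a "word","b"`, &csv.ParseError{StartLine: 1, Line: 1, Column: 3, Err: csv.ErrQuote}},
            {"BadBareQuote", `a "word","b"`, &csv.ParseError{StartLine: 1, Line: 1, Column: 2, Err: csv.ErrBareQuote}},
        }
        for _, tt := range tests {
            t.Run(tt.name, func(t *testing.T) {
                _, err := csv.NewReader(strings.NewReader(tt.input)).ReadAll()
                if !reflect.DeepEqual(err, tt.want) {
                    t.Errorf("got %v, want %v", err, tt.want)
                }
            })
        }
    }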
diff --git a/src/encoding/csv/writer.go b/src/encoding/csv/writer.go
index 84b7aa1..ef3594e 100644
--- a/src/encoding/csv/writer.go
+++ b/src/encoding/csv/writer.go
@@ -20,7 +20,7 @@
//
// Comma is the field delimiter.
//
-// If UseCRLF is true, the Writer ends each record with \r\n instead of \n.
+// If UseCRLF is true, the Writer ends each output line with \r\n instead of \n.
type Writer struct {
Comma rune // Field delimiter (set to ',' by NewWriter)
UseCRLF bool // True to use \r\n as the line terminator
@@ -38,6 +38,10 @@
// Writer writes a single CSV record to w along with any necessary quoting.
// A record is a slice of strings with each string being one field.
func (w *Writer) Write(record []string) error {
+ if !validDelim(w.Comma) {
+ return errInvalidDelim
+ }
+
for n, field := range record {
if n > 0 {
if _, err := w.w.WriteRune(w.Comma); err != nil {