updated vendor
This commit is contained in:
+107
-102
@@ -15,89 +15,6 @@ import (
|
||||
"io"
|
||||
)
|
||||
|
||||
func formatReaderError(problem string, offset int, value int) error {
|
||||
return ReaderError{
|
||||
Offset: offset,
|
||||
Value: value,
|
||||
Err: errors.New(problem),
|
||||
}
|
||||
}
|
||||
|
||||
// Byte order marks.
|
||||
const (
|
||||
bom_UTF8 = "\xef\xbb\xbf"
|
||||
bom_UTF16LE = "\xff\xfe"
|
||||
bom_UTF16BE = "\xfe\xff"
|
||||
)
|
||||
|
||||
// Determine the input stream encoding by checking the BOM symbol. If no BOM is
|
||||
// found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure.
|
||||
func (parser *Parser) determineEncoding() error {
|
||||
// Ensure that we had enough bytes in the raw buffer.
|
||||
for !parser.eof && len(parser.raw_buffer)-parser.raw_buffer_pos < 3 {
|
||||
if err := parser.updateRawBuffer(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Determine the encoding.
|
||||
buf := parser.raw_buffer
|
||||
pos := parser.raw_buffer_pos
|
||||
avail := len(buf) - pos
|
||||
if avail >= 2 && buf[pos] == bom_UTF16LE[0] && buf[pos+1] == bom_UTF16LE[1] {
|
||||
parser.encoding = UTF16LE_ENCODING
|
||||
parser.raw_buffer_pos += 2
|
||||
parser.offset += 2
|
||||
} else if avail >= 2 && buf[pos] == bom_UTF16BE[0] && buf[pos+1] == bom_UTF16BE[1] {
|
||||
parser.encoding = UTF16BE_ENCODING
|
||||
parser.raw_buffer_pos += 2
|
||||
parser.offset += 2
|
||||
} else if avail >= 3 && buf[pos] == bom_UTF8[0] && buf[pos+1] == bom_UTF8[1] && buf[pos+2] == bom_UTF8[2] {
|
||||
parser.encoding = UTF8_ENCODING
|
||||
parser.raw_buffer_pos += 3
|
||||
parser.offset += 3
|
||||
} else {
|
||||
parser.encoding = UTF8_ENCODING
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update the raw buffer.
|
||||
func (parser *Parser) updateRawBuffer() error {
|
||||
size_read := 0
|
||||
|
||||
// Return if the raw buffer is full.
|
||||
if parser.raw_buffer_pos == 0 && len(parser.raw_buffer) == cap(parser.raw_buffer) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Return on EOF.
|
||||
if parser.eof {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Move the remaining bytes in the raw buffer to the beginning.
|
||||
if parser.raw_buffer_pos > 0 && parser.raw_buffer_pos < len(parser.raw_buffer) {
|
||||
copy(parser.raw_buffer, parser.raw_buffer[parser.raw_buffer_pos:])
|
||||
}
|
||||
parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)-parser.raw_buffer_pos]
|
||||
parser.raw_buffer_pos = 0
|
||||
|
||||
// Call the read handler to fill the buffer.
|
||||
size_read, err := parser.read_handler(parser, parser.raw_buffer[len(parser.raw_buffer):cap(parser.raw_buffer)])
|
||||
parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)+size_read]
|
||||
if err == io.EOF {
|
||||
parser.eof = true
|
||||
} else if err != nil {
|
||||
return ReaderError{
|
||||
Offset: parser.offset,
|
||||
Value: -1,
|
||||
Err: fmt.Errorf("input error: %w", err),
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Ensure that the buffer contains at least `length` characters.
|
||||
// Return true on success, false on failure.
|
||||
//
|
||||
@@ -107,9 +24,10 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
panic("read handler must be set")
|
||||
}
|
||||
|
||||
// [Go] This function was changed to guarantee the requested length size at EOF.
|
||||
// The fact we need to do this is pretty awful, but the description above implies
|
||||
// for that to be the case, and there are tests
|
||||
// [Go] This function was changed to guarantee the requested length
|
||||
// size at EOF.
|
||||
// The fact we need to do this is pretty awful, but the description
|
||||
// above implies for that to be the case, and there are tests
|
||||
|
||||
// If the EOF flag is set and the raw buffer is empty, do nothing.
|
||||
//
|
||||
@@ -205,8 +123,8 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
default:
|
||||
// The leading octet is invalid.
|
||||
return formatReaderError(
|
||||
"invalid leading UTF-8 octet",
|
||||
parser.offset, int(octet))
|
||||
fmt.Sprintf("invalid leading UTF-8 octet (value: %d)", octet),
|
||||
Mark{Index: parser.offset})
|
||||
}
|
||||
|
||||
// Check if the raw buffer contains an incomplete character.
|
||||
@@ -214,7 +132,7 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
if parser.eof {
|
||||
return formatReaderError(
|
||||
"incomplete UTF-8 octet sequence",
|
||||
parser.offset, -1)
|
||||
Mark{Index: parser.offset})
|
||||
}
|
||||
break inner
|
||||
}
|
||||
@@ -240,8 +158,8 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
// Check if the octet is valid.
|
||||
if (octet & 0xC0) != 0x80 {
|
||||
return formatReaderError(
|
||||
"invalid trailing UTF-8 octet",
|
||||
parser.offset+k, int(octet))
|
||||
fmt.Sprintf("invalid trailing UTF-8 octet (value: %d)", octet),
|
||||
Mark{Index: parser.offset + k})
|
||||
}
|
||||
|
||||
// Decode the octet.
|
||||
@@ -257,14 +175,14 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
default:
|
||||
return formatReaderError(
|
||||
"invalid length of a UTF-8 sequence",
|
||||
parser.offset, -1)
|
||||
Mark{Index: parser.offset})
|
||||
}
|
||||
|
||||
// Check the range of the value.
|
||||
if value >= 0xD800 && value <= 0xDFFF || value > 0x10FFFF {
|
||||
return formatReaderError(
|
||||
"invalid Unicode character",
|
||||
parser.offset, int(value))
|
||||
fmt.Sprintf("invalid Unicode character (value: %d)", value),
|
||||
Mark{Index: parser.offset})
|
||||
}
|
||||
|
||||
case UTF16LE_ENCODING, UTF16BE_ENCODING:
|
||||
@@ -304,7 +222,7 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
if parser.eof {
|
||||
return formatReaderError(
|
||||
"incomplete UTF-16 character",
|
||||
parser.offset, -1)
|
||||
Mark{Index: parser.offset})
|
||||
}
|
||||
break inner
|
||||
}
|
||||
@@ -316,8 +234,8 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
// Check for unexpected low surrogate area.
|
||||
if value&0xFC00 == 0xDC00 {
|
||||
return formatReaderError(
|
||||
"unexpected low surrogate area",
|
||||
parser.offset, int(value))
|
||||
fmt.Sprintf("unexpected low surrogate area (value: %d)", value),
|
||||
Mark{Index: parser.offset})
|
||||
}
|
||||
|
||||
// Check for a high surrogate area.
|
||||
@@ -329,7 +247,7 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
if parser.eof {
|
||||
return formatReaderError(
|
||||
"incomplete UTF-16 surrogate pair",
|
||||
parser.offset, -1)
|
||||
Mark{Index: parser.offset})
|
||||
}
|
||||
break inner
|
||||
}
|
||||
@@ -341,8 +259,8 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
// Check for a low surrogate area.
|
||||
if value2&0xFC00 != 0xDC00 {
|
||||
return formatReaderError(
|
||||
"expected low surrogate area",
|
||||
parser.offset+2, int(value2))
|
||||
fmt.Sprintf("expected low surrogate area (value: %d)", value2),
|
||||
Mark{Index: parser.offset + 2})
|
||||
}
|
||||
|
||||
// Generate the value of the surrogate pair.
|
||||
@@ -383,8 +301,8 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
case value >= 0x10000 && value <= 0x10FFFF:
|
||||
default:
|
||||
return formatReaderError(
|
||||
"control characters are not allowed",
|
||||
parser.offset, int(value))
|
||||
fmt.Sprintf("control characters are not allowed (value: %d)", value),
|
||||
Mark{Index: parser.offset})
|
||||
}
|
||||
|
||||
// Move the raw pointers.
|
||||
@@ -439,3 +357,90 @@ func (parser *Parser) updateBuffer(length int) error {
|
||||
parser.buffer = parser.buffer[:buffer_len]
|
||||
return nil
|
||||
}
|
||||
|
||||
// Byte order marks for UTF-8, UTF-16LE, and UTF-16BE encodings.
|
||||
const (
|
||||
bom_UTF8 = "\xef\xbb\xbf"
|
||||
bom_UTF16LE = "\xff\xfe"
|
||||
bom_UTF16BE = "\xfe\xff"
|
||||
)
|
||||
|
||||
// Determine the input stream encoding by checking the BOM symbol.
|
||||
// If no BOM is found, the UTF-8 encoding is assumed.
|
||||
// Return 1 on success, 0 on failure.
|
||||
func (parser *Parser) determineEncoding() error {
|
||||
// Ensure that we had enough bytes in the raw buffer.
|
||||
for !parser.eof && len(parser.raw_buffer)-parser.raw_buffer_pos < 3 {
|
||||
if err := parser.updateRawBuffer(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Determine the encoding.
|
||||
buf := parser.raw_buffer
|
||||
pos := parser.raw_buffer_pos
|
||||
avail := len(buf) - pos
|
||||
if avail >= 2 && buf[pos] == bom_UTF16LE[0] && buf[pos+1] == bom_UTF16LE[1] {
|
||||
parser.encoding = UTF16LE_ENCODING
|
||||
parser.raw_buffer_pos += 2
|
||||
parser.offset += 2
|
||||
} else if avail >= 2 && buf[pos] == bom_UTF16BE[0] && buf[pos+1] == bom_UTF16BE[1] {
|
||||
parser.encoding = UTF16BE_ENCODING
|
||||
parser.raw_buffer_pos += 2
|
||||
parser.offset += 2
|
||||
} else if avail >= 3 && buf[pos] == bom_UTF8[0] && buf[pos+1] == bom_UTF8[1] && buf[pos+2] == bom_UTF8[2] {
|
||||
parser.encoding = UTF8_ENCODING
|
||||
parser.raw_buffer_pos += 3
|
||||
parser.offset += 3
|
||||
} else {
|
||||
parser.encoding = UTF8_ENCODING
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update the raw buffer.
|
||||
func (parser *Parser) updateRawBuffer() error {
|
||||
size_read := 0
|
||||
|
||||
// Return if the raw buffer is full.
|
||||
if parser.raw_buffer_pos == 0 && len(parser.raw_buffer) == cap(parser.raw_buffer) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Return on EOF.
|
||||
if parser.eof {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Move the remaining bytes in the raw buffer to the beginning.
|
||||
if parser.raw_buffer_pos > 0 && parser.raw_buffer_pos < len(parser.raw_buffer) {
|
||||
copy(parser.raw_buffer, parser.raw_buffer[parser.raw_buffer_pos:])
|
||||
}
|
||||
parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)-parser.raw_buffer_pos]
|
||||
parser.raw_buffer_pos = 0
|
||||
|
||||
// Call the read handler to fill the buffer.
|
||||
size_read, err := parser.read_handler(parser, parser.raw_buffer[len(parser.raw_buffer):cap(parser.raw_buffer)])
|
||||
parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)+size_read]
|
||||
if err == io.EOF {
|
||||
parser.eof = true
|
||||
} else if err != nil {
|
||||
return &LoadError{
|
||||
Stage: ReaderStage,
|
||||
Message: fmt.Sprintf("input error: %v", err),
|
||||
Mark: Mark{Index: parser.offset},
|
||||
err: err,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// formatReaderError creates a LoadError for reader-stage errors.
|
||||
func formatReaderError(message string, mark Mark) *LoadError {
|
||||
return &LoadError{
|
||||
Stage: ReaderStage,
|
||||
Message: message,
|
||||
Mark: mark,
|
||||
err: errors.New(message),
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user