updated vendor
This commit is contained in:
+538
-266
@@ -4,7 +4,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 AND MIT
|
||||
|
||||
// Parser stage: Transforms token stream into event stream.
|
||||
// Implements a recursive-descent parser (LL(1)) following the YAML grammar specification.
|
||||
// Implements a recursive-descent parser (LL(1)) following the YAML grammar
|
||||
// specification.
|
||||
//
|
||||
// The parser implements the following grammar:
|
||||
//
|
||||
@@ -52,59 +53,213 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Peek the next token in the token queue.
|
||||
func (parser *Parser) peekToken(out **Token) error {
|
||||
if !parser.token_available {
|
||||
if err := parser.fetchMoreTokens(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// ReadHandler is called by the [Parser] when it needs to read more bytes
|
||||
// from the input source. The handler should fill the provided buffer with
|
||||
// up to len(buffer) bytes from the input source.
|
||||
//
|
||||
// The arguments are as follows:
|
||||
//
|
||||
// [in] parser The parser object.
|
||||
// [out] buffer The buffer for reading.
|
||||
// [out] size_read The actual number of bytes read from the source.
|
||||
//
|
||||
// On success, the handler should return 1. If the handler failed,
|
||||
// the returned value should be 0. On EOF, the handler should set the
|
||||
// size_read to 0 and return 1.
|
||||
type ReadHandler func(parser *Parser, buffer []byte) (n int, err error)
|
||||
|
||||
token := &parser.tokens[parser.tokens_head]
|
||||
parser.UnfoldComments(token)
|
||||
*out = token
|
||||
return nil
|
||||
// SimpleKey holds information about a potential simple key.
|
||||
type SimpleKey struct {
|
||||
flow_level int // What flow level is the key at?
|
||||
required bool // Is a simple key required?
|
||||
token_number int // The number of the token.
|
||||
mark Mark // The position mark.
|
||||
}
|
||||
|
||||
// UnfoldComments walks through the comments queue and joins all
|
||||
// comments behind the position of the provided token into the respective
|
||||
// top-level comment slices in the parser.
|
||||
func (parser *Parser) UnfoldComments(token *Token) {
|
||||
for parser.comments_head < len(parser.comments) && token.StartMark.Index >= parser.comments[parser.comments_head].TokenMark.Index {
|
||||
comment := &parser.comments[parser.comments_head]
|
||||
if len(comment.Head) > 0 {
|
||||
if token.Type == BLOCK_END_TOKEN {
|
||||
// No heads on ends, so keep comment.Head for a follow up token.
|
||||
break
|
||||
}
|
||||
if len(parser.HeadComment) > 0 {
|
||||
parser.HeadComment = append(parser.HeadComment, '\n')
|
||||
}
|
||||
parser.HeadComment = append(parser.HeadComment, comment.Head...)
|
||||
}
|
||||
if len(comment.Foot) > 0 {
|
||||
if len(parser.FootComment) > 0 {
|
||||
parser.FootComment = append(parser.FootComment, '\n')
|
||||
}
|
||||
parser.FootComment = append(parser.FootComment, comment.Foot...)
|
||||
}
|
||||
if len(comment.Line) > 0 {
|
||||
if len(parser.LineComment) > 0 {
|
||||
parser.LineComment = append(parser.LineComment, '\n')
|
||||
}
|
||||
parser.LineComment = append(parser.LineComment, comment.Line...)
|
||||
}
|
||||
*comment = Comment{}
|
||||
parser.comments_head++
|
||||
// ParserState represents the state of the parser.
|
||||
type ParserState int
|
||||
|
||||
// Parser state constants define the different states the parser can be in.
|
||||
const (
|
||||
PARSE_STREAM_START_STATE ParserState = iota
|
||||
|
||||
PARSE_IMPLICIT_DOCUMENT_START_STATE // Expect the beginning of an implicit document.
|
||||
PARSE_DOCUMENT_START_STATE // Expect DOCUMENT-START.
|
||||
PARSE_DOCUMENT_CONTENT_STATE // Expect the content of a document.
|
||||
PARSE_DOCUMENT_END_STATE // Expect DOCUMENT-END.
|
||||
PARSE_BLOCK_NODE_STATE // Expect a block node.
|
||||
PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE // Expect the first entry of a block sequence.
|
||||
PARSE_BLOCK_SEQUENCE_ENTRY_STATE // Expect an entry of a block sequence.
|
||||
PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE // Expect an entry of an indentless sequence.
|
||||
PARSE_BLOCK_MAPPING_FIRST_KEY_STATE // Expect the first key of a block mapping.
|
||||
PARSE_BLOCK_MAPPING_KEY_STATE // Expect a block mapping key.
|
||||
PARSE_BLOCK_MAPPING_VALUE_STATE // Expect a block mapping value.
|
||||
PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE // Expect the first entry of a flow sequence.
|
||||
PARSE_FLOW_SEQUENCE_ENTRY_STATE // Expect an entry of a flow sequence.
|
||||
PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE // Expect a key of an ordered mapping.
|
||||
PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE // Expect a value of an ordered mapping.
|
||||
PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE // Expect the and of an ordered mapping entry.
|
||||
PARSE_FLOW_MAPPING_FIRST_KEY_STATE // Expect the first key of a flow mapping.
|
||||
PARSE_FLOW_MAPPING_KEY_STATE // Expect a key of a flow mapping.
|
||||
PARSE_FLOW_MAPPING_VALUE_STATE // Expect a value of a flow mapping.
|
||||
PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE // Expect an empty value of a flow mapping.
|
||||
PARSE_END_STATE // Expect nothing.
|
||||
)
|
||||
|
||||
// String returns a string representation of the parser state.
|
||||
func (ps ParserState) String() string {
|
||||
switch ps {
|
||||
case PARSE_STREAM_START_STATE:
|
||||
return "PARSE_STREAM_START_STATE"
|
||||
case PARSE_IMPLICIT_DOCUMENT_START_STATE:
|
||||
return "PARSE_IMPLICIT_DOCUMENT_START_STATE"
|
||||
case PARSE_DOCUMENT_START_STATE:
|
||||
return "PARSE_DOCUMENT_START_STATE"
|
||||
case PARSE_DOCUMENT_CONTENT_STATE:
|
||||
return "PARSE_DOCUMENT_CONTENT_STATE"
|
||||
case PARSE_DOCUMENT_END_STATE:
|
||||
return "PARSE_DOCUMENT_END_STATE"
|
||||
case PARSE_BLOCK_NODE_STATE:
|
||||
return "PARSE_BLOCK_NODE_STATE"
|
||||
case PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE:
|
||||
return "PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE"
|
||||
case PARSE_BLOCK_SEQUENCE_ENTRY_STATE:
|
||||
return "PARSE_BLOCK_SEQUENCE_ENTRY_STATE"
|
||||
case PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE:
|
||||
return "PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE"
|
||||
case PARSE_BLOCK_MAPPING_FIRST_KEY_STATE:
|
||||
return "PARSE_BLOCK_MAPPING_FIRST_KEY_STATE"
|
||||
case PARSE_BLOCK_MAPPING_KEY_STATE:
|
||||
return "PARSE_BLOCK_MAPPING_KEY_STATE"
|
||||
case PARSE_BLOCK_MAPPING_VALUE_STATE:
|
||||
return "PARSE_BLOCK_MAPPING_VALUE_STATE"
|
||||
case PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE:
|
||||
return "PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE"
|
||||
case PARSE_FLOW_SEQUENCE_ENTRY_STATE:
|
||||
return "PARSE_FLOW_SEQUENCE_ENTRY_STATE"
|
||||
case PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE:
|
||||
return "PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE"
|
||||
case PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE:
|
||||
return "PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE"
|
||||
case PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE:
|
||||
return "PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE"
|
||||
case PARSE_FLOW_MAPPING_FIRST_KEY_STATE:
|
||||
return "PARSE_FLOW_MAPPING_FIRST_KEY_STATE"
|
||||
case PARSE_FLOW_MAPPING_KEY_STATE:
|
||||
return "PARSE_FLOW_MAPPING_KEY_STATE"
|
||||
case PARSE_FLOW_MAPPING_VALUE_STATE:
|
||||
return "PARSE_FLOW_MAPPING_VALUE_STATE"
|
||||
case PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE:
|
||||
return "PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE"
|
||||
case PARSE_END_STATE:
|
||||
return "PARSE_END_STATE"
|
||||
}
|
||||
return "<unknown parser state>"
|
||||
}
|
||||
|
||||
// Remove the next token from the queue (must be called after peek_token).
|
||||
func (parser *Parser) skipToken() {
|
||||
parser.token_available = false
|
||||
parser.tokens_parsed++
|
||||
parser.stream_end_produced = parser.tokens[parser.tokens_head].Type == STREAM_END_TOKEN
|
||||
parser.tokens_head++
|
||||
// AliasData holds information about aliases.
|
||||
type AliasData struct {
|
||||
anchor []byte // The anchor.
|
||||
index int // The node id.
|
||||
mark Mark // The anchor mark.
|
||||
}
|
||||
|
||||
// Comment holds information about a comment in the YAML stream.
|
||||
type Comment struct {
|
||||
ScanMark Mark // Position where scanning for comments started
|
||||
TokenMark Mark // Position after which tokens will be associated with this comment
|
||||
StartMark Mark // Position of '#' comment mark
|
||||
EndMark Mark // Position where comment terminated
|
||||
|
||||
Head []byte
|
||||
Line []byte
|
||||
Foot []byte
|
||||
}
|
||||
|
||||
// Parser structure holds all information about the current
|
||||
// state of the parser.
|
||||
type Parser struct {
|
||||
lastError error
|
||||
|
||||
// Reader stuff
|
||||
read_handler ReadHandler // Read handler.
|
||||
|
||||
input_reader io.Reader // File input data.
|
||||
input []byte // String input data.
|
||||
input_pos int
|
||||
|
||||
eof bool // EOF flag
|
||||
|
||||
buffer []byte // The working buffer.
|
||||
buffer_pos int // The current position of the buffer.
|
||||
|
||||
unread int // The number of unread characters in the buffer.
|
||||
|
||||
newlines int // The number of line breaks since last non-break/non-blank character
|
||||
|
||||
raw_buffer []byte // The raw buffer.
|
||||
raw_buffer_pos int // The current position of the buffer.
|
||||
|
||||
encoding Encoding // The input encoding.
|
||||
|
||||
offset int // The offset of the current position (in bytes).
|
||||
mark Mark // The mark of the current position.
|
||||
|
||||
// Comments
|
||||
|
||||
HeadComment []byte // The current head comments
|
||||
LineComment []byte // The current line comments
|
||||
FootComment []byte // The current foot comments
|
||||
tail_comment []byte // Foot comment that happens at the end of a block.
|
||||
stem_comment []byte // Comment in item preceding a nested structure (list inside list item, etc)
|
||||
|
||||
comments []Comment // The folded comments for all parsed tokens
|
||||
comments_head int
|
||||
|
||||
skip_comments bool // Skip comment scanning for performance
|
||||
|
||||
// Scanner stuff
|
||||
|
||||
stream_start_produced bool // Have we started to scan the input stream?
|
||||
stream_end_produced bool // Have we reached the end of the input stream?
|
||||
|
||||
flow_level int // The number of unclosed '[' and '{' indicators.
|
||||
|
||||
tokens []Token // The tokens queue.
|
||||
tokens_head int // The head of the tokens queue.
|
||||
tokens_parsed int // The number of tokens fetched from the queue.
|
||||
token_available bool // Does the tokens queue contain a token ready for dequeueing.
|
||||
|
||||
indent int // The current indentation level.
|
||||
indents []int // The indentation levels stack.
|
||||
|
||||
simple_key_allowed bool // May a simple key occur at the current position?
|
||||
simple_key_possible bool // Is the current simple key possible?
|
||||
simple_key SimpleKey // The current simple key.
|
||||
simple_key_stack []SimpleKey // The stack of simple keys.
|
||||
|
||||
depthCheck func(int, *DepthContext) error // Depth limit check function
|
||||
|
||||
// Parser stuff
|
||||
|
||||
state ParserState // The current parser state.
|
||||
states []ParserState // The parser states stack.
|
||||
marks []Mark // The stack of marks.
|
||||
tag_directives []TagDirective // The list of TAG directives.
|
||||
|
||||
// Representer stuff
|
||||
|
||||
aliases []AliasData // The alias data.
|
||||
}
|
||||
|
||||
// NewParser creates a new parser object.
|
||||
func NewParser() Parser {
|
||||
return Parser{
|
||||
raw_buffer: make([]byte, 0, input_raw_buffer_size),
|
||||
buffer: make([]byte, 0, input_buffer_size),
|
||||
mark: Mark{Line: 1, Column: 1},
|
||||
depthCheck: DefaultDepthCheck,
|
||||
}
|
||||
}
|
||||
|
||||
// Parse gets the next event.
|
||||
@@ -130,21 +285,74 @@ func (parser *Parser) Parse(event *Event) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func formatParserError(problem string, problem_mark Mark) error {
|
||||
return ParserError{
|
||||
Mark: problem_mark,
|
||||
Message: problem,
|
||||
}
|
||||
// Delete a parser object.
|
||||
func (parser *Parser) Delete() {
|
||||
*parser = Parser{}
|
||||
}
|
||||
|
||||
func formatParserErrorContext(context string, context_mark Mark, problem string, problem_mark Mark) error {
|
||||
return ParserError{
|
||||
ContextMark: context_mark,
|
||||
ContextMessage: context,
|
||||
|
||||
Mark: problem_mark,
|
||||
Message: problem,
|
||||
// String read handler.
|
||||
func yamlStringReadHandler(parser *Parser, buffer []byte) (n int, err error) {
|
||||
if parser.input_pos == len(parser.input) {
|
||||
return 0, io.EOF
|
||||
}
|
||||
n = copy(buffer, parser.input[parser.input_pos:])
|
||||
parser.input_pos += n
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// Reader read handler.
|
||||
func yamlReaderReadHandler(parser *Parser, buffer []byte) (n int, err error) {
|
||||
return parser.input_reader.Read(buffer)
|
||||
}
|
||||
|
||||
// SetInputString sets a string input.
|
||||
func (parser *Parser) SetInputString(input []byte) {
|
||||
if parser.read_handler != nil {
|
||||
panic("must set the input source only once")
|
||||
}
|
||||
parser.read_handler = yamlStringReadHandler
|
||||
parser.input = input
|
||||
parser.input_pos = 0
|
||||
}
|
||||
|
||||
// SetInputReader sets a file input.
|
||||
func (parser *Parser) SetInputReader(r io.Reader) {
|
||||
if parser.read_handler != nil {
|
||||
panic("must set the input source only once")
|
||||
}
|
||||
parser.read_handler = yamlReaderReadHandler
|
||||
parser.input_reader = r
|
||||
}
|
||||
|
||||
// SetEncoding sets the source encoding.
|
||||
func (parser *Parser) SetEncoding(encoding Encoding) {
|
||||
if parser.encoding != ANY_ENCODING {
|
||||
panic("must set the encoding only once")
|
||||
}
|
||||
parser.encoding = encoding
|
||||
}
|
||||
|
||||
// GetPendingComments returns the parser's comment queue for CLI access.
|
||||
func (parser *Parser) GetPendingComments() []Comment {
|
||||
return parser.comments
|
||||
}
|
||||
|
||||
// GetCommentsHead returns the current position in the comment queue.
|
||||
func (parser *Parser) GetCommentsHead() int {
|
||||
return parser.comments_head
|
||||
}
|
||||
|
||||
// SetSkipComments enables or disables comment scanning.
|
||||
// When enabled, the scanner skips comment tokens for better performance.
|
||||
func (parser *Parser) SetSkipComments(skip bool) {
|
||||
parser.skip_comments = skip
|
||||
}
|
||||
|
||||
// default_tag_directives defines the standard tag directives (! and !!)
|
||||
// that are implicitly available in all YAML documents.
|
||||
var default_tag_directives = []TagDirective{
|
||||
{[]byte("!"), []byte("!")},
|
||||
{[]byte("!!"), []byte("tag:yaml.org,2002:")},
|
||||
}
|
||||
|
||||
// State dispatcher.
|
||||
@@ -221,9 +429,9 @@ func (parser *Parser) stateMachine(event *Event) error {
|
||||
}
|
||||
|
||||
// Parse the production:
|
||||
// stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
|
||||
//
|
||||
// ************
|
||||
// stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
|
||||
// ************
|
||||
func (parser *Parser) parseStreamStart(event *Event) error {
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
@@ -244,13 +452,11 @@ func (parser *Parser) parseStreamStart(event *Event) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// implicit_document ::= block_node DOCUMENT-END*
|
||||
//
|
||||
// *
|
||||
//
|
||||
// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
|
||||
//
|
||||
// *************************
|
||||
// implicit_document ::= block_node DOCUMENT-END*
|
||||
// *
|
||||
// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
|
||||
// *************************
|
||||
func (parser *Parser) parseDocumentStart(event *Event, implicit bool) error {
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
@@ -258,12 +464,10 @@ func (parser *Parser) parseDocumentStart(event *Event, implicit bool) error {
|
||||
}
|
||||
|
||||
// Parse extra document end indicators.
|
||||
if !implicit {
|
||||
for token.Type == DOCUMENT_END_TOKEN {
|
||||
parser.skipToken()
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
return err
|
||||
}
|
||||
for token.Type == DOCUMENT_END_TOKEN {
|
||||
parser.skipToken()
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -280,9 +484,11 @@ func (parser *Parser) parseDocumentStart(event *Event, implicit bool) error {
|
||||
|
||||
var head_comment []byte
|
||||
if len(parser.HeadComment) > 0 {
|
||||
// [Go] Scan the header comment backwards, and if an empty line is found, break
|
||||
// the header so the part before the last empty line goes into the
|
||||
// document header, while the bottom of it goes into a follow up event.
|
||||
// [Go] Scan the header comment backwards, and if an
|
||||
// empty line is found, break the header so the part
|
||||
// before the last empty line goes into the document
|
||||
// header, while the bottom of it goes into a follow up
|
||||
// event.
|
||||
for i := len(parser.HeadComment) - 1; i > 0; i-- {
|
||||
if parser.HeadComment[i] == '\n' {
|
||||
if i == len(parser.HeadComment)-1 {
|
||||
@@ -351,9 +557,9 @@ func (parser *Parser) parseDocumentStart(event *Event, implicit bool) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
|
||||
//
|
||||
// ***********
|
||||
// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
|
||||
// ***********
|
||||
func (parser *Parser) parseDocumentContent(event *Event) error {
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
@@ -374,11 +580,10 @@ func (parser *Parser) parseDocumentContent(event *Event) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// implicit_document ::= block_node DOCUMENT-END*
|
||||
//
|
||||
// *************
|
||||
//
|
||||
// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
|
||||
// implicit_document ::= block_node DOCUMENT-END*
|
||||
// *************
|
||||
// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
|
||||
func (parser *Parser) parseDocumentEnd(event *Event) error {
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
@@ -412,54 +617,113 @@ func (parser *Parser) parseDocumentEnd(event *Event) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (parser *Parser) setEventComments(event *Event) {
|
||||
event.HeadComment = parser.HeadComment
|
||||
event.LineComment = parser.LineComment
|
||||
event.FootComment = parser.FootComment
|
||||
parser.HeadComment = nil
|
||||
parser.LineComment = nil
|
||||
parser.FootComment = nil
|
||||
parser.tail_comment = nil
|
||||
parser.stem_comment = nil
|
||||
// Parse directives.
|
||||
func (parser *Parser) processDirectives(version_directive_ref **VersionDirective, tag_directives_ref *[]TagDirective) error {
|
||||
var version_directive *VersionDirective
|
||||
var tag_directives []TagDirective
|
||||
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for token.Type == VERSION_DIRECTIVE_TOKEN || token.Type == TAG_DIRECTIVE_TOKEN {
|
||||
switch token.Type {
|
||||
case VERSION_DIRECTIVE_TOKEN:
|
||||
if version_directive != nil {
|
||||
return formatParserError(
|
||||
"found duplicate %YAML directive", token.StartMark)
|
||||
}
|
||||
if token.major != 1 || token.minor != 1 {
|
||||
return formatParserError(
|
||||
"found incompatible YAML document", token.StartMark)
|
||||
}
|
||||
version_directive = &VersionDirective{
|
||||
major: token.major,
|
||||
minor: token.minor,
|
||||
}
|
||||
case TAG_DIRECTIVE_TOKEN:
|
||||
value := TagDirective{
|
||||
handle: token.Value,
|
||||
prefix: token.prefix,
|
||||
}
|
||||
if err := parser.appendTagDirective(value, false, token.StartMark); err != nil {
|
||||
return err
|
||||
}
|
||||
tag_directives = append(tag_directives, value)
|
||||
}
|
||||
|
||||
parser.skipToken()
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for i := range default_tag_directives {
|
||||
if err := parser.appendTagDirective(default_tag_directives[i], true, token.StartMark); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if version_directive_ref != nil {
|
||||
*version_directive_ref = version_directive
|
||||
}
|
||||
if tag_directives_ref != nil {
|
||||
*tag_directives_ref = tag_directives
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Append a tag directive to the directives stack.
|
||||
func (parser *Parser) appendTagDirective(value TagDirective, allow_duplicates bool, mark Mark) error {
|
||||
for i := range parser.tag_directives {
|
||||
if bytes.Equal(value.handle, parser.tag_directives[i].handle) {
|
||||
if allow_duplicates {
|
||||
return nil
|
||||
}
|
||||
return formatParserError("found duplicate %TAG directive", mark)
|
||||
}
|
||||
}
|
||||
|
||||
// [Go] I suspect the copy is unnecessary. This was likely done
|
||||
// because there was no way to track ownership of the data.
|
||||
value_copy := TagDirective{
|
||||
handle: make([]byte, len(value.handle)),
|
||||
prefix: make([]byte, len(value.prefix)),
|
||||
}
|
||||
copy(value_copy.handle, value.handle)
|
||||
copy(value_copy.prefix, value.prefix)
|
||||
parser.tag_directives = append(parser.tag_directives, value_copy)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// block_node_or_indentless_sequence ::=
|
||||
//
|
||||
// ALIAS
|
||||
// *****
|
||||
// | properties (block_content | indentless_block_sequence)?
|
||||
// ********** *
|
||||
// | block_content | indentless_block_sequence
|
||||
// *
|
||||
//
|
||||
// block_node ::= ALIAS
|
||||
//
|
||||
// *****
|
||||
// | properties block_content?
|
||||
// ********** *
|
||||
// | block_content
|
||||
// *
|
||||
//
|
||||
// flow_node ::= ALIAS
|
||||
//
|
||||
// *****
|
||||
// | properties flow_content?
|
||||
// ********** *
|
||||
// | flow_content
|
||||
// *
|
||||
//
|
||||
// properties ::= TAG ANCHOR? | ANCHOR TAG?
|
||||
//
|
||||
// *************************
|
||||
//
|
||||
// block_content ::= block_collection | flow_collection | SCALAR
|
||||
//
|
||||
// ******
|
||||
//
|
||||
// flow_content ::= flow_collection | SCALAR
|
||||
//
|
||||
// ******
|
||||
// block_node_or_indentless_sequence ::=
|
||||
// ALIAS
|
||||
// *****
|
||||
// | properties (block_content | indentless_block_sequence)?
|
||||
// ********** *
|
||||
// | block_content | indentless_block_sequence
|
||||
// *
|
||||
// block_node ::= ALIAS
|
||||
// *****
|
||||
// | properties block_content?
|
||||
// ********** *
|
||||
// | block_content
|
||||
// *
|
||||
// flow_node ::= ALIAS
|
||||
// *****
|
||||
// | properties flow_content?
|
||||
// ********** *
|
||||
// | flow_content
|
||||
// *
|
||||
// properties ::= TAG ANCHOR? | ANCHOR TAG?
|
||||
// *************************
|
||||
// block_content ::= block_collection | flow_collection | SCALAR
|
||||
// ******
|
||||
// flow_content ::= flow_collection | SCALAR
|
||||
// ******
|
||||
func (parser *Parser) parseNode(event *Event, block, indentless_sequence bool) error {
|
||||
// defer trace("yaml_parser_parse_node", "block:", block, "indentless_sequence:", indentless_sequence)()
|
||||
|
||||
@@ -683,9 +947,9 @@ func (parser *Parser) parseNode(event *Event, block, indentless_sequence bool) e
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
|
||||
//
|
||||
// ******************** *********** * *********
|
||||
// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
|
||||
// ******************** *********** * *********
|
||||
func (parser *Parser) parseBlockSequenceEntry(event *Event, first bool) error {
|
||||
if first {
|
||||
var token *Token
|
||||
@@ -742,9 +1006,9 @@ func (parser *Parser) parseBlockSequenceEntry(event *Event, first bool) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// indentless_sequence ::= (BLOCK-ENTRY block_node?)+
|
||||
//
|
||||
// *********** *
|
||||
// indentless_sequence ::= (BLOCK-ENTRY block_node?)+
|
||||
// *********** *
|
||||
func (parser *Parser) parseIndentlessSequenceEntry(event *Event) error {
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
@@ -784,10 +1048,11 @@ func (parser *Parser) parseIndentlessSequenceEntry(event *Event) error {
|
||||
|
||||
// Split stem comment from head comment.
|
||||
//
|
||||
// When a sequence or map is found under a sequence entry, the former head comment
|
||||
// is assigned to the underlying sequence or map as a whole, not the individual
|
||||
// sequence or map entry as would be expected otherwise. To handle this case the
|
||||
// previous head comment is moved aside as the stem comment.
|
||||
// When a sequence or map is found under a sequence entry, the former head
|
||||
// comment is assigned to the underlying sequence or map as a whole, not the
|
||||
// individual sequence or map entry as would be expected otherwise.
|
||||
// To handle this case the previous head comment is moved aside as the stem
|
||||
// comment.
|
||||
func (parser *Parser) splitStemComment(stem_len int) error {
|
||||
if stem_len == 0 {
|
||||
return nil
|
||||
@@ -813,15 +1078,15 @@ func (parser *Parser) splitStemComment(stem_len int) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// block_mapping ::= BLOCK-MAPPING_START
|
||||
//
|
||||
// *******************
|
||||
// ((KEY block_node_or_indentless_sequence?)?
|
||||
// *** *
|
||||
// (VALUE block_node_or_indentless_sequence?)?)*
|
||||
// block_mapping ::= BLOCK-MAPPING_START
|
||||
// *******************
|
||||
// ((KEY block_node_or_indentless_sequence?)?
|
||||
// *** *
|
||||
// (VALUE block_node_or_indentless_sequence?)?)*
|
||||
//
|
||||
// BLOCK-END
|
||||
// *********
|
||||
// BLOCK-END
|
||||
// *********
|
||||
func (parser *Parser) parseBlockMappingKey(event *Event, first bool) error {
|
||||
if first {
|
||||
var token *Token
|
||||
@@ -837,8 +1102,9 @@ func (parser *Parser) parseBlockMappingKey(event *Event, first bool) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// [Go] A tail comment was left from the prior mapping value processed. Emit an event
|
||||
// as it needs to be processed with that value and not the following key.
|
||||
// [Go] A tail comment was left from the prior mapping value processed.
|
||||
// Emit an event as it needs to be processed with that value and not
|
||||
// the following key.
|
||||
if len(parser.tail_comment) > 0 {
|
||||
*event = Event{
|
||||
Type: TAIL_COMMENT_EVENT,
|
||||
@@ -888,13 +1154,14 @@ func (parser *Parser) parseBlockMappingKey(event *Event, first bool) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// block_mapping ::= BLOCK-MAPPING_START
|
||||
//
|
||||
// ((KEY block_node_or_indentless_sequence?)?
|
||||
// block_mapping ::= BLOCK-MAPPING_START
|
||||
//
|
||||
// (VALUE block_node_or_indentless_sequence?)?)*
|
||||
// ***** *
|
||||
// BLOCK-END
|
||||
// ((KEY block_node_or_indentless_sequence?)?
|
||||
//
|
||||
// (VALUE block_node_or_indentless_sequence?)?)*
|
||||
// ***** *
|
||||
// BLOCK-END
|
||||
func (parser *Parser) parseBlockMappingValue(event *Event) error {
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
@@ -920,19 +1187,17 @@ func (parser *Parser) parseBlockMappingValue(event *Event) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// flow_sequence ::= FLOW-SEQUENCE-START
|
||||
//
|
||||
// *******************
|
||||
// (flow_sequence_entry FLOW-ENTRY)*
|
||||
// * **********
|
||||
// flow_sequence_entry?
|
||||
// *
|
||||
// FLOW-SEQUENCE-END
|
||||
// *****************
|
||||
//
|
||||
// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
//
|
||||
// *
|
||||
// flow_sequence ::= FLOW-SEQUENCE-START
|
||||
// *******************
|
||||
// (flow_sequence_entry FLOW-ENTRY)*
|
||||
// * **********
|
||||
// flow_sequence_entry?
|
||||
// *
|
||||
// FLOW-SEQUENCE-END
|
||||
// *****************
|
||||
// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
// *
|
||||
func (parser *Parser) parseFlowSequenceEntry(event *Event, first bool) error {
|
||||
if first {
|
||||
var token *Token
|
||||
@@ -995,9 +1260,9 @@ func (parser *Parser) parseFlowSequenceEntry(event *Event, first bool) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
//
|
||||
// *** *
|
||||
// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
// *** *
|
||||
func (parser *Parser) parseFlowSequenceEntryMappingKey(event *Event) error {
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
@@ -1016,9 +1281,9 @@ func (parser *Parser) parseFlowSequenceEntryMappingKey(event *Event) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
//
|
||||
// ***** *
|
||||
// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
// ***** *
|
||||
func (parser *Parser) parseFlowSequenceEntryMappingValue(event *Event) error {
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
@@ -1040,9 +1305,9 @@ func (parser *Parser) parseFlowSequenceEntryMappingValue(event *Event) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
//
|
||||
// *
|
||||
// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
// *
|
||||
func (parser *Parser) parseFlowSequenceEntryMappingEnd(event *Event) error {
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
@@ -1058,18 +1323,17 @@ func (parser *Parser) parseFlowSequenceEntryMappingEnd(event *Event) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// flow_mapping ::= FLOW-MAPPING-START
|
||||
//
|
||||
// ******************
|
||||
// (flow_mapping_entry FLOW-ENTRY)*
|
||||
// * **********
|
||||
// flow_mapping_entry?
|
||||
// ******************
|
||||
// FLOW-MAPPING-END
|
||||
// ****************
|
||||
//
|
||||
// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
// - *** *
|
||||
// flow_mapping ::= FLOW-MAPPING-START
|
||||
// ******************
|
||||
// (flow_mapping_entry FLOW-ENTRY)*
|
||||
// * **********
|
||||
// flow_mapping_entry?
|
||||
// ******************
|
||||
// FLOW-MAPPING-END
|
||||
// ****************
|
||||
// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
// * *** *
|
||||
func (parser *Parser) parseFlowMappingKey(event *Event, first bool) error {
|
||||
if first {
|
||||
var token *Token
|
||||
@@ -1135,8 +1399,9 @@ func (parser *Parser) parseFlowMappingKey(event *Event, first bool) error {
|
||||
}
|
||||
|
||||
// Parse the productions:
|
||||
// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
// - ***** *
|
||||
//
|
||||
// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
|
||||
// * ***** *
|
||||
func (parser *Parser) parseFlowMappingValue(event *Event, empty bool) error {
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
@@ -1160,6 +1425,96 @@ func (parser *Parser) parseFlowMappingValue(event *Event, empty bool) error {
|
||||
return parser.processEmptyScalar(event, token.StartMark)
|
||||
}
|
||||
|
||||
// Peek the next token in the token queue.
|
||||
func (parser *Parser) peekToken(out **Token) error {
|
||||
if !parser.token_available {
|
||||
if err := parser.fetchMoreTokens(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
token := &parser.tokens[parser.tokens_head]
|
||||
parser.UnfoldComments(token)
|
||||
*out = token
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnfoldComments walks through the comments queue and joins all
|
||||
// comments behind the position of the provided token into the respective
|
||||
// top-level comment slices in the parser.
|
||||
func (parser *Parser) UnfoldComments(token *Token) {
|
||||
for parser.comments_head < len(parser.comments) && token.StartMark.Index >= parser.comments[parser.comments_head].TokenMark.Index {
|
||||
comment := &parser.comments[parser.comments_head]
|
||||
if len(comment.Head) > 0 {
|
||||
if token.Type == BLOCK_END_TOKEN {
|
||||
// No heads on ends, so keep comment.Head for a follow up token.
|
||||
break
|
||||
}
|
||||
if len(parser.HeadComment) > 0 {
|
||||
parser.HeadComment = append(parser.HeadComment, '\n')
|
||||
}
|
||||
parser.HeadComment = append(parser.HeadComment, comment.Head...)
|
||||
}
|
||||
if len(comment.Foot) > 0 {
|
||||
if len(parser.FootComment) > 0 {
|
||||
parser.FootComment = append(parser.FootComment, '\n')
|
||||
}
|
||||
parser.FootComment = append(parser.FootComment, comment.Foot...)
|
||||
}
|
||||
if len(comment.Line) > 0 {
|
||||
if len(parser.LineComment) > 0 {
|
||||
parser.LineComment = append(parser.LineComment, '\n')
|
||||
}
|
||||
parser.LineComment = append(parser.LineComment, comment.Line...)
|
||||
}
|
||||
*comment = Comment{}
|
||||
parser.comments_head++
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the next token from the queue (must be called after peek_token).
|
||||
func (parser *Parser) skipToken() {
|
||||
parser.token_available = false
|
||||
parser.tokens_parsed++
|
||||
parser.stream_end_produced = parser.tokens[parser.tokens_head].Type == STREAM_END_TOKEN
|
||||
parser.tokens_head++
|
||||
}
|
||||
|
||||
// formatParserError creates a LoadError with the given problem message
|
||||
// and mark position.
|
||||
func formatParserError(problem string, problemMark Mark) *LoadError {
|
||||
return &LoadError{
|
||||
Stage: ParserStage,
|
||||
Mark: problemMark,
|
||||
Message: problem,
|
||||
}
|
||||
}
|
||||
|
||||
// formatParserErrorContext creates a LoadError with both context and
|
||||
// problem information, each with their own mark positions.
|
||||
func formatParserErrorContext(context string, contextMark Mark, problem string, problemMark Mark) *LoadError {
|
||||
return &LoadError{
|
||||
Stage: ParserStage,
|
||||
ContextMark: contextMark,
|
||||
ContextMsg: context,
|
||||
Mark: problemMark,
|
||||
Message: problem,
|
||||
}
|
||||
}
|
||||
|
||||
// setEventComments transfers accumulated comments from the parser to the
|
||||
// event and clears the parser's comment state.
|
||||
func (parser *Parser) setEventComments(event *Event) {
|
||||
event.HeadComment = parser.HeadComment
|
||||
event.LineComment = parser.LineComment
|
||||
event.FootComment = parser.FootComment
|
||||
parser.HeadComment = nil
|
||||
parser.LineComment = nil
|
||||
parser.FootComment = nil
|
||||
parser.tail_comment = nil
|
||||
parser.stem_comment = nil
|
||||
}
|
||||
|
||||
// Generate an empty scalar event.
|
||||
func (parser *Parser) processEmptyScalar(event *Event, mark Mark) error {
|
||||
*event = Event{
|
||||
@@ -1173,94 +1528,9 @@ func (parser *Parser) processEmptyScalar(event *Event, mark Mark) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
var default_tag_directives = []TagDirective{
|
||||
{[]byte("!"), []byte("!")},
|
||||
{[]byte("!!"), []byte("tag:yaml.org,2002:")},
|
||||
}
|
||||
|
||||
// Parse directives.
|
||||
func (parser *Parser) processDirectives(version_directive_ref **VersionDirective, tag_directives_ref *[]TagDirective) error {
|
||||
var version_directive *VersionDirective
|
||||
var tag_directives []TagDirective
|
||||
|
||||
var token *Token
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for token.Type == VERSION_DIRECTIVE_TOKEN || token.Type == TAG_DIRECTIVE_TOKEN {
|
||||
switch token.Type {
|
||||
case VERSION_DIRECTIVE_TOKEN:
|
||||
if version_directive != nil {
|
||||
return formatParserError(
|
||||
"found duplicate %YAML directive", token.StartMark)
|
||||
}
|
||||
if token.major != 1 || token.minor != 1 {
|
||||
return formatParserError(
|
||||
"found incompatible YAML document", token.StartMark)
|
||||
}
|
||||
version_directive = &VersionDirective{
|
||||
major: token.major,
|
||||
minor: token.minor,
|
||||
}
|
||||
case TAG_DIRECTIVE_TOKEN:
|
||||
value := TagDirective{
|
||||
handle: token.Value,
|
||||
prefix: token.prefix,
|
||||
}
|
||||
if err := parser.appendTagDirective(value, false, token.StartMark); err != nil {
|
||||
return err
|
||||
}
|
||||
tag_directives = append(tag_directives, value)
|
||||
}
|
||||
|
||||
parser.skipToken()
|
||||
if err := parser.peekToken(&token); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for i := range default_tag_directives {
|
||||
if err := parser.appendTagDirective(default_tag_directives[i], true, token.StartMark); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if version_directive_ref != nil {
|
||||
*version_directive_ref = version_directive
|
||||
}
|
||||
if tag_directives_ref != nil {
|
||||
*tag_directives_ref = tag_directives
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Append a tag directive to the directives stack.
|
||||
func (parser *Parser) appendTagDirective(value TagDirective, allow_duplicates bool, mark Mark) error {
|
||||
for i := range parser.tag_directives {
|
||||
if bytes.Equal(value.handle, parser.tag_directives[i].handle) {
|
||||
if allow_duplicates {
|
||||
return nil
|
||||
}
|
||||
return formatParserError("found duplicate %TAG directive", mark)
|
||||
}
|
||||
}
|
||||
|
||||
// [Go] I suspect the copy is unnecessary. This was likely done
|
||||
// because there was no way to track ownership of the data.
|
||||
value_copy := TagDirective{
|
||||
handle: make([]byte, len(value.handle)),
|
||||
prefix: make([]byte, len(value.prefix)),
|
||||
}
|
||||
copy(value_copy.handle, value.handle)
|
||||
copy(value_copy.prefix, value.prefix)
|
||||
parser.tag_directives = append(parser.tag_directives, value_copy)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ParserGetEvents parses the YAML input and returns the generated event stream.
|
||||
func ParserGetEvents(in []byte) (string, error) {
|
||||
p := NewComposer(in)
|
||||
p := NewComposer(in, nil)
|
||||
defer p.Destroy()
|
||||
var events strings.Builder
|
||||
var event Event
|
||||
@@ -1280,6 +1550,8 @@ func ParserGetEvents(in []byte) (string, error) {
|
||||
return events.String(), nil
|
||||
}
|
||||
|
||||
// formatEvent formats an event as a human-readable string for debugging
|
||||
// and testing purposes.
|
||||
func formatEvent(e *Event) string {
|
||||
var b strings.Builder
|
||||
switch e.Type {
|
||||
|
||||
Reference in New Issue
Block a user