updated vendor

2026-06-16 08:02:19 +02:00
parent 2f7f99d3f0
commit 77299d0c64
1283 changed files with 67302 additions and 208958 deletions
@@ -4,7 +4,8 @@
 // SPDX-License-Identifier: Apache-2.0 AND MIT

 // Parser stage: Transforms token stream into event stream.
-// Implements a recursive-descent parser (LL(1)) following the YAML grammar specification.
+// Implements a recursive-descent parser (LL(1)) following the YAML grammar
+// specification.
 //
 // The parser implements the following grammar:
 //
@@ -52,59 +53,213 @@ import (
 	"strings"
 )

-// Peek the next token in the token queue.
-func (parser *Parser) peekToken(out **Token) error {
-	if !parser.token_available {
-		if err := parser.fetchMoreTokens(); err != nil {
-			return err
-		}
-	}
+// ReadHandler is called by the [Parser] when it needs to read more bytes
+// from the input source.  The handler should fill the provided buffer with
+// up to len(buffer) bytes from the input source.
+//
+// The arguments are as follows:
+//
+// [in]       parser      The parser object.
+// [out]      buffer      The buffer for reading.
+// [out]      n           The number of bytes written into buffer.
+//
+// On failure, the handler should return a non-nil error.  On EOF, the
+// handler should report it through err, as the string handler in this
+// file does by returning 0 and io.EOF.
+type ReadHandler func(parser *Parser, buffer []byte) (n int, err error)

-	token := &parser.tokens[parser.tokens_head]
-	parser.UnfoldComments(token)
-	*out = token
-	return nil
+// SimpleKey holds information about a potential simple key.
+type SimpleKey struct {
+	flow_level   int  // What flow level is the key at?
+	required     bool // Is a simple key required?
+	token_number int  // The number of the token.
+	mark         Mark // The position mark.
 }

-// UnfoldComments walks through the comments queue and joins all
-// comments behind the position of the provided token into the respective
-// top-level comment slices in the parser.
-func (parser *Parser) UnfoldComments(token *Token) {
-	for parser.comments_head < len(parser.comments) && token.StartMark.Index >= parser.comments[parser.comments_head].TokenMark.Index {
-		comment := &parser.comments[parser.comments_head]
-		if len(comment.Head) > 0 {
-			if token.Type == BLOCK_END_TOKEN {
-				// No heads on ends, so keep comment.Head for a follow up token.
-				break
-			}
-			if len(parser.HeadComment) > 0 {
-				parser.HeadComment = append(parser.HeadComment, '\n')
-			}
-			parser.HeadComment = append(parser.HeadComment, comment.Head...)
-		}
-		if len(comment.Foot) > 0 {
-			if len(parser.FootComment) > 0 {
-				parser.FootComment = append(parser.FootComment, '\n')
-			}
-			parser.FootComment = append(parser.FootComment, comment.Foot...)
-		}
-		if len(comment.Line) > 0 {
-			if len(parser.LineComment) > 0 {
-				parser.LineComment = append(parser.LineComment, '\n')
-			}
-			parser.LineComment = append(parser.LineComment, comment.Line...)
-		}
-		*comment = Comment{}
-		parser.comments_head++
+// ParserState represents the state of the parser.
+type ParserState int
+
+// Parser state constants define the different states the parser can be in.
+const (
+	PARSE_STREAM_START_STATE ParserState = iota
+
+	PARSE_IMPLICIT_DOCUMENT_START_STATE           // Expect the beginning of an implicit document.
+	PARSE_DOCUMENT_START_STATE                    // Expect DOCUMENT-START.
+	PARSE_DOCUMENT_CONTENT_STATE                  // Expect the content of a document.
+	PARSE_DOCUMENT_END_STATE                      // Expect DOCUMENT-END.
+	PARSE_BLOCK_NODE_STATE                        // Expect a block node.
+	PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE        // Expect the first entry of a block sequence.
+	PARSE_BLOCK_SEQUENCE_ENTRY_STATE              // Expect an entry of a block sequence.
+	PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE         // Expect an entry of an indentless sequence.
+	PARSE_BLOCK_MAPPING_FIRST_KEY_STATE           // Expect the first key of a block mapping.
+	PARSE_BLOCK_MAPPING_KEY_STATE                 // Expect a block mapping key.
+	PARSE_BLOCK_MAPPING_VALUE_STATE               // Expect a block mapping value.
+	PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE         // Expect the first entry of a flow sequence.
+	PARSE_FLOW_SEQUENCE_ENTRY_STATE               // Expect an entry of a flow sequence.
+	PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE   // Expect a key of an ordered mapping.
+	PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE // Expect a value of an ordered mapping.
+	PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE   // Expect the end of an ordered mapping entry.
+	PARSE_FLOW_MAPPING_FIRST_KEY_STATE            // Expect the first key of a flow mapping.
+	PARSE_FLOW_MAPPING_KEY_STATE                  // Expect a key of a flow mapping.
+	PARSE_FLOW_MAPPING_VALUE_STATE                // Expect a value of a flow mapping.
+	PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE          // Expect an empty value of a flow mapping.
+	PARSE_END_STATE                               // Expect nothing.
+)
+
+// String returns a string representation of the parser state.
+func (ps ParserState) String() string {
+	switch ps {
+	case PARSE_STREAM_START_STATE:
+		return "PARSE_STREAM_START_STATE"
+	case PARSE_IMPLICIT_DOCUMENT_START_STATE:
+		return "PARSE_IMPLICIT_DOCUMENT_START_STATE"
+	case PARSE_DOCUMENT_START_STATE:
+		return "PARSE_DOCUMENT_START_STATE"
+	case PARSE_DOCUMENT_CONTENT_STATE:
+		return "PARSE_DOCUMENT_CONTENT_STATE"
+	case PARSE_DOCUMENT_END_STATE:
+		return "PARSE_DOCUMENT_END_STATE"
+	case PARSE_BLOCK_NODE_STATE:
+		return "PARSE_BLOCK_NODE_STATE"
+	case PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE:
+		return "PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE"
+	case PARSE_BLOCK_SEQUENCE_ENTRY_STATE:
+		return "PARSE_BLOCK_SEQUENCE_ENTRY_STATE"
+	case PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE:
+		return "PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE"
+	case PARSE_BLOCK_MAPPING_FIRST_KEY_STATE:
+		return "PARSE_BLOCK_MAPPING_FIRST_KEY_STATE"
+	case PARSE_BLOCK_MAPPING_KEY_STATE:
+		return "PARSE_BLOCK_MAPPING_KEY_STATE"
+	case PARSE_BLOCK_MAPPING_VALUE_STATE:
+		return "PARSE_BLOCK_MAPPING_VALUE_STATE"
+	case PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE:
+		return "PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE"
+	case PARSE_FLOW_SEQUENCE_ENTRY_STATE:
+		return "PARSE_FLOW_SEQUENCE_ENTRY_STATE"
+	case PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE:
+		return "PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE"
+	case PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE:
+		return "PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE"
+	case PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE:
+		return "PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE"
+	case PARSE_FLOW_MAPPING_FIRST_KEY_STATE:
+		return "PARSE_FLOW_MAPPING_FIRST_KEY_STATE"
+	case PARSE_FLOW_MAPPING_KEY_STATE:
+		return "PARSE_FLOW_MAPPING_KEY_STATE"
+	case PARSE_FLOW_MAPPING_VALUE_STATE:
+		return "PARSE_FLOW_MAPPING_VALUE_STATE"
+	case PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE:
+		return "PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE"
+	case PARSE_END_STATE:
+		return "PARSE_END_STATE"
 	}
+	return "<unknown parser state>"
 }

-// Remove the next token from the queue (must be called after peek_token).
-func (parser *Parser) skipToken() {
-	parser.token_available = false
-	parser.tokens_parsed++
-	parser.stream_end_produced = parser.tokens[parser.tokens_head].Type == STREAM_END_TOKEN
-	parser.tokens_head++
+// AliasData holds information about aliases.
+type AliasData struct {
+	anchor []byte // The anchor.
+	index  int    // The node id.
+	mark   Mark   // The anchor mark.
+}
+
+// Comment holds information about a comment in the YAML stream.
+type Comment struct {
+	ScanMark  Mark // Position where scanning for comments started
+	TokenMark Mark // Position after which tokens will be associated with this comment
+	StartMark Mark // Position of '#' comment mark
+	EndMark   Mark // Position where comment terminated
+
+	Head []byte
+	Line []byte
+	Foot []byte
+}
+
+// Parser structure holds all information about the current
+// state of the parser.
+type Parser struct {
+	lastError error
+
+	// Reader stuff
+	read_handler ReadHandler // Read handler.
+
+	input_reader io.Reader // File input data.
+	input        []byte    // String input data.
+	input_pos    int
+
+	eof bool // EOF flag
+
+	buffer     []byte // The working buffer.
+	buffer_pos int    // The current position of the buffer.
+
+	unread int // The number of unread characters in the buffer.
+
+	newlines int // The number of line breaks since last non-break/non-blank character
+
+	raw_buffer     []byte // The raw buffer.
+	raw_buffer_pos int    // The current position of the buffer.
+
+	encoding Encoding // The input encoding.
+
+	offset int  // The offset of the current position (in bytes).
+	mark   Mark // The mark of the current position.
+
+	// Comments
+
+	HeadComment  []byte // The current head comments
+	LineComment  []byte // The current line comments
+	FootComment  []byte // The current foot comments
+	tail_comment []byte // Foot comment that happens at the end of a block.
+	stem_comment []byte // Comment in item preceding a nested structure (list inside list item, etc)
+
+	comments      []Comment // The folded comments for all parsed tokens
+	comments_head int
+
+	skip_comments bool // Skip comment scanning for performance
+
+	// Scanner stuff
+
+	stream_start_produced bool // Have we started to scan the input stream?
+	stream_end_produced   bool // Have we reached the end of the input stream?
+
+	flow_level int // The number of unclosed '[' and '{' indicators.
+
+	tokens          []Token // The tokens queue.
+	tokens_head     int     // The head of the tokens queue.
+	tokens_parsed   int     // The number of tokens fetched from the queue.
+	token_available bool    // Does the tokens queue contain a token ready for dequeueing.
+
+	indent  int   // The current indentation level.
+	indents []int // The indentation levels stack.
+
+	simple_key_allowed  bool        // May a simple key occur at the current position?
+	simple_key_possible bool        // Is the current simple key possible?
+	simple_key          SimpleKey   // The current simple key.
+	simple_key_stack    []SimpleKey // The stack of simple keys.
+
+	depthCheck func(int, *DepthContext) error // Depth limit check function
+
+	// Parser stuff
+
+	state          ParserState    // The current parser state.
+	states         []ParserState  // The parser states stack.
+	marks          []Mark         // The stack of marks.
+	tag_directives []TagDirective // The list of TAG directives.
+
+	// Representer stuff
+
+	aliases []AliasData // The alias data.
+}
+
+// NewParser creates a new parser object.
+func NewParser() Parser {
+	return Parser{
+		raw_buffer: make([]byte, 0, input_raw_buffer_size),
+		buffer:     make([]byte, 0, input_buffer_size),
+		mark:       Mark{Line: 1, Column: 1},
+		depthCheck: DefaultDepthCheck,
+	}
 }

 // Parse gets the next event.
@@ -130,21 +285,74 @@ func (parser *Parser) Parse(event *Event) error {
 	return nil
 }

-func formatParserError(problem string, problem_mark Mark) error {
-	return ParserError{
-		Mark:    problem_mark,
-		Message: problem,
-	}
+// Delete a parser object.
+func (parser *Parser) Delete() {
+	*parser = Parser{}
 }

-func formatParserErrorContext(context string, context_mark Mark, problem string, problem_mark Mark) error {
-	return ParserError{
-		ContextMark:    context_mark,
-		ContextMessage: context,
-
-		Mark:    problem_mark,
-		Message: problem,
+// String read handler.
+func yamlStringReadHandler(parser *Parser, buffer []byte) (n int, err error) {
+	if parser.input_pos == len(parser.input) {
+		return 0, io.EOF
 	}
+	n = copy(buffer, parser.input[parser.input_pos:])
+	parser.input_pos += n
+	return n, nil
+}
+
+// Reader read handler.
+func yamlReaderReadHandler(parser *Parser, buffer []byte) (n int, err error) {
+	return parser.input_reader.Read(buffer)
+}
+
+// SetInputString sets a string input.
+func (parser *Parser) SetInputString(input []byte) {
+	if parser.read_handler != nil {
+		panic("must set the input source only once")
+	}
+	parser.read_handler = yamlStringReadHandler
+	parser.input = input
+	parser.input_pos = 0
+}
+
+// SetInputReader sets a file input.
+func (parser *Parser) SetInputReader(r io.Reader) {
+	if parser.read_handler != nil {
+		panic("must set the input source only once")
+	}
+	parser.read_handler = yamlReaderReadHandler
+	parser.input_reader = r
+}
+
+// SetEncoding sets the source encoding.
+func (parser *Parser) SetEncoding(encoding Encoding) {
+	if parser.encoding != ANY_ENCODING {
+		panic("must set the encoding only once")
+	}
+	parser.encoding = encoding
+}
+
+// GetPendingComments returns the parser's comment queue for CLI access.
+func (parser *Parser) GetPendingComments() []Comment {
+	return parser.comments
+}
+
+// GetCommentsHead returns the current position in the comment queue.
+func (parser *Parser) GetCommentsHead() int {
+	return parser.comments_head
+}
+
+// SetSkipComments enables or disables comment scanning.
+// When enabled, the scanner skips comment tokens for better performance.
+func (parser *Parser) SetSkipComments(skip bool) {
+	parser.skip_comments = skip
+}
+
+// default_tag_directives defines the standard tag directives (! and !!)
+// that are implicitly available in all YAML documents.
+var default_tag_directives = []TagDirective{
+	{[]byte("!"), []byte("!")},
+	{[]byte("!!"), []byte("tag:yaml.org,2002:")},
 }

 // State dispatcher.
@@ -221,9 +429,9 @@ func (parser *Parser) stateMachine(event *Event) error {
 }

 // Parse the production:
-// stream   ::= STREAM-START implicit_document? explicit_document* STREAM-END
 //
-//	************
+//	stream   ::= STREAM-START implicit_document? explicit_document* STREAM-END
+//	             ************
 func (parser *Parser) parseStreamStart(event *Event) error {
 	var token *Token
 	if err := parser.peekToken(&token); err != nil {
@@ -244,13 +452,11 @@ func (parser *Parser) parseStreamStart(event *Event) error {
 }

 // Parse the productions:
-// implicit_document    ::= block_node DOCUMENT-END*
 //
-//	*
-//
-// explicit_document    ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
-//
-//	*************************
+//	implicit_document    ::= block_node DOCUMENT-END*
+//	                         *
+//	explicit_document    ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
+//	                         *************************
 func (parser *Parser) parseDocumentStart(event *Event, implicit bool) error {
 	var token *Token
 	if err := parser.peekToken(&token); err != nil {
@@ -258,12 +464,10 @@ func (parser *Parser) parseDocumentStart(event *Event, implicit bool) error {
 	}

 	// Parse extra document end indicators.
-	if !implicit {
-		for token.Type == DOCUMENT_END_TOKEN {
-			parser.skipToken()
-			if err := parser.peekToken(&token); err != nil {
-				return err
-			}
+	for token.Type == DOCUMENT_END_TOKEN {
+		parser.skipToken()
+		if err := parser.peekToken(&token); err != nil {
+			return err
 		}
 	}

@@ -280,9 +484,11 @@ func (parser *Parser) parseDocumentStart(event *Event, implicit bool) error {

 		var head_comment []byte
 		if len(parser.HeadComment) > 0 {
-			// [Go] Scan the header comment backwards, and if an empty line is found, break
-			//      the header so the part before the last empty line goes into the
-			//      document header, while the bottom of it goes into a follow up event.
+			// [Go] Scan the header comment backwards, and if an
+			// empty line is found, break the header so the part
+			// before the last empty line goes into the document
+			// header, while the bottom of it goes into a follow up
+			// event.
 			for i := len(parser.HeadComment) - 1; i > 0; i-- {
 				if parser.HeadComment[i] == '\n' {
 					if i == len(parser.HeadComment)-1 {
@@ -351,9 +557,9 @@ func (parser *Parser) parseDocumentStart(event *Event, implicit bool) error {
 }

 // Parse the productions:
-// explicit_document    ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
 //
-//	***********
+//	explicit_document    ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
+//	                                                   ***********
 func (parser *Parser) parseDocumentContent(event *Event) error {
 	var token *Token
 	if err := parser.peekToken(&token); err != nil {
@@ -374,11 +580,10 @@ func (parser *Parser) parseDocumentContent(event *Event) error {
 }

 // Parse the productions:
-// implicit_document    ::= block_node DOCUMENT-END*
 //
-//	*************
-//
-// explicit_document    ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
+//	implicit_document    ::= block_node DOCUMENT-END*
+//	                                    *************
+//	explicit_document    ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
 func (parser *Parser) parseDocumentEnd(event *Event) error {
 	var token *Token
 	if err := parser.peekToken(&token); err != nil {
@@ -412,54 +617,113 @@ func (parser *Parser) parseDocumentEnd(event *Event) error {
 	return nil
 }

-func (parser *Parser) setEventComments(event *Event) {
-	event.HeadComment = parser.HeadComment
-	event.LineComment = parser.LineComment
-	event.FootComment = parser.FootComment
-	parser.HeadComment = nil
-	parser.LineComment = nil
-	parser.FootComment = nil
-	parser.tail_comment = nil
-	parser.stem_comment = nil
+// Parse directives.
+func (parser *Parser) processDirectives(version_directive_ref **VersionDirective, tag_directives_ref *[]TagDirective) error {
+	var version_directive *VersionDirective
+	var tag_directives []TagDirective
+
+	var token *Token
+	if err := parser.peekToken(&token); err != nil {
+		return err
+	}
+
+	for token.Type == VERSION_DIRECTIVE_TOKEN || token.Type == TAG_DIRECTIVE_TOKEN {
+		switch token.Type {
+		case VERSION_DIRECTIVE_TOKEN:
+			if version_directive != nil {
+				return formatParserError(
+					"found duplicate %YAML directive", token.StartMark)
+			}
+			if token.major != 1 || token.minor != 1 {
+				return formatParserError(
+					"found incompatible YAML document", token.StartMark)
+			}
+			version_directive = &VersionDirective{
+				major: token.major,
+				minor: token.minor,
+			}
+		case TAG_DIRECTIVE_TOKEN:
+			value := TagDirective{
+				handle: token.Value,
+				prefix: token.prefix,
+			}
+			if err := parser.appendTagDirective(value, false, token.StartMark); err != nil {
+				return err
+			}
+			tag_directives = append(tag_directives, value)
+		}
+
+		parser.skipToken()
+		if err := parser.peekToken(&token); err != nil {
+			return err
+		}
+	}
+
+	for i := range default_tag_directives {
+		if err := parser.appendTagDirective(default_tag_directives[i], true, token.StartMark); err != nil {
+			return err
+		}
+	}
+
+	if version_directive_ref != nil {
+		*version_directive_ref = version_directive
+	}
+	if tag_directives_ref != nil {
+		*tag_directives_ref = tag_directives
+	}
+	return nil
+}
+
+// Append a tag directive to the directives stack.
+func (parser *Parser) appendTagDirective(value TagDirective, allow_duplicates bool, mark Mark) error {
+	for i := range parser.tag_directives {
+		if bytes.Equal(value.handle, parser.tag_directives[i].handle) {
+			if allow_duplicates {
+				return nil
+			}
+			return formatParserError("found duplicate %TAG directive", mark)
+		}
+	}
+
+	// [Go] I suspect the copy is unnecessary. This was likely done
+	// because there was no way to track ownership of the data.
+	value_copy := TagDirective{
+		handle: make([]byte, len(value.handle)),
+		prefix: make([]byte, len(value.prefix)),
+	}
+	copy(value_copy.handle, value.handle)
+	copy(value_copy.prefix, value.prefix)
+	parser.tag_directives = append(parser.tag_directives, value_copy)
+	return nil
 }

 // Parse the productions:
-// block_node_or_indentless_sequence    ::=
 //
-//	ALIAS
-//	*****
-//	| properties (block_content | indentless_block_sequence)?
-//	  **********  *
-//	| block_content | indentless_block_sequence
-//	  *
-//
-// block_node           ::= ALIAS
-//
-//	*****
-//	| properties block_content?
-//	  ********** *
-//	| block_content
-//	  *
-//
-// flow_node            ::= ALIAS
-//
-//	*****
-//	| properties flow_content?
-//	  ********** *
-//	| flow_content
-//	  *
-//
-// properties           ::= TAG ANCHOR? | ANCHOR TAG?
-//
-//	*************************
-//
-// block_content        ::= block_collection | flow_collection | SCALAR
-//
-//	******
-//
-// flow_content         ::= flow_collection | SCALAR
-//
-//	******
+//	block_node_or_indentless_sequence    ::=
+//	                         ALIAS
+//	                         *****
+//	                         | properties (block_content | indentless_block_sequence)?
+//	                           **********  *
+//	                         | block_content | indentless_block_sequence
+//	                           *
+//	block_node           ::= ALIAS
+//	                         *****
+//	                         | properties block_content?
+//	                           ********** *
+//	                         | block_content
+//	                           *
+//	flow_node            ::= ALIAS
+//	                         *****
+//	                         | properties flow_content?
+//	                           ********** *
+//	                         | flow_content
+//	                           *
+//	properties           ::= TAG ANCHOR? | ANCHOR TAG?
+//	                         *************************
+//	block_content        ::= block_collection | flow_collection | SCALAR
+//	                                                              ******
+//	flow_content         ::= flow_collection | SCALAR
+//	                                           ******
 func (parser *Parser) parseNode(event *Event, block, indentless_sequence bool) error {
 	// defer trace("yaml_parser_parse_node", "block:", block, "indentless_sequence:", indentless_sequence)()

@@ -683,9 +947,9 @@ func (parser *Parser) parseNode(event *Event, block, indentless_sequence bool) e
 }

 // Parse the productions:
-// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
 //
-//	********************  *********** *             *********
+//	block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
+//	                   ********************  *********** *             *********
 func (parser *Parser) parseBlockSequenceEntry(event *Event, first bool) error {
 	if first {
 		var token *Token
@@ -742,9 +1006,9 @@ func (parser *Parser) parseBlockSequenceEntry(event *Event, first bool) error {
 }

 // Parse the productions:
-// indentless_sequence  ::= (BLOCK-ENTRY block_node?)+
 //
-//	*********** *
+//	indentless_sequence  ::= (BLOCK-ENTRY block_node?)+
+//	                          *********** *
 func (parser *Parser) parseIndentlessSequenceEntry(event *Event) error {
 	var token *Token
 	if err := parser.peekToken(&token); err != nil {
@@ -784,10 +1048,11 @@ func (parser *Parser) parseIndentlessSequenceEntry(event *Event) error {

 // Split stem comment from head comment.
 //
-// When a sequence or map is found under a sequence entry, the former head comment
-// is assigned to the underlying sequence or map as a whole, not the individual
-// sequence or map entry as would be expected otherwise. To handle this case the
-// previous head comment is moved aside as the stem comment.
+// When a sequence or map is found under a sequence entry, the former head
+// comment is assigned to the underlying sequence or map as a whole, not the
+// individual sequence or map entry as would be expected otherwise.
+// To handle this case the previous head comment is moved aside as the stem
+// comment.
 func (parser *Parser) splitStemComment(stem_len int) error {
 	if stem_len == 0 {
 		return nil
@@ -813,15 +1078,15 @@ func (parser *Parser) splitStemComment(stem_len int) error {
 }

 // Parse the productions:
-// block_mapping        ::= BLOCK-MAPPING_START
 //
-//	*******************
-//	((KEY block_node_or_indentless_sequence?)?
-//	  *** *
-//	(VALUE block_node_or_indentless_sequence?)?)*
+//	block_mapping        ::= BLOCK-MAPPING_START
+//	                         *******************
+//	                         ((KEY block_node_or_indentless_sequence?)?
+//	                           *** *
+//	                         (VALUE block_node_or_indentless_sequence?)?)*
 //
-//	BLOCK-END
-//	*********
+//	                         BLOCK-END
+//	                         *********
 func (parser *Parser) parseBlockMappingKey(event *Event, first bool) error {
 	if first {
 		var token *Token
@@ -837,8 +1102,9 @@ func (parser *Parser) parseBlockMappingKey(event *Event, first bool) error {
 		return err
 	}

-	// [Go] A tail comment was left from the prior mapping value processed. Emit an event
-	//      as it needs to be processed with that value and not the following key.
+	// [Go] A tail comment was left from the prior mapping value processed.
+	// Emit an event as it needs to be processed with that value and not
+	// the following key.
 	if len(parser.tail_comment) > 0 {
 		*event = Event{
 			Type:        TAIL_COMMENT_EVENT,
@@ -888,13 +1154,14 @@ func (parser *Parser) parseBlockMappingKey(event *Event, first bool) error {
 }

 // Parse the productions:
-// block_mapping        ::= BLOCK-MAPPING_START
 //
-//	((KEY block_node_or_indentless_sequence?)?
+//	block_mapping        ::= BLOCK-MAPPING_START
 //
-//	(VALUE block_node_or_indentless_sequence?)?)*
-//	 ***** *
-//	BLOCK-END
+//	                          ((KEY block_node_or_indentless_sequence?)?
+//
+//	                          (VALUE block_node_or_indentless_sequence?)?)*
+//	                           ***** *
+//	                          BLOCK-END
 func (parser *Parser) parseBlockMappingValue(event *Event) error {
 	var token *Token
 	if err := parser.peekToken(&token); err != nil {
@@ -920,19 +1187,17 @@ func (parser *Parser) parseBlockMappingValue(event *Event) error {
 }

 // Parse the productions:
-// flow_sequence        ::= FLOW-SEQUENCE-START
 //
-//	*******************
-//	(flow_sequence_entry FLOW-ENTRY)*
-//	 *                   **********
-//	flow_sequence_entry?
-//	*
-//	FLOW-SEQUENCE-END
-//	*****************
-//
-// flow_sequence_entry  ::= flow_node | KEY flow_node? (VALUE flow_node?)?
-//
-//	*
+//	flow_sequence        ::= FLOW-SEQUENCE-START
+//	                         *******************
+//	                         (flow_sequence_entry FLOW-ENTRY)*
+//	                          *                   **********
+//	                         flow_sequence_entry?
+//	                         *
+//	                         FLOW-SEQUENCE-END
+//	                         *****************
+//	flow_sequence_entry  ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+//	                         *
 func (parser *Parser) parseFlowSequenceEntry(event *Event, first bool) error {
 	if first {
 		var token *Token
@@ -995,9 +1260,9 @@ func (parser *Parser) parseFlowSequenceEntry(event *Event, first bool) error {
 }

 // Parse the productions:
-// flow_sequence_entry  ::= flow_node | KEY flow_node? (VALUE flow_node?)?
 //
-//	*** *
+//	flow_sequence_entry  ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+//	                                     *** *
 func (parser *Parser) parseFlowSequenceEntryMappingKey(event *Event) error {
 	var token *Token
 	if err := parser.peekToken(&token); err != nil {
@@ -1016,9 +1281,9 @@ func (parser *Parser) parseFlowSequenceEntryMappingKey(event *Event) error {
 }

 // Parse the productions:
-// flow_sequence_entry  ::= flow_node | KEY flow_node? (VALUE flow_node?)?
 //
-//	***** *
+//	flow_sequence_entry  ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+//	                                                     ***** *
 func (parser *Parser) parseFlowSequenceEntryMappingValue(event *Event) error {
 	var token *Token
 	if err := parser.peekToken(&token); err != nil {
@@ -1040,9 +1305,9 @@ func (parser *Parser) parseFlowSequenceEntryMappingValue(event *Event) error {
 }

 // Parse the productions:
-// flow_sequence_entry  ::= flow_node | KEY flow_node? (VALUE flow_node?)?
 //
-//	*
+//	flow_sequence_entry  ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+//	                                                                     *
 func (parser *Parser) parseFlowSequenceEntryMappingEnd(event *Event) error {
 	var token *Token
 	if err := parser.peekToken(&token); err != nil {
@@ -1058,18 +1323,17 @@ func (parser *Parser) parseFlowSequenceEntryMappingEnd(event *Event) error {
 }

 // Parse the productions:
-// flow_mapping         ::= FLOW-MAPPING-START
 //
-//	******************
-//	(flow_mapping_entry FLOW-ENTRY)*
-//	 *                  **********
-//	flow_mapping_entry?
-//	******************
-//	FLOW-MAPPING-END
-//	****************
-//
-// flow_mapping_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
-//   - *** *
+//	flow_mapping         ::= FLOW-MAPPING-START
+//	                         ******************
+//	                         (flow_mapping_entry FLOW-ENTRY)*
+//	                          *                  **********
+//	                         flow_mapping_entry?
+//	                         ******************
+//	                         FLOW-MAPPING-END
+//	                         ****************
+//	flow_mapping_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+//	                         *           *** *
 func (parser *Parser) parseFlowMappingKey(event *Event, first bool) error {
 	if first {
 		var token *Token
@@ -1135,8 +1399,9 @@ func (parser *Parser) parseFlowMappingKey(event *Event, first bool) error {
 }

 // Parse the productions:
-// flow_mapping_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
-//   - ***** *
+//
+//	flow_mapping_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+//	                                  *                  ***** *
 func (parser *Parser) parseFlowMappingValue(event *Event, empty bool) error {
 	var token *Token
 	if err := parser.peekToken(&token); err != nil {
@@ -1160,6 +1425,96 @@ func (parser *Parser) parseFlowMappingValue(event *Event, empty bool) error {
 	return parser.processEmptyScalar(event, token.StartMark)
 }

+// Peek the next token in the token queue.
+func (parser *Parser) peekToken(out **Token) error {
+	if !parser.token_available {
+		if err := parser.fetchMoreTokens(); err != nil {
+			return err
+		}
+	}
+
+	token := &parser.tokens[parser.tokens_head]
+	parser.UnfoldComments(token)
+	*out = token
+	return nil
+}
+
+// UnfoldComments walks through the comments queue and joins all
+// comments behind the position of the provided token into the respective
+// top-level comment slices in the parser.
+func (parser *Parser) UnfoldComments(token *Token) {
+	for parser.comments_head < len(parser.comments) && token.StartMark.Index >= parser.comments[parser.comments_head].TokenMark.Index {
+		comment := &parser.comments[parser.comments_head]
+		if len(comment.Head) > 0 {
+			if token.Type == BLOCK_END_TOKEN {
+				// No heads on ends, so keep comment.Head for a follow up token.
+				break
+			}
+			if len(parser.HeadComment) > 0 {
+				parser.HeadComment = append(parser.HeadComment, '\n')
+			}
+			parser.HeadComment = append(parser.HeadComment, comment.Head...)
+		}
+		if len(comment.Foot) > 0 {
+			if len(parser.FootComment) > 0 {
+				parser.FootComment = append(parser.FootComment, '\n')
+			}
+			parser.FootComment = append(parser.FootComment, comment.Foot...)
+		}
+		if len(comment.Line) > 0 {
+			if len(parser.LineComment) > 0 {
+				parser.LineComment = append(parser.LineComment, '\n')
+			}
+			parser.LineComment = append(parser.LineComment, comment.Line...)
+		}
+		*comment = Comment{}
+		parser.comments_head++
+	}
+}
+
+// Remove the next token from the queue (must be called after peekToken).
+func (parser *Parser) skipToken() {
+	parser.token_available = false
+	parser.tokens_parsed++
+	parser.stream_end_produced = parser.tokens[parser.tokens_head].Type == STREAM_END_TOKEN
+	parser.tokens_head++
+}
+
+// formatParserError creates a LoadError with the given problem message
+// and mark position.
+func formatParserError(problem string, problemMark Mark) *LoadError {
+	return &LoadError{
+		Stage:   ParserStage,
+		Mark:    problemMark,
+		Message: problem,
+	}
+}
+
+// formatParserErrorContext creates a LoadError with both context and
+// problem information, each with their own mark positions.
+func formatParserErrorContext(context string, contextMark Mark, problem string, problemMark Mark) *LoadError {
+	return &LoadError{
+		Stage:       ParserStage,
+		ContextMark: contextMark,
+		ContextMsg:  context,
+		Mark:        problemMark,
+		Message:     problem,
+	}
+}
+
+// setEventComments transfers accumulated comments from the parser to the
+// event and clears the parser's comment state.
+func (parser *Parser) setEventComments(event *Event) {
+	event.HeadComment = parser.HeadComment
+	event.LineComment = parser.LineComment
+	event.FootComment = parser.FootComment
+	parser.HeadComment = nil
+	parser.LineComment = nil
+	parser.FootComment = nil
+	parser.tail_comment = nil
+	parser.stem_comment = nil
+}
+
 // Generate an empty scalar event.
 func (parser *Parser) processEmptyScalar(event *Event, mark Mark) error {
 	*event = Event{
@@ -1173,94 +1528,9 @@ func (parser *Parser) processEmptyScalar(event *Event, mark Mark) error {
 	return nil
 }

-var default_tag_directives = []TagDirective{
-	{[]byte("!"), []byte("!")},
-	{[]byte("!!"), []byte("tag:yaml.org,2002:")},
-}
-
-// Parse directives.
-func (parser *Parser) processDirectives(version_directive_ref **VersionDirective, tag_directives_ref *[]TagDirective) error {
-	var version_directive *VersionDirective
-	var tag_directives []TagDirective
-
-	var token *Token
-	if err := parser.peekToken(&token); err != nil {
-		return err
-	}
-
-	for token.Type == VERSION_DIRECTIVE_TOKEN || token.Type == TAG_DIRECTIVE_TOKEN {
-		switch token.Type {
-		case VERSION_DIRECTIVE_TOKEN:
-			if version_directive != nil {
-				return formatParserError(
-					"found duplicate %YAML directive", token.StartMark)
-			}
-			if token.major != 1 || token.minor != 1 {
-				return formatParserError(
-					"found incompatible YAML document", token.StartMark)
-			}
-			version_directive = &VersionDirective{
-				major: token.major,
-				minor: token.minor,
-			}
-		case TAG_DIRECTIVE_TOKEN:
-			value := TagDirective{
-				handle: token.Value,
-				prefix: token.prefix,
-			}
-			if err := parser.appendTagDirective(value, false, token.StartMark); err != nil {
-				return err
-			}
-			tag_directives = append(tag_directives, value)
-		}
-
-		parser.skipToken()
-		if err := parser.peekToken(&token); err != nil {
-			return err
-		}
-	}
-
-	for i := range default_tag_directives {
-		if err := parser.appendTagDirective(default_tag_directives[i], true, token.StartMark); err != nil {
-			return err
-		}
-	}
-
-	if version_directive_ref != nil {
-		*version_directive_ref = version_directive
-	}
-	if tag_directives_ref != nil {
-		*tag_directives_ref = tag_directives
-	}
-	return nil
-}
-
-// Append a tag directive to the directives stack.
-func (parser *Parser) appendTagDirective(value TagDirective, allow_duplicates bool, mark Mark) error {
-	for i := range parser.tag_directives {
-		if bytes.Equal(value.handle, parser.tag_directives[i].handle) {
-			if allow_duplicates {
-				return nil
-			}
-			return formatParserError("found duplicate %TAG directive", mark)
-		}
-	}
-
-	// [Go] I suspect the copy is unnecessary. This was likely done
-	// because there was no way to track ownership of the data.
-	value_copy := TagDirective{
-		handle: make([]byte, len(value.handle)),
-		prefix: make([]byte, len(value.prefix)),
-	}
-	copy(value_copy.handle, value.handle)
-	copy(value_copy.prefix, value.prefix)
-	parser.tag_directives = append(parser.tag_directives, value_copy)
-	return nil
-}
-
 // ParserGetEvents parses the YAML input and returns the generated event stream.
 func ParserGetEvents(in []byte) (string, error) {
-	p := NewComposer(in)
+	p := NewComposer(in, nil)
 	defer p.Destroy()
 	var events strings.Builder
 	var event Event
@@ -1280,6 +1550,8 @@ func ParserGetEvents(in []byte) (string, error) {
 	return events.String(), nil
 }

+// formatEvent formats an event as a human-readable string for debugging
+// and testing purposes.
 func formatEvent(e *Event) string {
 	var b strings.Builder
 	switch e.Type {