working commit

2026-03-13 19:02:42 +02:00
parent bebbf79c7a
commit 5c1da77f4c
1329 changed files with 314708 additions and 39 deletions
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2020-2022 wazero authors
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
@@ -0,0 +1,118 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"math"
+
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// decodeCode decodes one size-prefixed entry of the code section: a vector
+// of local declarations followed by the function body.
+//
+// The returned wasm.Code has LocalTypes expanded to one entry per local and
+// Body holding the remaining bytes, which must end with wasm.OpcodeEnd.
+// io.EOF is returned when the local declarations overrun the declared size.
+func decodeCode(r *bytes.Reader) (*wasm.Code, error) {
+	ss, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("get the size of code: %w", err)
+	}
+	// remaining tracks how many of the declared bytes are still unread.
+	remaining := int64(ss)
+
+	// parse locals
+	ls, bytesRead, err := leb128.DecodeUint32(r)
+	remaining -= int64(bytesRead)
+	if err != nil {
+		return nil, fmt.Errorf("get the size locals: %v", err)
+	} else if remaining < 0 {
+		return nil, io.EOF
+	}
+
+	var nums []uint64
+	var types []wasm.ValueType
+	var sum uint64
+	var n uint32
+	for i := uint32(0); i < ls; i++ {
+		n, bytesRead, err = leb128.DecodeUint32(r)
+		remaining -= int64(bytesRead) + 1 // +1 for the subsequent ReadByte
+		if err != nil {
+			return nil, fmt.Errorf("read n of locals: %v", err)
+		} else if remaining < 0 {
+			return nil, io.EOF
+		}
+
+		sum += uint64(n)
+		nums = append(nums, uint64(n))
+
+		b, err := r.ReadByte()
+		if err != nil {
+			return nil, fmt.Errorf("read type of local: %v", err)
+		}
+		switch vt := b; vt {
+		case wasm.ValueTypeI32, wasm.ValueTypeF32, wasm.ValueTypeI64, wasm.ValueTypeF64,
+			wasm.ValueTypeFuncref, wasm.ValueTypeExternref, wasm.ValueTypeV128:
+			types = append(types, vt)
+		default:
+			return nil, fmt.Errorf("invalid local type: 0x%x", vt)
+		}
+	}
+
+	// sum is accumulated in 64 bits so that the total of all groups can be
+	// checked against the 32-bit limit without wrapping.
+	if sum > math.MaxUint32 {
+		return nil, fmt.Errorf("too many locals: %d", sum)
+	}
+
+	var localTypes []wasm.ValueType
+	for i, num := range nums {
+		t := types[i]
+		for j := uint64(0); j < num; j++ {
+			localTypes = append(localTypes, t)
+		}
+	}
+
+	// The declared size comes from untrusted input: verify the reader really
+	// holds that many bytes before allocating the body. The errors mirror
+	// what io.ReadFull reports for an exhausted or truncated reader.
+	if available := int64(r.Len()); remaining > available {
+		err = io.ErrUnexpectedEOF
+		if available == 0 {
+			err = io.EOF
+		}
+		return nil, fmt.Errorf("read body: %w", err)
+	}
+
+	body := make([]byte, remaining)
+	if _, err = io.ReadFull(r, body); err != nil {
+		return nil, fmt.Errorf("read body: %w", err)
+	}
+
+	if endIndex := len(body) - 1; endIndex < 0 || body[endIndex] != wasm.OpcodeEnd {
+		return nil, fmt.Errorf("expr not end with OpcodeEnd")
+	}
+
+	return &wasm.Code{Body: body, LocalTypes: localTypes}, nil
+}
+
+// encodeCode serializes c into its WebAssembly Binary Format representation:
+// a size prefix, the run-length grouped locals, then the body bytes.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-code
+func encodeCode(c *wasm.Code) []byte {
+	// Locals are compressed into blocks of consecutive identical types, which
+	// preserves index order. The same type may appear in several blocks.
+	// https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#code-section%E2%91%A0
+	var locals []byte
+	if count := len(c.LocalTypes); count == 0 {
+		locals = leb128.EncodeUint32(0)
+	} else {
+		var (
+			blocks uint32         // number of (count, type) blocks emitted
+			run    uint32         // length of the run currently being counted
+			prev   wasm.ValueType // starts as 0, which is not a valid type
+		)
+
+		// Walk from the last local to the first, prepending as we go, so each
+		// block's count lands in front of its type.
+		for idx := count - 1; idx >= 0; idx-- {
+			cur := c.LocalTypes[idx]
+			if cur == prev {
+				run++
+				continue
+			}
+			if run != 0 { // zero only before the first type has been seen
+				locals = append(leb128.EncodeUint32(run), locals...)
+			}
+			prev = cur
+			locals = append(leb128.EncodeUint32(uint32(cur)), locals...) // reuse the EncodeUint32 cache
+			blocks++
+			run = 1
+		}
+		locals = append(leb128.EncodeUint32(run), locals...)
+		locals = append(leb128.EncodeUint32(blocks), locals...)
+	}
+
+	code := append(locals, c.Body...)
+	return append(leb128.EncodeUint32(uint32(len(code))), code...)
+}
@@ -0,0 +1,102 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tetratelabs/wabin/ieee754"
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// decodeConstantExpression reads one constant expression from r: a single
+// opcode, its immediate, and the terminating wasm.OpcodeEnd.
+//
+// ref.null and ref.func require wasm.CoreFeatureBulkMemoryOperations and the
+// vector constant requires wasm.CoreFeatureSIMD. The immediate is only
+// validated here; its raw bytes are copied into Data. For a vector constant,
+// Opcode holds the suffix opcode rather than wasm.OpcodeVecPrefix.
+func decodeConstantExpression(r *bytes.Reader, features wasm.CoreFeatures) (*wasm.ConstantExpression, error) {
+	first, err := r.ReadByte()
+	if err != nil {
+		return nil, fmt.Errorf("read opcode: %v", err)
+	}
+
+	// Remember where the immediate starts so its bytes can be re-read below.
+	unreadAtData := int64(r.Len())
+	dataOffset := r.Size() - unreadAtData
+
+	opcode := first
+	switch first {
+	case wasm.OpcodeI32Const:
+		// Treat constants as signed as their interpretation is not yet known per /RATIONALE.md
+		_, _, err = leb128.DecodeInt32(r)
+	case wasm.OpcodeI64Const:
+		// Treat constants as signed as their interpretation is not yet known per /RATIONALE.md
+		_, _, err = leb128.DecodeInt64(r)
+	case wasm.OpcodeF32Const:
+		_, err = ieee754.DecodeFloat32(r)
+	case wasm.OpcodeF64Const:
+		_, err = ieee754.DecodeFloat64(r)
+	case wasm.OpcodeGlobalGet:
+		_, _, err = leb128.DecodeUint32(r)
+	case wasm.OpcodeRefNull:
+		if featErr := features.RequireEnabled(wasm.CoreFeatureBulkMemoryOperations); featErr != nil {
+			return nil, fmt.Errorf("ref.null is not supported as %w", featErr)
+		}
+		refType, readErr := r.ReadByte()
+		if readErr != nil {
+			return nil, fmt.Errorf("read reference type for ref.null: %w", readErr)
+		}
+		if refType != wasm.RefTypeFuncref && refType != wasm.RefTypeExternref {
+			return nil, fmt.Errorf("invalid type for ref.null: 0x%x", refType)
+		}
+	case wasm.OpcodeRefFunc:
+		if featErr := features.RequireEnabled(wasm.CoreFeatureBulkMemoryOperations); featErr != nil {
+			return nil, fmt.Errorf("ref.func is not supported as %w", featErr)
+		}
+		// Parsing index.
+		_, _, err = leb128.DecodeUint32(r)
+	case wasm.OpcodeVecPrefix:
+		if featErr := features.RequireEnabled(wasm.CoreFeatureSIMD); featErr != nil {
+			return nil, fmt.Errorf("vector instructions are not supported as %w", featErr)
+		}
+		if opcode, err = r.ReadByte(); err != nil {
+			return nil, fmt.Errorf("read vector instruction opcode suffix: %w", err)
+		}
+		if opcode != wasm.OpcodeVecV128Const {
+			return nil, fmt.Errorf("invalid vector opcode for const expression: %#x", opcode)
+		}
+
+		// The immediate begins after the suffix opcode, so move the marker.
+		unreadAtData = int64(r.Len())
+		dataOffset = r.Size() - unreadAtData
+
+		read, readErr := r.Read(make([]byte, 16))
+		if readErr != nil {
+			return nil, fmt.Errorf("read vector const instruction immediates: %w", readErr)
+		}
+		if read != 16 {
+			return nil, fmt.Errorf("read vector const instruction immediates: needs 16 bytes but was %d bytes", read)
+		}
+	default:
+		return nil, fmt.Errorf("%v for const expression opt code: %#x", ErrInvalidByte, first)
+	}
+
+	if err != nil {
+		return nil, fmt.Errorf("read value: %v", err)
+	}
+
+	end, err := r.ReadByte()
+	if err != nil {
+		return nil, fmt.Errorf("look for end opcode: %v", err)
+	}
+	if end != wasm.OpcodeEnd {
+		return nil, fmt.Errorf("constant expression has been not terminated")
+	}
+
+	// Bytes consumed since the marker, minus the end opcode, are the immediate.
+	data := make([]byte, unreadAtData-int64(r.Len())-1)
+	if _, err := r.ReadAt(data, dataOffset); err != nil {
+		return nil, fmt.Errorf("error re-buffering ConstantExpression.Data")
+	}
+
+	return &wasm.ConstantExpression{Opcode: opcode, Data: data}, nil
+}
+
+// encodeConstantExpression returns the WebAssembly Binary Format encoding of
+// expr: the opcode, its raw immediate (expr.Data) and wasm.OpcodeEnd.
+//
+// decodeConstantExpression stores the suffix opcode wasm.OpcodeVecV128Const
+// in Opcode for vector constants and drops the prefix byte. The prefix is
+// therefore emitted again here; without it the output could not be decoded.
+func encodeConstantExpression(expr *wasm.ConstantExpression) (ret []byte) {
+	if expr.Opcode == wasm.OpcodeVecV128Const {
+		// NOTE(review): assumes the v128.const suffix value is not shared with
+		// any single-byte constant opcode — confirm against the wasm package.
+		ret = append(ret, wasm.OpcodeVecPrefix)
+	}
+	ret = append(ret, expr.Opcode)
+	ret = append(ret, expr.Data...)
+	ret = append(ret, wasm.OpcodeEnd)
+	return
+}
@@ -0,0 +1,22 @@
+package binary
+
+import (
+	"bytes"
+
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// decodeCustomSection deserializes the data **not** associated with the "name" key in SectionIDCustom.
+//
+// limit is the payload size in bytes. An empty payload (limit of zero) is
+// valid and yields empty Data even when r is already exhausted. When r holds
+// fewer than limit bytes, Data contains only the bytes that were available;
+// detecting that truncation is left to the caller's section length check.
+// On error the result is nil.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#custom-section%E2%91%A0
+func decodeCustomSection(r *bytes.Reader, name string, limit uint64) (result *wasm.CustomSection, err error) {
+	// limit is derived from untrusted input, so never allocate more than r
+	// still holds.
+	size := limit
+	if available := uint64(r.Len()); size > available {
+		size = available
+	}
+	buf := make([]byte, size)
+
+	// bytes.Reader.Read reports EOF on an exhausted reader even for an empty
+	// buffer, which would reject a trailing custom section with no payload.
+	if limit > 0 {
+		if _, err = r.Read(buf); err != nil {
+			return nil, err
+		}
+	}
+
+	return &wasm.CustomSection{Name: name, Data: buf}, nil
+}
@@ -0,0 +1,94 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// dataSegmentPrefix is the leading u32 of an encoded data segment; it selects one of the three segment kinds below.
+//
+// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#data-section
+type dataSegmentPrefix = uint32
+
+const (
+	// dataSegmentPrefixActive is the prefix for the version 1.0 compatible
+	// data segment, which is classified as "active" in 2.0.
+	dataSegmentPrefixActive dataSegmentPrefix = 0x0
+	// dataSegmentPrefixPassive prefixes the "passive" data segment as in
+	// the version 2.0 specification.
+	dataSegmentPrefixPassive dataSegmentPrefix = 0x1
+	// dataSegmentPrefixActiveWithMemoryIndex is the active prefix with an explicit
+	// memory index encoded, which is defined for future use as of 2.0.
+	dataSegmentPrefixActiveWithMemoryIndex dataSegmentPrefix = 0x2
+)
+
+// decodeDataSegment decodes one entry of the data section.
+//
+// The segment starts with a prefix selecting its kind. Active segments carry
+// an offset constant expression (and, for the memory-index form, an index
+// that must be zero); passive segments carry neither, so OffsetExpression is
+// left nil. Any non-zero prefix requires wasm.CoreFeatureBulkMemoryOperations.
+// Init holds the segment's bytes.
+func decodeDataSegment(r *bytes.Reader, features wasm.CoreFeatures) (*wasm.DataSegment, error) {
+	prefix, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("read data segment prefix: %w", err)
+	}
+
+	if prefix != dataSegmentPrefixActive {
+		if err := features.RequireEnabled(wasm.CoreFeatureBulkMemoryOperations); err != nil {
+			return nil, fmt.Errorf("non-zero prefix for data segment is invalid as %w", err)
+		}
+	}
+
+	var expr *wasm.ConstantExpression
+	switch prefix {
+	case dataSegmentPrefixActive,
+		dataSegmentPrefixActiveWithMemoryIndex:
+		// Active data segment as in
+		// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#data-section
+		if prefix == dataSegmentPrefixActiveWithMemoryIndex {
+			d, _, err := leb128.DecodeUint32(r)
+			if err != nil {
+				return nil, fmt.Errorf("read memory index: %v", err)
+			} else if d != 0 {
+				return nil, fmt.Errorf("memory index must be zero but was %d", d)
+			}
+		}
+
+		expr, err = decodeConstantExpression(r, features)
+		if err != nil {
+			return nil, fmt.Errorf("read offset expression: %v", err)
+		}
+	case dataSegmentPrefixPassive:
+		// Passive data segment doesn't need const expr nor memory index encoded.
+		// https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#data-section
+	default:
+		return nil, fmt.Errorf("invalid data segment prefix: 0x%x", prefix)
+	}
+
+	vs, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("get the size of vector: %v", err)
+	}
+
+	// vs comes from untrusted input: verify the reader holds that many bytes
+	// before allocating. The errors mirror what io.ReadFull reports for an
+	// exhausted or truncated reader.
+	if available := int64(r.Len()); int64(vs) > available {
+		err = io.ErrUnexpectedEOF
+		if available == 0 {
+			err = io.EOF
+		}
+		return nil, fmt.Errorf("read bytes for init: %v", err)
+	}
+
+	b := make([]byte, vs)
+	if _, err := io.ReadFull(r, b); err != nil {
+		return nil, fmt.Errorf("read bytes for init: %v", err)
+	}
+
+	return &wasm.DataSegment{
+		OffsetExpression: expr,
+		Init:             b,
+	}, nil
+}
+
+// encodeDataSegment returns d encoded in the WebAssembly Binary Format: the
+// segment prefix, the offset expression for active segments, then the
+// length-prefixed Init bytes. A nil OffsetExpression marks a passive segment.
+func encodeDataSegment(d *wasm.DataSegment) (ret []byte) {
+	if offset := d.OffsetExpression; offset != nil {
+		// Currently multiple memories are not supported.
+		ret = append(ret, leb128.EncodeInt32(int32(dataSegmentPrefixActive))...)
+		ret = append(ret, encodeConstantExpression(offset)...)
+	} else {
+		ret = append(ret, leb128.EncodeInt32(int32(dataSegmentPrefixPassive))...)
+	}
+
+	initSize := leb128.EncodeUint32(uint32(len(d.Init)))
+	ret = append(ret, initSize...)
+	return append(ret, d.Init...)
+}
@@ -0,0 +1,126 @@
+package binary
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// DecodeModule implements wasm.DecodeModule for the WebAssembly Binary Format: it checks the magic number and version, then decodes sections until EOF.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-format%E2%91%A0
+func DecodeModule(binary []byte, features wasm.CoreFeatures) (*wasm.Module, error) {
+	r := bytes.NewReader(binary)
+
+	// Magic number.
+	buf := make([]byte, 4) // reused below for the version
+	if _, err := io.ReadFull(r, buf); err != nil || !bytes.Equal(buf, Magic) {
+		return nil, ErrInvalidMagicNumber
+	}
+
+	// Version.
+	if _, err := io.ReadFull(r, buf); err != nil || !bytes.Equal(buf, version) {
+		return nil, ErrInvalidVersion
+	}
+
+	m := &wasm.Module{}
+	for {
+		// TODO: except custom sections, all others are required to be in order, but we aren't checking yet.
+		// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#modules%E2%91%A0%E2%93%AA
+		sectionID, err := r.ReadByte()
+		if err == io.EOF { // EOF where a section ID is expected ends the module
+			break
+		} else if err != nil {
+			return nil, fmt.Errorf("read section id: %w", err)
+		}
+
+		sectionSize, _, err := leb128.DecodeUint32(r)
+		if err != nil {
+			return nil, fmt.Errorf("get size of section %s: %v", wasm.SectionIDName(sectionID), err)
+		}
+
+		sectionContentStart := r.Len() // unread bytes before the payload; compared after decoding to verify sectionSize
+		switch sectionID {
+		case wasm.SectionIDCustom:
+			// First, validate the section and determine if the section for this name has already been set
+			name, nameSize, decodeErr := decodeUTF8(r, "custom section name")
+			if decodeErr != nil {
+				err = decodeErr
+				break // leaves the switch, not the loop; err is wrapped and returned below
+			} else if sectionSize < nameSize {
+				err = fmt.Errorf("malformed custom section %s", name)
+				break
+			} else if name == "name" && m.NameSection != nil {
+				err = fmt.Errorf("redundant custom section %s", name)
+				break
+			}
+
+			// Now, either decode the NameSection or CustomSection
+			limit := sectionSize - nameSize // presumably the payload bytes left after the name; depends on what decodeUTF8 counts in nameSize
+			if name == "name" {
+				m.NameSection, err = decodeNameSection(r, uint64(limit))
+			} else {
+				custom, err := decodeCustomSection(r, name, uint64(limit)) // err is shadowed here, so it is handled immediately
+				if err != nil {
+					return nil, fmt.Errorf("failed to read custom section name[%s]: %w", name, err)
+				}
+				m.CustomSections = append(m.CustomSections, custom)
+			}
+
+		case wasm.SectionIDType:
+			m.TypeSection, err = decodeTypeSection(features, r)
+		case wasm.SectionIDImport:
+			if m.ImportSection, err = decodeImportSection(r, features); err != nil {
+				return nil, err // avoid re-wrapping the error.
+			}
+		case wasm.SectionIDFunction:
+			m.FunctionSection, err = decodeFunctionSection(r)
+		case wasm.SectionIDTable:
+			m.TableSection, err = decodeTableSection(r, features)
+		case wasm.SectionIDMemory:
+			m.MemorySection, err = decodeMemorySection(r)
+		case wasm.SectionIDGlobal:
+			if m.GlobalSection, err = decodeGlobalSection(r, features); err != nil {
+				return nil, err // avoid re-wrapping the error.
+			}
+		case wasm.SectionIDExport:
+			m.ExportSection, err = decodeExportSection(r)
+		case wasm.SectionIDStart:
+			if m.StartSection != nil {
+				return nil, errors.New("multiple start sections are invalid")
+			}
+			m.StartSection, err = decodeStartSection(r)
+		case wasm.SectionIDElement:
+			m.ElementSection, err = decodeElementSection(r, features)
+		case wasm.SectionIDCode:
+			m.CodeSection, err = decodeCodeSection(r)
+		case wasm.SectionIDData:
+			m.DataSection, err = decodeDataSection(r, features)
+		case wasm.SectionIDDataCount:
+			if err := features.RequireEnabled(wasm.CoreFeatureBulkMemoryOperations); err != nil {
+				return nil, fmt.Errorf("data count section not supported as %v", err)
+			}
+			m.DataCountSection, err = decodeDataCountSection(r)
+		default:
+			err = ErrInvalidSectionID
+		}
+
+		readBytes := sectionContentStart - r.Len() // bytes the section decoder actually consumed
+		if err == nil && int(sectionSize) != readBytes {
+			err = fmt.Errorf("invalid section length: expected to be %d but got %d", sectionSize, readBytes)
+		}
+
+		if err != nil {
+			return nil, fmt.Errorf("section %s: %v", wasm.SectionIDName(sectionID), err)
+		}
+	}
+
+	functionCount, codeCount := m.SectionElementCount(wasm.SectionIDFunction), m.SectionElementCount(wasm.SectionIDCode)
+	if functionCount != codeCount { // the function and code sections must have the same element count
+		return nil, fmt.Errorf("function and code section have inconsistent lengths: %d != %d", functionCount, codeCount)
+	}
+	return m, nil
+}
@@ -0,0 +1,308 @@
+package binary
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// ensureElementKindFuncRef consumes the one-byte element kind from r and
+// returns an error unless that byte is 0x0, the only kind accepted here.
+//
+// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#element-section
+func ensureElementKindFuncRef(r *bytes.Reader) error {
+	switch kind, readErr := r.ReadByte(); {
+	case readErr != nil:
+		return fmt.Errorf("read element prefix: %w", readErr)
+	case kind != 0x0:
+		return fmt.Errorf("element kind must be zero but was 0x%x", kind)
+	default:
+		return nil
+	}
+}
+
+// decodeElementInitValueVector reads a LEB128 u32 element count from r followed
+// by that many LEB128 u32 function indexes. Each returned pointer refers to a
+// variable of its own, so entries never alias one another.
+func decodeElementInitValueVector(r *bytes.Reader) ([]*wasm.Index, error) {
+	count, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("get size of vector: %w", err)
+	}
+
+	indexes := make([]*wasm.Index, count)
+	for pos := uint32(0); pos < count; pos++ {
+		funcIdx, _, decodeErr := leb128.DecodeUint32(r)
+		if decodeErr != nil {
+			return nil, fmt.Errorf("read function index: %w", decodeErr)
+		}
+		indexes[pos] = &funcIdx
+	}
+	return indexes, nil
+}
+
+// decodeElementConstExprVector reads a LEB128 u32 count from r followed by that
+// many constant expressions, each of which must be ref.func or ref.null.
+//
+// A ref.func entry yields a pointer to the function index carried in the
+// expression data and is only accepted when elemType is funcref. A ref.null
+// entry leaves the slot nil and must carry the same reference type as elemType.
+// Any other opcode is an error.
+func decodeElementConstExprVector(r *bytes.Reader, elemType wasm.RefType, features wasm.CoreFeatures) ([]*wasm.Index, error) {
+	vs, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get the size of constexpr vector: %w", err)
+	}
+	vec := make([]*wasm.Index, vs)
+	for i := range vec {
+		expr, err := decodeConstantExpression(r, features)
+		if err != nil {
+			return nil, err
+		}
+		switch expr.Opcode {
+		case wasm.OpcodeRefFunc:
+			if elemType != wasm.RefTypeFuncref {
+				return nil, fmt.Errorf("element type mismatch: want %s, but constexpr has funcref", wasm.RefTypeName(elemType))
+			}
+			// Surface a malformed index instead of silently storing zero.
+			v, _, err := leb128.DecodeUint32(bytes.NewReader(expr.Data))
+			if err != nil {
+				return nil, fmt.Errorf("read function index of ref.func: %w", err)
+			}
+			vec[i] = &v
+		case wasm.OpcodeRefNull:
+			// Guard the index below so empty expression data cannot panic.
+			if len(expr.Data) == 0 {
+				return nil, errors.New("ref.null constexpr is missing its reference type")
+			}
+			if elemType != expr.Data[0] {
+				return nil, fmt.Errorf("element type mismatch: want %s, but constexpr has %s",
+					wasm.RefTypeName(elemType), wasm.RefTypeName(expr.Data[0]))
+			}
+			// vec[i] is already nil, so nothing to do.
+		default:
+			return nil, fmt.Errorf("const expr must be either ref.null or ref.func but was %s", wasm.InstructionName(expr.Opcode))
+		}
+	}
+	return vec, nil
+}
+
+// decodeElementRefType reads the one-byte reference type of an element segment
+// from r and rejects anything other than funcref or externref.
+func decodeElementRefType(r *bytes.Reader) (ret wasm.RefType, err error) {
+	if ret, err = r.ReadByte(); err != nil {
+		return ret, fmt.Errorf("read element ref type: %w", err)
+	}
+	switch ret {
+	case wasm.RefTypeFuncref, wasm.RefTypeExternref:
+		return ret, nil
+	default:
+		return 0, errors.New("ref type must be funcref or externref for element as of WebAssembly 2.0")
+	}
+}
+
+const (
+	// The prefix is explained at https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#element-section
+
+	// elementSegmentPrefixLegacy is the legacy prefix and the only one that is
+	// valid without wasm.CoreFeatureBulkMemoryOperations.
+	elementSegmentPrefixLegacy = iota
+	// elementSegmentPrefixPassiveFuncrefValueVector is the passive element
+	// whose indexes are encoded as vec(varint), and reftype is fixed to funcref.
+	elementSegmentPrefixPassiveFuncrefValueVector
+	// elementSegmentPrefixActiveFuncrefValueVectorWithTableIndex is the same
+	// as elementSegmentPrefixPassiveFuncrefValueVector but active, and the
+	// table index is encoded.
+	elementSegmentPrefixActiveFuncrefValueVectorWithTableIndex
+	// elementSegmentPrefixDeclarativeFuncrefValueVector is the same as
+	// elementSegmentPrefixPassiveFuncrefValueVector but declarative.
+	elementSegmentPrefixDeclarativeFuncrefValueVector
+	// elementSegmentPrefixActiveFuncrefConstExprVector is active, its reftype
+	// is fixed to funcref, and indexes are encoded as vec(const_expr).
+	elementSegmentPrefixActiveFuncrefConstExprVector
+	// elementSegmentPrefixPassiveConstExprVector is passive where indexes
+	// are encoded as vec(const_expr), and reftype is encoded.
+	elementSegmentPrefixPassiveConstExprVector
+	// elementSegmentPrefixActiveConstExprVector is active where indexes are
+	// encoded as vec(const_expr), and reftype and table index are encoded.
+	elementSegmentPrefixActiveConstExprVector
+	// elementSegmentPrefixDeclarativeConstExprVector is declarative where
+	// indexes are encoded as vec(const_expr), and reftype is encoded.
+	elementSegmentPrefixDeclarativeConstExprVector
+)
+
+// decodeElementSegment decodes a single element segment from r.
+//
+// The segment starts with a LEB128 u32 prefix that selects one of eight
+// encodings. Any prefix other than elementSegmentPrefixLegacy requires
+// wasm.CoreFeatureBulkMemoryOperations, and a non-zero table index additionally
+// requires wasm.CoreFeatureReferenceTypes. An unknown prefix is an error.
+//
+// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#element-section
+func decodeElementSegment(r *bytes.Reader, features wasm.CoreFeatures) (*wasm.ElementSegment, error) {
+	prefix, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("read element prefix: %w", err)
+	}
+
+	if prefix != elementSegmentPrefixLegacy {
+		if err := features.RequireEnabled(wasm.CoreFeatureBulkMemoryOperations); err != nil {
+			return nil, fmt.Errorf("non-zero prefix for element segment is invalid as %w", err)
+		}
+	}
+
+	switch prefix {
+	case elementSegmentPrefixLegacy:
+		// Prefix 0 is WebAssembly 1.0 compatible: offset expression, then
+		// function indexes, always targeting table zero.
+		expr, err := decodeConstantExpression(r, features)
+		if err != nil {
+			return nil, fmt.Errorf("read expr for offset: %w", err)
+		}
+
+		init, err := decodeElementInitValueVector(r)
+		if err != nil {
+			return nil, err
+		}
+
+		return &wasm.ElementSegment{
+			OffsetExpr: expr,
+			Init:       init,
+			Type:       wasm.RefTypeFuncref,
+			Mode:       wasm.ElementModeActive,
+			TableIndex: 0,
+		}, nil
+	case elementSegmentPrefixPassiveFuncrefValueVector:
+		// Prefix 1 requires funcref.
+		if err := ensureElementKindFuncRef(r); err != nil {
+			return nil, err
+		}
+
+		init, err := decodeElementInitValueVector(r)
+		if err != nil {
+			return nil, err
+		}
+		return &wasm.ElementSegment{
+			Init: init,
+			Type: wasm.RefTypeFuncref,
+			Mode: wasm.ElementModePassive,
+		}, nil
+	case elementSegmentPrefixActiveFuncrefValueVectorWithTableIndex:
+		tableIndex, err := decodeElementTableIndex(r, features)
+		if err != nil {
+			return nil, err
+		}
+
+		expr, err := decodeConstantExpression(r, features)
+		if err != nil {
+			return nil, fmt.Errorf("read expr for offset: %w", err)
+		}
+
+		// Prefix 2 requires funcref.
+		if err := ensureElementKindFuncRef(r); err != nil {
+			return nil, err
+		}
+
+		init, err := decodeElementInitValueVector(r)
+		if err != nil {
+			return nil, err
+		}
+		return &wasm.ElementSegment{
+			OffsetExpr: expr,
+			Init:       init,
+			Type:       wasm.RefTypeFuncref,
+			Mode:       wasm.ElementModeActive,
+			TableIndex: tableIndex,
+		}, nil
+	case elementSegmentPrefixDeclarativeFuncrefValueVector:
+		// Prefix 3 requires funcref.
+		if err := ensureElementKindFuncRef(r); err != nil {
+			return nil, err
+		}
+
+		init, err := decodeElementInitValueVector(r)
+		if err != nil {
+			return nil, err
+		}
+		return &wasm.ElementSegment{
+			Init: init,
+			Type: wasm.RefTypeFuncref,
+			Mode: wasm.ElementModeDeclarative,
+		}, nil
+	case elementSegmentPrefixActiveFuncrefConstExprVector:
+		// Prefix 4 has no explicit table index or reftype: table zero, funcref.
+		expr, err := decodeConstantExpression(r, features)
+		if err != nil {
+			return nil, fmt.Errorf("read expr for offset: %w", err)
+		}
+
+		init, err := decodeElementConstExprVector(r, wasm.RefTypeFuncref, features)
+		if err != nil {
+			return nil, err
+		}
+
+		return &wasm.ElementSegment{
+			OffsetExpr: expr,
+			Init:       init,
+			Type:       wasm.RefTypeFuncref,
+			Mode:       wasm.ElementModeActive,
+			TableIndex: 0,
+		}, nil
+	case elementSegmentPrefixPassiveConstExprVector:
+		refType, err := decodeElementRefType(r)
+		if err != nil {
+			return nil, err
+		}
+
+		init, err := decodeElementConstExprVector(r, refType, features)
+		if err != nil {
+			return nil, err
+		}
+		return &wasm.ElementSegment{
+			Init: init,
+			Type: refType,
+			Mode: wasm.ElementModePassive,
+		}, nil
+	case elementSegmentPrefixActiveConstExprVector:
+		tableIndex, err := decodeElementTableIndex(r, features)
+		if err != nil {
+			return nil, err
+		}
+
+		expr, err := decodeConstantExpression(r, features)
+		if err != nil {
+			return nil, fmt.Errorf("read expr for offset: %w", err)
+		}
+
+		refType, err := decodeElementRefType(r)
+		if err != nil {
+			return nil, err
+		}
+
+		init, err := decodeElementConstExprVector(r, refType, features)
+		if err != nil {
+			return nil, err
+		}
+
+		return &wasm.ElementSegment{
+			OffsetExpr: expr,
+			Init:       init,
+			Type:       refType,
+			Mode:       wasm.ElementModeActive,
+			TableIndex: tableIndex,
+		}, nil
+	case elementSegmentPrefixDeclarativeConstExprVector:
+		refType, err := decodeElementRefType(r)
+		if err != nil {
+			return nil, err
+		}
+
+		init, err := decodeElementConstExprVector(r, refType, features)
+		if err != nil {
+			return nil, err
+		}
+		return &wasm.ElementSegment{
+			Init: init,
+			Type: refType,
+			Mode: wasm.ElementModeDeclarative,
+		}, nil
+	default:
+		return nil, fmt.Errorf("invalid element segment prefix: 0x%x", prefix)
+	}
+}
+
+// decodeElementTableIndex reads the explicit LEB128 u32 table index of an
+// active element segment. A non-zero index is only accepted when
+// wasm.CoreFeatureReferenceTypes is enabled in features.
+func decodeElementTableIndex(r *bytes.Reader, features wasm.CoreFeatures) (uint32, error) {
+	tableIndex, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return 0, fmt.Errorf("read table index: %w", err)
+	}
+	if tableIndex != 0 {
+		if err := features.RequireEnabled(wasm.CoreFeatureReferenceTypes); err != nil {
+			return 0, fmt.Errorf("table index must be zero but was %d: %w", tableIndex, err)
+		}
+	}
+	return tableIndex, nil
+}
+
+// encodeElement returns the wasm.ElementSegment encoded in WebAssembly Binary Format.
+//
+// Only active segments are supported; any other mode panics. The output is the
+// table index, the offset expression, and the vector of function indexes.
+// decodeElementSegment reads the leading value back as the segment prefix, so
+// the result only round-trips when TableIndex is zero.
+//
+// NOTE(review): a nil entry in Init (as produced for ref.null) is dereferenced
+// below and would panic; confirm callers never pass one.
+//
+// https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#element-section%E2%91%A0
+func encodeElement(e *wasm.ElementSegment) (ret []byte) {
+	if e.Mode != wasm.ElementModeActive {
+		panic("TODO: support encoding for non-active elements in bulk-memory-operations proposal")
+	}
+
+	// Indexes are u32 in the binary format, so they are written as unsigned
+	// LEB128. A signed encoding pads values of 64 and above with an extra byte
+	// and corrupts values of 2^31 and above.
+	ret = append(ret, leb128.EncodeUint32(uint32(e.TableIndex))...)
+	ret = append(ret, encodeConstantExpression(e.OffsetExpr)...)
+	ret = append(ret, leb128.EncodeUint32(uint32(len(e.Init)))...)
+	for _, idx := range e.Init {
+		ret = append(ret, leb128.EncodeUint32(uint32(*idx))...)
+	}
+	return ret
+}
@@ -0,0 +1,59 @@
+package binary
+
+import (
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+var sizePrefixedName = []byte{4, 'n', 'a', 'm', 'e'} // the custom section name "name" preceded by its byte length (4)
+
+// EncodeModule implements wasm.EncodeModule for the WebAssembly Binary Format.
+//
+// The output is Magic, then version, then every section whose element count
+// is non-zero, in the order written below. When custom sections are present
+// the name section is emitted first, followed by each entry of
+// m.CustomSections.
+//
+// Note: If saving to a file, the conventional extension is wasm
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-format%E2%91%A0
+func EncodeModule(m *wasm.Module) (bytes []byte) {
+	// Copy the preamble into a fresh buffer rather than appending onto the
+	// package-level Magic slice: appending to a shared slice only avoids
+	// writing into its backing array while its capacity equals its length.
+	bytes = make([]byte, 0, len(Magic)+len(version))
+	bytes = append(bytes, Magic...)
+	bytes = append(bytes, version...)
+	if m.SectionElementCount(wasm.SectionIDType) > 0 {
+		bytes = append(bytes, encodeTypeSection(m.TypeSection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDImport) > 0 {
+		bytes = append(bytes, encodeImportSection(m.ImportSection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDFunction) > 0 {
+		bytes = append(bytes, encodeFunctionSection(m.FunctionSection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDTable) > 0 {
+		bytes = append(bytes, encodeTableSection(m.TableSection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDMemory) > 0 {
+		bytes = append(bytes, encodeMemorySection(m.MemorySection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDGlobal) > 0 {
+		bytes = append(bytes, encodeGlobalSection(m.GlobalSection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDExport) > 0 {
+		bytes = append(bytes, encodeExportSection(m.ExportSection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDStart) > 0 {
+		bytes = append(bytes, encodeStartSection(*m.StartSection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDElement) > 0 {
+		bytes = append(bytes, encodeElementSection(m.ElementSection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDCode) > 0 {
+		bytes = append(bytes, encodeCodeSection(m.CodeSection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDData) > 0 {
+		bytes = append(bytes, encodeDataSection(m.DataSection)...)
+	}
+	if m.SectionElementCount(wasm.SectionIDCustom) > 0 {
+		// >> The name section should appear only once in a module, and only after the data section.
+		// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-namesec
+		if m.NameSection != nil {
+			// Same reasoning as the preamble: never append onto the shared
+			// sizePrefixedName slice.
+			nameData := encodeNameSectionData(m.NameSection)
+			nameSection := make([]byte, 0, len(sizePrefixedName)+len(nameData))
+			nameSection = append(nameSection, sizePrefixedName...)
+			nameSection = append(nameSection, nameData...)
+			bytes = append(bytes, encodeSection(wasm.SectionIDCustom, nameSection)...)
+		}
+		for _, custom := range m.CustomSections {
+			bytes = append(bytes, encodeSection(wasm.SectionIDCustom, encodeCustomSection(custom))...)
+		}
+	}
+	return bytes
+}
@@ -0,0 +1,11 @@
+package binary
+
+import "errors"
+
+var (
+	ErrInvalidByte           = errors.New("invalid byte")             // wrapped by decoders when a byte with a fixed set of legal values holds something else
+	ErrInvalidMagicNumber    = errors.New("invalid magic number")     // presumably for a preamble that does not match Magic; its use is not visible here
+	ErrInvalidVersion        = errors.New("invalid version header")   // presumably for an unexpected format version; its use is not visible here
+	ErrInvalidSectionID      = errors.New("invalid section id")       // set by module decoding when a section ID matches no known section
+	ErrCustomSectionNotFound = errors.New("custom section not found") // NOTE(review): no use is visible here; confirm which lookup returns it
+)
@@ -0,0 +1,43 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// decodeExport reads one export entry from r: a UTF-8 name, a one-byte extern
+// type, and a LEB128 u32 index. An extern type other than func, table, memory
+// or global yields an error wrapping ErrInvalidByte.
+func decodeExport(r *bytes.Reader) (i *wasm.Export, err error) {
+	name, _, err := decodeUTF8(r, "export name")
+	if err != nil {
+		return nil, err
+	}
+
+	kind, err := r.ReadByte()
+	if err != nil {
+		return nil, fmt.Errorf("error decoding export kind: %w", err)
+	}
+
+	switch kind {
+	case wasm.ExternTypeFunc, wasm.ExternTypeTable, wasm.ExternTypeMemory, wasm.ExternTypeGlobal:
+		// Known extern type: the index follows.
+	default:
+		return nil, fmt.Errorf("%w: invalid byte for exportdesc: %#x", ErrInvalidByte, kind)
+	}
+
+	index, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("error decoding export index: %w", err)
+	}
+	return &wasm.Export{Name: name, Type: kind, Index: index}, nil
+}
+
+// encodeExport serializes a wasm.Export into the WebAssembly Binary Format:
+// the size-prefixed name, the extern type byte, then the LEB128 u32 index.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#export-section%E2%91%A0
+func encodeExport(i *wasm.Export) []byte {
+	encoded := append(encodeSizePrefixed([]byte(i.Name)), i.Type)
+	return append(encoded, leb128.EncodeUint32(i.Index)...)
+}
@@ -0,0 +1,100 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+var nullary = []byte{0x60, 0, 0} // shared encoding of a function type with no params and no results
+
+// encodedOneParam caches the encoding of wasm.FunctionType values with exactly one param and no results, keyed by the param type.
+var encodedOneParam = map[wasm.ValueType][]byte{
+	wasm.ValueTypeI32: {0x60, 1, wasm.ValueTypeI32, 0},
+	wasm.ValueTypeI64: {0x60, 1, wasm.ValueTypeI64, 0},
+	wasm.ValueTypeF32: {0x60, 1, wasm.ValueTypeF32, 0},
+	wasm.ValueTypeF64: {0x60, 1, wasm.ValueTypeF64, 0},
+}
+
+// encodedOneResult caches the encoding of wasm.FunctionType values with no params and exactly one result, keyed by the result type.
+var encodedOneResult = map[wasm.ValueType][]byte{
+	wasm.ValueTypeI32: {0x60, 0, 1, wasm.ValueTypeI32},
+	wasm.ValueTypeI64: {0x60, 0, 1, wasm.ValueTypeI64},
+	wasm.ValueTypeF32: {0x60, 0, 1, wasm.ValueTypeF32},
+	wasm.ValueTypeF64: {0x60, 0, 1, wasm.ValueTypeF64},
+}
+
+// encodeFunctionType serializes a wasm.FunctionType into the WebAssembly Binary Format.
+//
+// Shapes covered by the package-level caches (no params and no results, one
+// cached param only, one cached result only) return the shared cached slice.
+// Everything else is built fresh.
+//
+// Note: Function types are encoded by the byte 0x60 followed by the respective vectors of parameter and result types.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#function-types%E2%91%A4
+func encodeFunctionType(t *wasm.FunctionType) []byte {
+	params, results := t.Params, t.Results
+	switch {
+	case len(params) == 0 && len(results) == 0:
+		return nullary
+	case len(results) == 0 && len(params) == 1:
+		if cached, ok := encodedOneParam[params[0]]; ok {
+			return cached
+		}
+	case len(results) == 1 && len(params) == 0:
+		if cached, ok := encodedOneResult[results[0]]; ok {
+			return cached
+		}
+	}
+
+	// No cached form applies: 0x60, the parameter vector, then the result vector.
+	encoded := append([]byte{0x60}, encodeValTypes(params)...)
+	switch len(results) {
+	case 0:
+		return append(encoded, 0)
+	case 1:
+		return append(encoded, 1, results[0])
+	default:
+		// Only reached when "multi-value" is enabled because WebAssembly supports at most 1 result.
+		return append(encoded, encodeValTypes(results)...)
+	}
+}
+
+// decodeFunctionType decodes a function type from r: the byte 0x60 followed by
+// the parameter and result type vectors, each a LEB128 u32 count and that many
+// value types.
+//
+// A leading byte other than 0x60 yields an error wrapping ErrInvalidByte. More
+// than one result is only accepted when wasm.CoreFeatureMultiValue is enabled
+// in features.
+func decodeFunctionType(features wasm.CoreFeatures, r *bytes.Reader) (*wasm.FunctionType, error) {
+	b, err := r.ReadByte()
+	if err != nil {
+		return nil, fmt.Errorf("read leading byte: %w", err)
+	}
+
+	if b != 0x60 {
+		return nil, fmt.Errorf("%w: %#x != 0x60", ErrInvalidByte, b)
+	}
+
+	paramCount, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("could not read parameter count: %w", err)
+	}
+
+	paramTypes, err := decodeValueTypes(r, paramCount)
+	if err != nil {
+		return nil, fmt.Errorf("could not read parameter types: %w", err)
+	}
+
+	resultCount, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("could not read result count: %w", err)
+	}
+
+	// Guard >1.0 feature multi-value
+	if resultCount > 1 {
+		if err = features.RequireEnabled(wasm.CoreFeatureMultiValue); err != nil {
+			// %w keeps the feature error reachable through errors.Is/As, as the
+			// other feature checks in this package do. The message text is unchanged.
+			return nil, fmt.Errorf("multiple result types invalid as %w", err)
+		}
+	}
+
+	resultTypes, err := decodeValueTypes(r, resultCount)
+	if err != nil {
+		return nil, fmt.Errorf("could not read result types: %w", err)
+	}
+
+	return &wasm.FunctionType{
+		Params:  paramTypes,
+		Results: resultTypes,
+	}, nil
+}
@@ -0,0 +1,66 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// decodeGlobal returns the wasm.Global decoded with the WebAssembly Binary Format.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-global
+func decodeGlobal(r *bytes.Reader, features wasm.CoreFeatures) (*wasm.Global, error) {
+	gt, err := decodeGlobalType(r)
+	if err != nil {
+		return nil, err
+	}
+
+	init, err := decodeConstantExpression(r, features)
+	if err != nil {
+		return nil, err
+	}
+
+	return &wasm.Global{Type: gt, Init: init}, nil
+}
+
+// decodeGlobalType returns the wasm.GlobalType decoded with the WebAssembly Binary Format.
+//
+// It reads one value type followed by a mutability byte: 0x00 for immutable,
+// 0x01 for mutable. Any other mutability byte yields an error wrapping
+// ErrInvalidByte.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-globaltype
+func decodeGlobalType(r *bytes.Reader) (*wasm.GlobalType, error) {
+	vt, err := decodeValueTypes(r, 1)
+	if err != nil {
+		return nil, fmt.Errorf("read value type: %w", err)
+	}
+
+	ret := &wasm.GlobalType{
+		ValType: vt[0],
+	}
+
+	b, err := r.ReadByte()
+	if err != nil {
+		return nil, fmt.Errorf("read mutability: %w", err)
+	}
+
+	switch b {
+	case 0x00: // not mutable
+	case 0x01: // mutable
+		ret.Mutable = true
+	default:
+		return nil, fmt.Errorf("%w for mutability: %#x != 0x00 or 0x01", ErrInvalidByte, b)
+	}
+	return ret, nil
+}
+
+// encodeGlobal returns the wasm.Global encoded in WebAssembly Binary Format.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#global-section%E2%91%A0
+func encodeGlobal(g *wasm.Global) (data []byte) {
+	var mutable byte
+	if g.Type.Mutable {
+		mutable = 1
+	}
+	data = []byte{g.Type.ValType, mutable}
+	data = append(data, encodeConstantExpression(g.Init)...)
+	return
+}
@@ -0,0 +1,9 @@
+package binary
+
+// Magic is the 4-byte preamble (literally "\0asm") that EncodeModule writes first in the binary format.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-magic
+var Magic = []byte{0x00, 0x61, 0x73, 0x6D}
+
+// version is the 4-byte format version written right after Magic; it doesn't change between known specification versions.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-version
+var version = []byte{0x01, 0x00, 0x00, 0x00}
@@ -0,0 +1,78 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+func decodeImport(
+	r *bytes.Reader,
+	idx uint32,
+	features wasm.CoreFeatures,
+) (i *wasm.Import, err error) {
+	i = &wasm.Import{}
+	if i.Module, _, err = decodeUTF8(r, "import module"); err != nil {
+		return nil, fmt.Errorf("import[%d] error decoding module: %w", idx, err)
+	}
+
+	if i.Name, _, err = decodeUTF8(r, "import name"); err != nil {
+		return nil, fmt.Errorf("import[%d] error decoding name: %w", idx, err)
+	}
+
+	b, err := r.ReadByte()
+	if err != nil {
+		return nil, fmt.Errorf("import[%d] error decoding type: %w", idx, err)
+	}
+	i.Type = b
+	switch i.Type {
+	case wasm.ExternTypeFunc:
+		i.DescFunc, _, err = leb128.DecodeUint32(r)
+	case wasm.ExternTypeTable:
+		i.DescTable, err = decodeTable(r, features)
+	case wasm.ExternTypeMemory:
+		i.DescMem, err = decodeMemory(r)
+	case wasm.ExternTypeGlobal:
+		i.DescGlobal, err = decodeGlobalType(r)
+	default:
+		err = fmt.Errorf("%w: invalid byte for importdesc: %#x", ErrInvalidByte, b)
+	}
+	if err != nil {
+		return nil, fmt.Errorf("import[%d] %s[%s.%s]: %w", idx, wasm.ExternTypeName(i.Type), i.Module, i.Name, err)
+	}
+	return
+}
+
+// encodeImport serializes a wasm.Import into the WebAssembly Binary Format:
+// the size-prefixed module and import names, the extern type byte, then the
+// descriptor for that type. Table descriptors are always written with the
+// funcref reference type. An unknown extern type panics.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-import
+func encodeImport(i *wasm.Import) []byte {
+	encoded := append(encodeSizePrefixed([]byte(i.Module)), encodeSizePrefixed([]byte(i.Name))...)
+	encoded = append(encoded, i.Type)
+
+	var desc []byte
+	switch i.Type {
+	case wasm.ExternTypeFunc:
+		desc = leb128.EncodeUint32(i.DescFunc)
+	case wasm.ExternTypeTable:
+		desc = append([]byte{wasm.RefTypeFuncref}, encodeLimitsType(i.DescTable.Min, i.DescTable.Max)...)
+	case wasm.ExternTypeMemory:
+		// encodeMemory applies the same "max only when IsMaxEncoded" rule.
+		desc = encodeMemory(i.DescMem)
+	case wasm.ExternTypeGlobal:
+		mutability := byte(0)
+		if i.DescGlobal.Mutable {
+			mutability = 1
+		}
+		desc = []byte{i.DescGlobal.ValType, mutability}
+	default:
+		panic(fmt.Errorf("invalid externtype: %s", wasm.ExternTypeName(i.Type)))
+	}
+	return append(encoded, desc...)
+}
@@ -0,0 +1,52 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tetratelabs/wabin/leb128"
+)
+
+// decodeLimitsType returns the `limitsType` (min, max) decoded with the WebAssembly Binary Format.
+//
+// The leading flag byte selects the shape: 0x00 is followed by min only (max
+// is returned nil), 0x01 by min and then max. Any other flag yields an error
+// wrapping ErrInvalidByte. Underlying read errors are wrapped with %w so
+// callers can match them with errors.Is, as for the other decoders in this
+// package.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#limits%E2%91%A6
+func decodeLimitsType(r *bytes.Reader) (min uint32, max *uint32, err error) {
+	flag, err := r.ReadByte()
+	if err != nil {
+		return min, nil, fmt.Errorf("read leading byte: %w", err)
+	}
+	if flag != 0x00 && flag != 0x01 {
+		return min, nil, fmt.Errorf("%w for limits: %#x != 0x00 or 0x01", ErrInvalidByte, flag)
+	}
+
+	if min, _, err = leb128.DecodeUint32(r); err != nil {
+		return min, nil, fmt.Errorf("read min of limit: %w", err)
+	}
+
+	if flag == 0x01 {
+		var upper uint32
+		if upper, _, err = leb128.DecodeUint32(r); err != nil {
+			return min, nil, fmt.Errorf("read max of limit: %w", err)
+		}
+		max = &upper
+	}
+	return min, max, nil
+}
+
+// encodeLimitsType serializes a `limitsType` (min, max) into the WebAssembly
+// Binary Format: a flag (0x00 when max is nil, otherwise 0x01), then min, then
+// max when present.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#limits%E2%91%A6
+func encodeLimitsType(min uint32, max *uint32) []byte {
+	flag := uint32(0x00)
+	if max != nil {
+		flag = 0x01
+	}
+
+	encoded := append(leb128.EncodeUint32(flag), leb128.EncodeUint32(min)...)
+	if max != nil {
+		encoded = append(encoded, leb128.EncodeUint32(*max)...)
+	}
+	return encoded
+}
@@ -0,0 +1,43 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// decodeMemory reads a wasm.Memory from r in the WebAssembly Binary Format.
+//
+// The memory is a limits type. When a max is encoded it is recorded along with
+// IsMaxEncoded, and a min greater than that max is rejected.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-memory
+func decodeMemory(r *bytes.Reader) (*wasm.Memory, error) {
+	min, maxP, err := decodeLimitsType(r)
+	if err != nil {
+		return nil, err
+	}
+
+	if maxP == nil {
+		return &wasm.Memory{Min: min}, nil
+	}
+
+	if upper := *maxP; min > upper {
+		return nil, fmt.Errorf("min %d pages (%s) > max %d pages (%s)",
+			min, wasm.PagesToUnitOfBytes(min), upper, wasm.PagesToUnitOfBytes(upper))
+	}
+	return &wasm.Memory{Min: min, Max: *maxP, IsMaxEncoded: true}, nil
+}
+
+// encodeMemory serializes a wasm.Memory into the WebAssembly Binary Format as
+// a limits type. Max is only written when IsMaxEncoded is set.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-memory
+func encodeMemory(i *wasm.Memory) []byte {
+	if i.IsMaxEncoded {
+		return encodeLimitsType(i.Min, &i.Max)
+	}
+	return encodeLimitsType(i.Min, nil)
+}
@@ -0,0 +1,228 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+const (
+	// subsectionIDModuleName identifies the subsection that contains only the module name.
+	subsectionIDModuleName = uint8(0)
+	// subsectionIDFunctionNames identifies a map of indices to function names, in ascending order by function index.
+	subsectionIDFunctionNames = uint8(1)
+	// subsectionIDLocalNames identifies a map of function indices to a map of local indices to their names, in
+	// ascending order by function and local index.
+	subsectionIDLocalNames = uint8(2)
+)
+
+// decodeNameSection deserializes the data associated with the "name" key in SectionIDCustom according to the
+// standard:
+//
+// * ModuleName decode from subsection 0
+// * FunctionNames decode from subsection 1
+// * LocalNames decode from subsection 2
+//
+// limit is the number of bytes of r that belong to the name section. Decoding
+// stops once that many bytes are accounted for, or at EOF while reading a
+// subsection ID. Subsections with any other ID are skipped using their
+// declared size. A subsection whose size field or declared size reaches past
+// limit is rejected, because limit is unsigned and would otherwise wrap around
+// and let the loop keep reading beyond the section.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-namesec
+func decodeNameSection(r *bytes.Reader, limit uint64) (result *wasm.NameSection, err error) {
+	// TODO: add leb128 functions that work on []byte and offset. While using a reader allows us to reuse reader-based
+	// leb128 functions, it is less efficient, causes untestable code and in some cases more complex vs plain []byte.
+	result = &wasm.NameSection{}
+
+	// subsectionID is decoded if known, and skipped if not
+	var subsectionID uint8
+	// subsectionSize is the length to skip when the subsectionID is unknown
+	var subsectionSize uint32
+	var bytesRead uint64
+	for limit > 0 {
+		if subsectionID, err = r.ReadByte(); err != nil {
+			if err == io.EOF {
+				return result, nil
+			}
+			// TODO: untestable as this can't fail for a reason beside EOF reading a byte from a buffer
+			return nil, fmt.Errorf("failed to read a subsection ID: %w", err)
+		}
+		limit--
+
+		if subsectionSize, bytesRead, err = leb128.DecodeUint32(r); err != nil {
+			return nil, fmt.Errorf("failed to read the size of subsection[%d]: %w", subsectionID, err)
+		}
+		// Check before subtracting: both subtractions below are on an
+		// unsigned value and must never go below zero.
+		if bytesRead > limit || uint64(subsectionSize) > limit-bytesRead {
+			return nil, fmt.Errorf("subsection[%d] of size %d exceeds the bounds of the name section", subsectionID, subsectionSize)
+		}
+		limit -= bytesRead
+
+		switch subsectionID {
+		case subsectionIDModuleName:
+			if result.ModuleName, _, err = decodeUTF8(r, "module name"); err != nil {
+				return nil, err
+			}
+		case subsectionIDFunctionNames:
+			if result.FunctionNames, err = decodeFunctionNames(r); err != nil {
+				return nil, err
+			}
+		case subsectionIDLocalNames:
+			if result.LocalNames, err = decodeLocalNames(r); err != nil {
+				return nil, err
+			}
+		default: // Skip other subsections.
+			// Note: Not Seek because it doesn't err when given an offset past EOF. Rather, it leads to undefined state.
+			if _, err = io.CopyN(io.Discard, r, int64(subsectionSize)); err != nil {
+				return nil, fmt.Errorf("failed to skip subsection[%d]: %w", subsectionID, err)
+			}
+		}
+		limit -= uint64(subsectionSize)
+	}
+	return result, nil
+}
+
+// decodeFunctionNames reads the function names subsection from r: a count
+// followed by that many (function index, UTF-8 name) pairs, stored in the
+// order they are read.
+func decodeFunctionNames(r *bytes.Reader) (wasm.NameMap, error) {
+	count, err := decodeFunctionCount(r, subsectionIDFunctionNames)
+	if err != nil {
+		return nil, err
+	}
+
+	names := make(wasm.NameMap, count)
+	for pos := uint32(0); pos < count; pos++ {
+		assoc := &wasm.NameAssoc{}
+		if assoc.Index, err = decodeFunctionIndex(r, subsectionIDFunctionNames); err != nil {
+			return nil, err
+		}
+		if assoc.Name, _, err = decodeUTF8(r, "function[%d] name", assoc.Index); err != nil {
+			return nil, err
+		}
+		names[pos] = assoc
+	}
+	return names, nil
+}
+
+// decodeLocalNames reads the local names subsection from r: a function count,
+// then for each function its index, a local count, and that many
+// (local index, UTF-8 name) pairs. Entries are stored in the order read.
+func decodeLocalNames(r *bytes.Reader) (wasm.IndirectNameMap, error) {
+	count, err := decodeFunctionCount(r, subsectionIDLocalNames)
+	if err != nil {
+		return nil, err
+	}
+
+	byFunction := make(wasm.IndirectNameMap, count)
+	for f := uint32(0); f < count; f++ {
+		funcIdx, err := decodeFunctionIndex(r, subsectionIDLocalNames)
+		if err != nil {
+			return nil, err
+		}
+
+		numLocals, _, err := leb128.DecodeUint32(r)
+		if err != nil {
+			return nil, fmt.Errorf("failed to read the local count for function[%d]: %w", funcIdx, err)
+		}
+
+		entry := &wasm.NameMapAssoc{Index: funcIdx, NameMap: make(wasm.NameMap, numLocals)}
+		for l := uint32(0); l < numLocals; l++ {
+			localIdx, _, err := leb128.DecodeUint32(r)
+			if err != nil {
+				return nil, fmt.Errorf("failed to read a local index of function[%d]: %w", funcIdx, err)
+			}
+
+			name, _, err := decodeUTF8(r, "function[%d] local[%d] name", funcIdx, localIdx)
+			if err != nil {
+				return nil, err
+			}
+			entry.NameMap[l] = &wasm.NameAssoc{Index: localIdx, Name: name}
+		}
+		byFunction[f] = entry
+	}
+	return byFunction, nil
+}
+
+// decodeFunctionIndex reads one LEB128 u32 function index from r. subsectionID
+// is only used to label the error.
+func decodeFunctionIndex(r *bytes.Reader, subsectionID uint8) (uint32, error) {
+	idx, _, err := leb128.DecodeUint32(r)
+	if err == nil {
+		return idx, nil
+	}
+	return 0, fmt.Errorf("failed to read a function index in subsection[%d]: %w", subsectionID, err)
+}
+
+// decodeFunctionCount reads the LEB128 u32 function count that starts a name
+// subsection. subsectionID is only used to label the error.
+func decodeFunctionCount(r *bytes.Reader, subsectionID uint8) (uint32, error) {
+	count, _, err := leb128.DecodeUint32(r)
+	if err == nil {
+		return count, nil
+	}
+	return 0, fmt.Errorf("failed to read the function count of subsection[%d]: %w", subsectionID, err)
+}
+
+// encodeNameSectionData serializes the payload of the "name" custom section
+// (wasm.SectionIDCustom): the module name, function names and local names
+// subsections, in that order.
+//
+// Note: The result can be nil because this does not encode empty subsections
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-namesec
+func encodeNameSectionData(n *wasm.NameSection) (data []byte) {
+	if name := n.ModuleName; name != "" {
+		data = append(data, encodeNameSubsection(subsectionIDModuleName, encodeSizePrefixed([]byte(name)))...)
+	}
+
+	for _, sub := range []struct {
+		id      uint8
+		content []byte
+	}{
+		{subsectionIDFunctionNames, encodeFunctionNameData(n)},
+		{subsectionIDLocalNames, encodeLocalNameData(n)},
+	} {
+		if len(sub.content) > 0 {
+			data = append(data, encodeNameSubsection(sub.id, sub.content)...)
+		}
+	}
+	return data
+}
+
+// encodeFunctionNameData returns the payload of the function name subsection,
+// or nil when there are no function names.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-funcnamesec
+func encodeFunctionNameData(n *wasm.NameSection) []byte {
+	if names := n.FunctionNames; len(names) > 0 {
+		return encodeNameMap(names)
+	}
+	return nil
+}
+
+// encodeNameMap encodes m as its LEB128 u32 entry count followed by each
+// entry as written by encodeNameAssoc.
+func encodeNameMap(m wasm.NameMap) []byte {
+	encoded := leb128.EncodeUint32(uint32(len(m)))
+	for _, assoc := range m {
+		entry := encodeNameAssoc(assoc)
+		encoded = append(encoded, entry...)
+	}
+	return encoded
+}
+
+// encodeLocalNameData returns the payload of the local name subsection, or nil
+// when there are no local names. The payload is the function count followed,
+// per function, by its index and its encoded map of local names.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-localnamesec
+func encodeLocalNameData(n *wasm.NameSection) []byte {
+	locals := n.LocalNames
+	if len(locals) == 0 {
+		return nil
+	}
+
+	encoded := leb128.EncodeUint32(uint32(len(locals)))
+	for _, fn := range locals {
+		encoded = append(encoded, leb128.EncodeUint32(fn.Index)...)
+		encoded = append(encoded, encodeNameMap(fn.NameMap)...)
+	}
+	return encoded
+}
+
+// encodeNameSubsection wraps content as a name subsection: the subsection ID
+// byte, the LEB128 u32 size of content, then content itself.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#subsections%E2%91%A0
+func encodeNameSubsection(subsectionID uint8, content []byte) []byte {
+	size := leb128.EncodeUint32(uint32(len(content)))
+	encoded := make([]byte, 0, 1+len(size)+len(content))
+	encoded = append(encoded, subsectionID)
+	encoded = append(encoded, size...)
+	return append(encoded, content...)
+}
+
+// encodeNameAssoc encodes na as its LEB128 u32 index followed by its
+// size-prefixed name.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-namemap
+func encodeNameAssoc(na *wasm.NameAssoc) []byte {
+	encoded := leb128.EncodeUint32(na.Index)
+	encoded = append(encoded, encodeSizePrefixed([]byte(na.Name))...)
+	return encoded
+}
+
+// encodeSizePrefixed returns data preceded by its length as a LEB128 u32.
+func encodeSizePrefixed(data []byte) []byte {
+	prefixed := leb128.EncodeUint32(uint32(len(data)))
+	prefixed = append(prefixed, data...)
+	return prefixed
+}
@@ -0,0 +1,342 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// decodeTypeSection decodes the type section: a LEB128 vector length followed
+// by that many function types, each read with decodeFunctionType.
+//
+// On failure the returned error wraps the underlying cause, so callers can
+// inspect it with errors.Is or errors.As.
+func decodeTypeSection(features wasm.CoreFeatures, r *bytes.Reader) ([]*wasm.FunctionType, error) {
+	vs, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("get size of vector: %w", err)
+	}
+
+	result := make([]*wasm.FunctionType, vs)
+	for i := range result {
+		if result[i], err = decodeFunctionType(features, r); err != nil {
+			// %w rather than %v keeps the cause in the error chain, matching
+			// the other section decoders in this file.
+			return nil, fmt.Errorf("read %d-th type: %w", i, err)
+		}
+	}
+	return result, nil
+}
+
+// decodeImportSection decodes the import section: a LEB128 vector length
+// followed by that many imports. Errors from decodeImport are returned as-is.
+func decodeImportSection(r *bytes.Reader, features wasm.CoreFeatures) ([]*wasm.Import, error) {
+	count, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("get size of vector: %w", err)
+	}
+
+	imports := make([]*wasm.Import, count)
+	for idx := uint32(0); idx < count; idx++ {
+		imp, err := decodeImport(r, idx, features)
+		if err != nil {
+			return nil, err
+		}
+		imports[idx] = imp
+	}
+	return imports, nil
+}
+
+// decodeFunctionSection decodes the function section: a LEB128 vector length
+// followed by that many LEB128 type indices.
+func decodeFunctionSection(r *bytes.Reader) ([]uint32, error) {
+	count, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("get size of vector: %w", err)
+	}
+
+	typeIndices := make([]uint32, count)
+	for i := range typeIndices {
+		typeIndex, _, err := leb128.DecodeUint32(r)
+		if err != nil {
+			return nil, fmt.Errorf("get type index: %w", err)
+		}
+		typeIndices[i] = typeIndex
+	}
+	return typeIndices, nil
+}
+
+// decodeTableSection decodes the table section: a LEB128 vector length
+// followed by that many tables, each read with decodeTable.
+//
+// More than one table is only accepted when wasm.CoreFeatureReferenceTypes is
+// enabled in features.
+func decodeTableSection(r *bytes.Reader, features wasm.CoreFeatures) ([]*wasm.Table, error) {
+	vs, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		// Wrap the cause instead of discarding it, so callers can tell EOF
+		// from an overflowing length.
+		return nil, fmt.Errorf("error reading size: %w", err)
+	}
+	if vs > 1 {
+		if err := features.RequireEnabled(wasm.CoreFeatureReferenceTypes); err != nil {
+			return nil, fmt.Errorf("at most one table allowed in module as %w", err)
+		}
+	}
+
+	ret := make([]*wasm.Table, vs)
+	for i := range ret {
+		table, err := decodeTable(r, features)
+		if err != nil {
+			return nil, err
+		}
+		ret[i] = table
+	}
+	return ret, nil
+}
+
+// decodeMemorySection decodes the memory section: a LEB128 vector length
+// followed by at most one memory.
+//
+// It returns (nil, nil) when the vector is empty, an error when it declares
+// more than one memory, and otherwise the result of decodeMemory.
+func decodeMemorySection(r *bytes.Reader) (*wasm.Memory, error) {
+	vs, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		// Wrap the cause instead of discarding it.
+		return nil, fmt.Errorf("error reading size: %w", err)
+	}
+	switch {
+	case vs > 1:
+		return nil, fmt.Errorf("at most one memory allowed in module, but read %d", vs)
+	case vs == 0:
+		// An empty vector is a valid memory section; there is nothing to decode.
+		return nil, nil
+	}
+
+	return decodeMemory(r)
+}
+
+// decodeGlobalSection decodes the global section: a LEB128 vector length
+// followed by that many globals. A failing element is reported with its index.
+func decodeGlobalSection(r *bytes.Reader, features wasm.CoreFeatures) ([]*wasm.Global, error) {
+	count, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("get size of vector: %w", err)
+	}
+
+	globals := make([]*wasm.Global, count)
+	for idx := range globals {
+		global, err := decodeGlobal(r, features)
+		if err != nil {
+			return nil, fmt.Errorf("global[%d]: %w", idx, err)
+		}
+		globals[idx] = global
+	}
+	return globals, nil
+}
+
+// decodeExportSection decodes the export section: a LEB128 vector length
+// followed by that many exports, in order.
+//
+// It fails if any export cannot be decoded or if two exports share a name.
+// Errors wrap their underlying cause.
+func decodeExportSection(r *bytes.Reader) ([]*wasm.Export, error) {
+	vs, _, sizeErr := leb128.DecodeUint32(r)
+	if sizeErr != nil {
+		// %w rather than %v keeps the cause in the error chain, matching the
+		// other section decoders in this file.
+		return nil, fmt.Errorf("get size of vector: %w", sizeErr)
+	}
+
+	usedName := make(map[string]struct{}, vs)
+	exportSection := make([]*wasm.Export, 0, vs)
+	for i := wasm.Index(0); i < vs; i++ {
+		export, err := decodeExport(r)
+		if err != nil {
+			return nil, fmt.Errorf("read export: %w", err)
+		}
+		if _, ok := usedName[export.Name]; ok {
+			return nil, fmt.Errorf("export[%d] duplicates name %q", i, export.Name)
+		}
+		usedName[export.Name] = struct{}{}
+		exportSection = append(exportSection, export)
+	}
+	return exportSection, nil
+}
+
+// decodeStartSection decodes the start section, which holds a single LEB128
+// function index, and returns a pointer to it.
+func decodeStartSection(r *bytes.Reader) (*wasm.Index, error) {
+	funcIdx, _, err := leb128.DecodeUint32(r)
+	if err == nil {
+		return &funcIdx, nil
+	}
+	return nil, fmt.Errorf("get function index: %w", err)
+}
+
+// decodeElementSection decodes the element section: a LEB128 vector length
+// followed by that many element segments.
+func decodeElementSection(r *bytes.Reader, features wasm.CoreFeatures) ([]*wasm.ElementSegment, error) {
+	count, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("get size of vector: %w", err)
+	}
+
+	segments := make([]*wasm.ElementSegment, count)
+	for idx := range segments {
+		segment, err := decodeElementSegment(r, features)
+		if err != nil {
+			return nil, fmt.Errorf("read element: %w", err)
+		}
+		segments[idx] = segment
+	}
+	return segments, nil
+}
+
+// decodeCodeSection decodes the code section: a LEB128 vector length followed
+// by that many code entries, each read with decodeCode.
+//
+// On failure the returned error wraps the underlying cause, so callers can
+// inspect it with errors.Is or errors.As.
+func decodeCodeSection(r *bytes.Reader) ([]*wasm.Code, error) {
+	vs, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("get size of vector: %w", err)
+	}
+
+	result := make([]*wasm.Code, vs)
+	for i := range result {
+		if result[i], err = decodeCode(r); err != nil {
+			// %w rather than %v keeps the cause in the error chain, matching
+			// the other section decoders in this file.
+			return nil, fmt.Errorf("read %d-th code segment: %w", i, err)
+		}
+	}
+	return result, nil
+}
+
+// decodeDataSection decodes the data section: a LEB128 vector length followed
+// by that many data segments.
+func decodeDataSection(r *bytes.Reader, features wasm.CoreFeatures) ([]*wasm.DataSegment, error) {
+	count, _, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return nil, fmt.Errorf("get size of vector: %w", err)
+	}
+
+	segments := make([]*wasm.DataSegment, count)
+	for idx := range segments {
+		segment, err := decodeDataSegment(r, features)
+		if err != nil {
+			return nil, fmt.Errorf("read data segment: %w", err)
+		}
+		segments[idx] = segment
+	}
+	return segments, nil
+}
+
+// decodeDataCountSection decodes the data count section, a single LEB128
+// value. io.EOF from the read is tolerated: in that case a pointer to the
+// decoded value (as returned alongside the EOF) is still returned with a nil
+// error. Any other read error is returned unchanged.
+func decodeDataCountSection(r *bytes.Reader) (count *uint32, err error) {
+	n, _, decodeErr := leb128.DecodeUint32(r)
+	switch decodeErr {
+	case nil, io.EOF:
+		// data count is optional, so EOF is fine.
+		return &n, nil
+	default:
+		return nil, decodeErr
+	}
+}
+
+// encodeSection frames a section: the section ID byte, then the size-prefixed
+// contents.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#sections%E2%91%A0
+func encodeSection(sectionID wasm.SectionID, contents []byte) []byte {
+	section := []byte{sectionID}
+	section = append(section, encodeSizePrefixed(contents)...)
+	return section
+}
+
+// encodeTypeSection encodes a wasm.SectionIDType for the given function types
+// in WebAssembly Binary Format.
+//
+// See encodeFunctionType
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#type-section%E2%91%A0
+func encodeTypeSection(types []*wasm.FunctionType) []byte {
+	body := leb128.EncodeUint32(uint32(len(types)))
+	for i := range types {
+		body = append(body, encodeFunctionType(types[i])...)
+	}
+	return encodeSection(wasm.SectionIDType, body)
+}
+
+// encodeImportSection encodes a wasm.SectionIDImport holding the given
+// imports, in order, in WebAssembly Binary Format.
+//
+// See encodeImport
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#import-section%E2%91%A0
+func encodeImportSection(imports []*wasm.Import) []byte {
+	body := leb128.EncodeUint32(uint32(len(imports)))
+	for idx := range imports {
+		body = append(body, encodeImport(imports[idx])...)
+	}
+	return encodeSection(wasm.SectionIDImport, body)
+}
+
+// encodeFunctionSection encodes a wasm.SectionIDFunction holding the type
+// index of each module-defined function, in WebAssembly Binary Format.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#function-section%E2%91%A0
+func encodeFunctionSection(typeIndices []wasm.Index) []byte {
+	body := leb128.EncodeUint32(uint32(len(typeIndices)))
+	for i := range typeIndices {
+		body = append(body, leb128.EncodeUint32(typeIndices[i])...)
+	}
+	return encodeSection(wasm.SectionIDFunction, body)
+}
+
+// encodeCodeSection encodes a wasm.SectionIDCode holding the given code
+// entries, in order, in WebAssembly Binary Format.
+//
+// See encodeCode
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#code-section%E2%91%A0
+func encodeCodeSection(code []*wasm.Code) []byte {
+	body := leb128.EncodeUint32(uint32(len(code)))
+	for idx := range code {
+		body = append(body, encodeCode(code[idx])...)
+	}
+	return encodeSection(wasm.SectionIDCode, body)
+}
+
+// encodeTableSection encodes a wasm.SectionIDTable for the module-defined
+// function in WebAssembly Binary Format.
+//
+// See encodeTable
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#table-section%E2%91%A0
+func encodeTableSection(tables []*wasm.Table) []byte {
+	var contents = leb128.EncodeUint32(uint32(len(tables)))
+	for _, table := range tables {
+		contents = append(contents, encodeTable(table)...)
+	}
+	return encodeSection(wasm.SectionIDTable, contents)
+}
+
+// encodeMemorySection encodes a wasm.SectionIDMemory holding exactly one
+// memory in WebAssembly Binary Format.
+//
+// See encodeMemory
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-section%E2%91%A0
+func encodeMemorySection(memory *wasm.Memory) []byte {
+	body := []byte{1} // vector length: a single memory
+	body = append(body, encodeMemory(memory)...)
+	return encodeSection(wasm.SectionIDMemory, body)
+}
+
+// encodeGlobalSection encodes a wasm.SectionIDGlobal holding the given
+// globals, in order, in WebAssembly Binary Format.
+//
+// See encodeGlobal
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#global-section%E2%91%A0
+func encodeGlobalSection(globals []*wasm.Global) []byte {
+	body := leb128.EncodeUint32(uint32(len(globals)))
+	for idx := range globals {
+		body = append(body, encodeGlobal(globals[idx])...)
+	}
+	return encodeSection(wasm.SectionIDGlobal, body)
+}
+
+// encodeExportSection encodes a wasm.SectionIDExport holding the given
+// exports, in order, in WebAssembly Binary Format.
+//
+// See encodeExport
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#export-section%E2%91%A0
+func encodeExportSection(exports []*wasm.Export) []byte {
+	body := leb128.EncodeUint32(uint32(len(exports)))
+	for idx := range exports {
+		body = append(body, encodeExport(exports[idx])...)
+	}
+	return encodeSection(wasm.SectionIDExport, body)
+}
+
+// encodeStartSection encodes a wasm.SectionIDStart whose contents are the
+// LEB128-encoded function index, in WebAssembly Binary Format.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#start-section%E2%91%A0
+func encodeStartSection(funcidx wasm.Index) []byte {
+	encodedIndex := leb128.EncodeUint32(funcidx)
+	return encodeSection(wasm.SectionIDStart, encodedIndex)
+}
+
+// encodeElementSection encodes a wasm.SectionIDElement holding the given
+// element segments, in order, in WebAssembly Binary Format.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#element-section%E2%91%A0
+func encodeElementSection(elements []*wasm.ElementSegment) []byte {
+	body := leb128.EncodeUint32(uint32(len(elements)))
+	for idx := range elements {
+		body = append(body, encodeElement(elements[idx])...)
+	}
+	return encodeSection(wasm.SectionIDElement, body)
+}
+
+// encodeDataSection encodes a wasm.SectionIDData holding the given data
+// segments, in order, in WebAssembly 1.0 (20191205) Binary Format.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#data-section%E2%91%A0
+func encodeDataSection(datum []*wasm.DataSegment) []byte {
+	body := leb128.EncodeUint32(uint32(len(datum)))
+	for idx := range datum {
+		body = append(body, encodeDataSegment(datum[idx])...)
+	}
+	return encodeSection(wasm.SectionIDData, body)
+}
+
+// encodeCustomSection encodes the contents of a custom section in WebAssembly
+// 1.0 (20191205) Binary Format: the size-prefixed section name followed by the
+// raw section data. This is used for custom sections that are **not**
+// associated with the "name" key.
+//
+// The result carries no section ID or overall size prefix.
+// NOTE(review): presumably the caller frames it with encodeSection - confirm.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#custom-section%E2%91%A0
+func encodeCustomSection(c *wasm.CustomSection) (data []byte) {
+	// The name length is a LEB128 u32, not a single raw byte: casting to byte
+	// would emit a wrong length for names of 128 bytes or more.
+	name := encodeSizePrefixed([]byte(c.Name))
+	data = make([]byte, 0, len(name)+len(c.Data))
+	data = append(data, name...)
+	data = append(data, c.Data...)
+	return data
+}
@@ -0,0 +1,45 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// decodeTable returns the wasm.Table decoded with the WebAssembly Binary Format.
+//
+// It reads the reference type byte, then the limits. A type other than
+// wasm.RefTypeFuncref is only accepted when wasm.CoreFeatureReferenceTypes is
+// enabled. The minimum must not exceed wasm.MaximumFunctionIndex, and a
+// maximum, when present, must not be below the minimum. Errors wrap their
+// underlying cause.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-table
+func decodeTable(r *bytes.Reader, features wasm.CoreFeatures) (*wasm.Table, error) {
+	tableType, err := r.ReadByte()
+	if err != nil {
+		// %w rather than %v keeps the cause (e.g. io.EOF) in the error chain.
+		return nil, fmt.Errorf("read leading byte: %w", err)
+	}
+
+	if tableType != wasm.RefTypeFuncref {
+		if err := features.RequireEnabled(wasm.CoreFeatureReferenceTypes); err != nil {
+			return nil, fmt.Errorf("table type funcref is invalid: %w", err)
+		}
+	}
+
+	// Named limMin/limMax to avoid shadowing the min and max builtins.
+	limMin, limMax, err := decodeLimitsType(r)
+	if err != nil {
+		return nil, fmt.Errorf("read limits: %w", err)
+	}
+	if limMin > wasm.MaximumFunctionIndex {
+		return nil, fmt.Errorf("table min must be at most %d", wasm.MaximumFunctionIndex)
+	}
+	if limMax != nil && *limMax < limMin {
+		return nil, fmt.Errorf("table size minimum must not be greater than maximum")
+	}
+	return &wasm.Table{Min: limMin, Max: limMax, Type: tableType}, nil
+}
+
+// encodeTable returns the wasm.Table encoded in WebAssembly Binary Format:
+// the reference type byte followed by the encoded limits.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-table
+func encodeTable(i *wasm.Table) []byte {
+	encoded := []byte{i.Type}
+	encoded = append(encoded, encodeLimitsType(i.Min, i.Max)...)
+	return encoded
+}
@@ -0,0 +1,89 @@
+package binary
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"unicode/utf8"
+
+	"github.com/tetratelabs/wabin/leb128"
+	"github.com/tetratelabs/wabin/wasm"
+)
+
+// noValType is the encoding of an empty value type vector: a single zero
+// length byte. encodeValTypes returns this slice directly, so it is shared
+// between calls and must not be modified.
+var noValType = []byte{0}
+
+// encodedValTypes is a cache of the size-prefixed binary encoding of each
+// known value type, keyed by that value type. Each entry is the length byte 1
+// followed by the value type itself.
+var encodedValTypes = map[wasm.ValueType][]byte{
+	wasm.ValueTypeI32:       {1, wasm.ValueTypeI32},
+	wasm.ValueTypeI64:       {1, wasm.ValueTypeI64},
+	wasm.ValueTypeF32:       {1, wasm.ValueTypeF32},
+	wasm.ValueTypeF64:       {1, wasm.ValueTypeF64},
+	wasm.ValueTypeExternref: {1, wasm.ValueTypeExternref},
+	wasm.ValueTypeFuncref:   {1, wasm.ValueTypeFuncref},
+	wasm.ValueTypeV128:      {1, wasm.ValueTypeV128},
+}
+
+// encodeValTypes returns the size-prefixed encoding of a value type vector.
+//
+// Empty vectors and single known value types are served from shared cached
+// slices. Lengths 2, 4 and 9 (parameter counts seen in wasi_snapshot_preview1)
+// are built with a single exact-size allocation. Everything else takes the
+// generic LEB128 path.
+func encodeValTypes(vt []wasm.ValueType) []byte {
+	n := len(vt)
+	if n == 0 { // nullary
+		return noValType
+	}
+	if n == 1 { // ex $wasi.fd_close or any result
+		if cached, ok := encodedValTypes[vt[0]]; ok {
+			return cached
+		}
+	}
+	if n == 2 || n == 4 || n == 9 {
+		// These lengths fit in one LEB128 byte, so emit length + types directly.
+		out := make([]byte, 0, n+1)
+		out = append(out, byte(n))
+		return append(out, vt...)
+	}
+	// Slow path others until someone complains with a valid signature
+	prefix := leb128.EncodeUint32(uint32(n))
+	return append(prefix, vt...)
+}
+
+// decodeValueTypes reads num value types from r, one byte each, and validates
+// every byte against the known value types.
+//
+// It returns (nil, nil) when num is zero, the io.ReadFull error when r holds
+// fewer than num bytes, and an "invalid value type" error on the first
+// unknown byte.
+func decodeValueTypes(r *bytes.Reader, num uint32) ([]wasm.ValueType, error) {
+	if num == 0 {
+		return nil, nil
+	}
+
+	// Read straight into the result and validate in place; a separate scratch
+	// buffer would double the allocation for no benefit.
+	ret := make([]wasm.ValueType, num)
+	if _, err := io.ReadFull(r, ret); err != nil {
+		return nil, err
+	}
+
+	for _, v := range ret {
+		switch v {
+		case wasm.ValueTypeI32, wasm.ValueTypeF32, wasm.ValueTypeI64, wasm.ValueTypeF64,
+			wasm.ValueTypeExternref, wasm.ValueTypeFuncref, wasm.ValueTypeV128:
+			// known value type
+		default:
+			return nil, fmt.Errorf("invalid value type: %d", v)
+		}
+	}
+	return ret, nil
+}
+
+// decodeUTF8 decodes a size prefixed string from the reader, returning it and
+// the count of bytes read (the size prefix plus the string bytes).
+//
+// contextFormat and contextArgs describe what is being read and are formatted
+// into every error message. An error is returned when the size cannot be
+// read, when r holds fewer bytes than the declared size, or when the bytes
+// are not valid UTF-8.
+func decodeUTF8(r *bytes.Reader, contextFormat string, contextArgs ...interface{}) (string, uint32, error) {
+	size, sizeOfSize, err := leb128.DecodeUint32(r)
+	if err != nil {
+		return "", 0, fmt.Errorf("failed to read %s size: %w", fmt.Sprintf(contextFormat, contextArgs...), err)
+	}
+
+	// size comes from the input, so check it against what is actually left
+	// before allocating: otherwise a few bytes could request a ~4GiB buffer.
+	// The error mirrors what io.ReadFull reports for a short read, except that
+	// the remaining bytes are left unread.
+	if remaining := r.Len(); uint64(size) > uint64(remaining) {
+		err = io.ErrUnexpectedEOF
+		if remaining == 0 {
+			err = io.EOF
+		}
+		return "", 0, fmt.Errorf("failed to read %s: %w", fmt.Sprintf(contextFormat, contextArgs...), err)
+	}
+
+	buf := make([]byte, size)
+	if _, err = io.ReadFull(r, buf); err != nil {
+		return "", 0, fmt.Errorf("failed to read %s: %w", fmt.Sprintf(contextFormat, contextArgs...), err)
+	}
+
+	if !utf8.Valid(buf) {
+		return "", 0, fmt.Errorf("%s is not valid UTF-8", fmt.Sprintf(contextFormat, contextArgs...))
+	}
+
+	return string(buf), size + uint32(sizeOfSize), nil
+}
@@ -0,0 +1,31 @@
+package ieee754
+
+import (
+	"encoding/binary"
+	"io"
+	"math"
+)
+
+// DecodeFloat32 decodes a float32 in IEEE 754 binary representation.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#floating-point%E2%91%A2
+func DecodeFloat32(r io.Reader) (float32, error) {
+	buf := make([]byte, 4)
+	_, err := io.ReadFull(r, buf)
+	if err != nil {
+		return 0, err
+	}
+	raw := binary.LittleEndian.Uint32(buf)
+	return math.Float32frombits(raw), nil
+}
+
+// DecodeFloat64 decodes a float64 in IEEE 754 binary representation.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#floating-point%E2%91%A2
+func DecodeFloat64(r io.Reader) (float64, error) {
+	buf := make([]byte, 8)
+	_, err := io.ReadFull(r, buf)
+	if err != nil {
+		return 0, err
+	}
+	raw := binary.LittleEndian.Uint64(buf)
+	return math.Float64frombits(raw), nil
+}
@@ -0,0 +1,240 @@
+package leb128
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+)
+
+const (
+	// maxVarintLen32 is the maximum number of bytes DecodeUint32 reads for a
+	// single value.
+	maxVarintLen32 = 5
+	// maxVarintLen64 is the maximum number of bytes DecodeUint64 reads for a
+	// single value.
+	maxVarintLen64 = 10
+)
+
+var (
+	// errOverflow32 is returned by the 32-bit decoders when the encoding does
+	// not fit.
+	errOverflow32 = errors.New("overflows a 32-bit integer")
+	// errOverflow33 is returned by DecodeInt33AsInt64 when the encoding does
+	// not fit.
+	errOverflow33 = errors.New("overflows a 33-bit integer")
+	// errOverflow64 is returned by the 64-bit decoders when the encoding does
+	// not fit.
+	errOverflow64 = errors.New("overflows a 64-bit integer")
+)
+
+// EncodeInt32 returns the signed LEB128 encoding of value. It widens the
+// value to 64 bits and delegates to EncodeInt64.
+//
+// See https://en.wikipedia.org/wiki/LEB128#Encode_signed_integer
+func EncodeInt32(value int32) []byte {
+	widened := int64(value)
+	return EncodeInt64(widened)
+}
+
+// EncodeInt64 returns the signed LEB128 encoding of value.
+//
+// See https://en.wikipedia.org/wiki/LEB128#Encode_signed_integer
+func EncodeInt64(value int64) (buf []byte) {
+	for more := true; more; {
+		// Peel off the low seven bits, remembering whether the top one (the
+		// sign bit of this group) is set.
+		group := uint8(value & 0x7f)
+		signBitSet := value&0x40 != 0
+		value >>= 7
+
+		// Encoding is complete once what remains is pure sign extension that
+		// agrees with this group's sign bit: 0 for non-negative, -1 for
+		// negative. Otherwise flag that more bytes follow.
+		if (value == 0 && !signBitSet) || (value == -1 && signBitSet) {
+			more = false
+		} else {
+			group |= 0x80
+		}
+		buf = append(buf, group)
+	}
+	return buf
+}
+
+// EncodeUint32 returns the unsigned LEB128 encoding of value. It widens the
+// value to 64 bits and delegates to EncodeUint64.
+//
+// See https://en.wikipedia.org/wiki/LEB128#Encode_unsigned_integer
+func EncodeUint32(value uint32) []byte {
+	widened := uint64(value)
+	return EncodeUint64(widened)
+}
+
+// EncodeUint64 returns the unsigned LEB128 encoding of value.
+//
+// See https://en.wikipedia.org/wiki/LEB128#Encode_unsigned_integer
+func EncodeUint64(value uint64) (buf []byte) {
+	// While more than seven bits remain, emit the low seven with the
+	// continuation bit set.
+	for value >= 0x80 {
+		buf = append(buf, uint8(value&0x7f)|0x80)
+		value >>= 7
+	}
+	// The final group (possibly zero) has the continuation bit clear.
+	return append(buf, uint8(value))
+}
+
+// DecodeUint32 reads an unsigned LEB128 value of at most maxVarintLen32 bytes
+// from r, returning the value and the number of bytes consumed. A read error
+// is returned as-is. errOverflow32 is returned when the encoding is too long
+// or when the final byte has any of its four high bits set.
+func DecodeUint32(r *bytes.Reader) (ret uint32, bytesRead uint64, err error) {
+	// Derived from https://github.com/golang/go/blob/aafad20b617ee63d58fcd4f6e0d98fe27760678c/src/encoding/binary/varint.go
+	// with the modification on the overflow handling tailored for 32-bits.
+	const lastIndex = maxVarintLen32 - 1
+	var shift uint32
+	for i := 0; i <= lastIndex; i++ {
+		b, readErr := r.ReadByte()
+		if readErr != nil {
+			return 0, 0, readErr
+		}
+		if b&0x80 != 0 {
+			// Continuation byte: accumulate its seven payload bits.
+			ret |= (uint32(b) & 0x7f) << shift
+			shift += 7
+			continue
+		}
+		// Unused bits must be all zero.
+		if i == lastIndex && b&0xf0 != 0 {
+			return 0, 0, errOverflow32
+		}
+		return ret | uint32(b)<<shift, uint64(i) + 1, nil
+	}
+	return 0, 0, errOverflow32
+}
+
+// DecodeUint64 reads an unsigned LEB128 value of at most maxVarintLen64 bytes
+// from r, returning the value and the number of bytes consumed. A read error
+// is returned as-is. errOverflow64 is returned when the encoding is too long
+// or when the final byte is greater than one.
+func DecodeUint64(r *bytes.Reader) (ret uint64, bytesRead uint64, err error) {
+	// Derived from https://github.com/golang/go/blob/aafad20b617ee63d58fcd4f6e0d98fe27760678c/src/encoding/binary/varint.go
+	const lastIndex = maxVarintLen64 - 1
+	var shift uint64
+	for i := 0; i <= lastIndex; i++ {
+		b, readErr := r.ReadByte()
+		if readErr != nil {
+			return 0, 0, readErr
+		}
+		if b&0x80 != 0 {
+			// Continuation byte: accumulate its seven payload bits.
+			ret |= (uint64(b) & 0x7f) << shift
+			shift += 7
+			continue
+		}
+		// Unused bits (non first bit) must all be zero.
+		if i == lastIndex && b > 1 {
+			return 0, 0, errOverflow64
+		}
+		return ret | uint64(b)<<shift, uint64(i) + 1, nil
+	}
+	return 0, 0, errOverflow64
+}
+
+// DecodeInt32 reads a signed LEB128 value from r, returning the value and the
+// number of bytes consumed. Read errors are wrapped with "readByte failed".
+// errOverflow32 is returned when more than five bytes were used, or when the
+// fifth byte's unused bits (mask 0b00110000) disagree with the sign of the
+// result.
+func DecodeInt32(r *bytes.Reader) (ret int32, bytesRead uint64, err error) {
+	var shift int
+	for {
+		var b byte
+		if b, err = r.ReadByte(); err != nil {
+			return 0, 0, fmt.Errorf("readByte failed: %w", err)
+		}
+		ret |= (int32(b) & 0x7f) << shift
+		shift += 7
+		bytesRead++
+		if b&0x80 != 0 {
+			continue // more bytes follow
+		}
+
+		// Final byte: sign-extend when its sign bit is set and bits remain.
+		if shift < 32 && (b&0x40) != 0 {
+			ret |= ^0 << shift
+		}
+		// Over flow checks.
+		// fixme: can be optimized.
+		unused := b & 0b00110000
+		switch {
+		case bytesRead > 5:
+			return 0, 0, errOverflow32
+		case bytesRead == 5 && ret < 0 && unused != 0b00110000:
+			return 0, 0, errOverflow32
+		case bytesRead == 5 && ret >= 0 && unused != 0x00:
+			return 0, 0, errOverflow32
+		}
+		return ret, bytesRead, nil
+	}
+}
+
+// DecodeInt33AsInt64 is a special cased decoder for wasm.BlockType which is encoded as a positive signed integer, yet
+// still needs to fit the 32-bit range of allowed indices. Hence, this is 33, not 32-bit!
+//
+// It returns the decoded value sign-extended into an int64 and the number of
+// bytes consumed. Read errors are wrapped with "readByte failed".
+// errOverflow33 is returned when the encoding needs more than five bytes, or
+// when the fifth byte's unused bit (mask 0b00100000) disagrees with the sign
+// of the result.
+//
+// See https://webassembly.github.io/spec/core/binary/instructions.html#control-instructions
+func DecodeInt33AsInt64(r *bytes.Reader) (ret int64, bytesRead uint64, err error) {
+	const (
+		int33Mask  int64 = 1 << 7         // continuation bit of an encoded byte
+		int33Mask2       = ^int33Mask     // everything except the continuation bit
+		int33Mask3       = 1 << 6         // sign bit of the final encoded byte
+		int33Mask4       = 8589934591     // 2^33-1
+		int33Mask5       = 1 << 32        // sign bit of a 33-bit value
+		int33Mask6       = int33Mask4 + 1 // 2^33
+	)
+	var shift int
+	var b int64
+	var rb byte
+	for shift < 35 {
+		rb, err = r.ReadByte()
+		if err != nil {
+			return 0, 0, fmt.Errorf("readByte failed: %w", err)
+		}
+		b = int64(rb)
+		ret |= (b & int33Mask2) << shift
+		shift += 7
+		bytesRead++
+		if b&int33Mask == 0 {
+			break
+		}
+	}
+
+	// The loop stops after five bytes even if the fifth still has its
+	// continuation bit set. Such an encoding is longer than 33 bits allow, so
+	// reject it rather than silently truncating, as DecodeInt32 and
+	// DecodeInt64 do for over-long input.
+	if b&int33Mask != 0 {
+		return 0, 0, errOverflow33
+	}
+
+	// fixme: can be optimized
+	if shift < 33 && (b&int33Mask3) == int33Mask3 {
+		ret |= int33Mask4 << shift
+	}
+	ret = ret & int33Mask4
+
+	// if 33rd bit == 1, we translate it as a corresponding signed-33bit minus value
+	if ret&int33Mask5 > 0 {
+		ret = ret - int33Mask6
+	}
+	// Over flow checks.
+	// fixme: can be optimized.
+	if bytesRead == 5 {
+		unused := b & 0b00100000
+		if (ret < 0 && unused != 0b00100000) || (ret >= 0 && unused != 0x00) {
+			return 0, 0, errOverflow33
+		}
+	}
+	return ret, bytesRead, nil
+}
+
+// DecodeInt64 reads a signed LEB128 value from r, returning the value and the
+// number of bytes consumed. Read errors are wrapped with "readByte failed".
+// errOverflow64 is returned when more than ten bytes were used, or when the
+// tenth byte's unused bits (mask 0b00111110) disagree with the sign of the
+// result.
+func DecodeInt64(r *bytes.Reader) (ret int64, bytesRead uint64, err error) {
+	const signBit = 1 << 6 // sign bit of the final encoded byte
+	var shift int
+	for {
+		var b byte
+		if b, err = r.ReadByte(); err != nil {
+			return 0, 0, fmt.Errorf("readByte failed: %w", err)
+		}
+		ret |= (int64(b) & 0x7f) << shift
+		shift += 7
+		bytesRead++
+		if b&0x80 != 0 {
+			continue // more bytes follow
+		}
+
+		// Final byte: sign-extend when its sign bit is set and bits remain.
+		if shift < 64 && (b&signBit) == signBit {
+			ret |= -1 << shift
+		}
+		// Over flow checks.
+		// fixme: can be optimized.
+		unused := b & 0b00111110
+		switch {
+		case bytesRead > 10:
+			return 0, 0, errOverflow64
+		case bytesRead == 10 && ret < 0 && unused != 0b00111110:
+			return 0, 0, errOverflow64
+		case bytesRead == 10 && ret >= 0 && unused != 0x00:
+			return 0, 0, errOverflow64
+		}
+		return ret, bytesRead, nil
+	}
+}
@@ -0,0 +1,86 @@
+package wasm
+
+import "fmt"
+
+// ImportFuncCount returns the number of imported functions, which may be
+// zero. Added to SectionElementCount of SectionIDFunction, it gives the size
+// of the function index namespace.
+func (m *Module) ImportFuncCount() uint32 {
+	count := m.importCount(ExternTypeFunc)
+	return count
+}
+
+// ImportTableCount returns the number of imported tables, which may be zero.
+// Added to SectionElementCount of SectionIDTable, it gives the size of the
+// table index namespace.
+func (m *Module) ImportTableCount() uint32 {
+	count := m.importCount(ExternTypeTable)
+	return count
+}
+
+// ImportMemoryCount returns the number of imported memories, which may be
+// zero. Added to SectionElementCount of SectionIDMemory, it gives the size of
+// the memory index namespace.
+func (m *Module) ImportMemoryCount() uint32 {
+	// TODO: once validation happens on decode, this is zero or one.
+	count := m.importCount(ExternTypeMemory)
+	return count
+}
+
+// ImportGlobalCount returns the number of imported globals, which may be
+// zero. Added to SectionElementCount of SectionIDGlobal, it gives the size of
+// the global index namespace.
+func (m *Module) ImportGlobalCount() uint32 {
+	count := m.importCount(ExternTypeGlobal)
+	return count
+}
+
+// importCount returns the count of a specific type of import. This is important because it is easy to mistake the
+// length of the import section with the count of a specific kind of import.
+func (m *Module) importCount(et ExternType) (res uint32) {
+	for _, im := range m.ImportSection {
+		if im.Type == et {
+			res++
+		}
+	}
+	return
+}
+
+// SectionElementCount returns the count of elements in a given section ID
+//
+// For example...
+// * SectionIDType returns the count of FunctionType
+// * SectionIDCustom returns one if the NameSection is present
+// * SectionIDHostFunction returns the count of HostFunctionSection
+// * SectionIDExport returns the count of unique export names
+func (m *Module) SectionElementCount(sectionID SectionID) uint32 { // element as in vector elements!
+	switch sectionID {
+	case SectionIDCustom:
+		numCustomSections := uint32(len(m.CustomSections))
+		if m.NameSection != nil {
+			numCustomSections++
+		}
+		return numCustomSections
+	case SectionIDType:
+		return uint32(len(m.TypeSection))
+	case SectionIDImport:
+		return uint32(len(m.ImportSection))
+	case SectionIDFunction:
+		return uint32(len(m.FunctionSection))
+	case SectionIDTable:
+		return uint32(len(m.TableSection))
+	case SectionIDMemory:
+		if m.MemorySection != nil {
+			return 1
+		}
+		return 0
+	case SectionIDGlobal:
+		return uint32(len(m.GlobalSection))
+	case SectionIDExport:
+		return uint32(len(m.ExportSection))
+	case SectionIDStart:
+		if m.StartSection != nil {
+			return 1
+		}
+		return 0
+	case SectionIDElement:
+		return uint32(len(m.ElementSection))
+	case SectionIDCode:
+		return uint32(len(m.CodeSection))
+	case SectionIDData:
+		return uint32(len(m.DataSection))
+	default:
+		panic(fmt.Errorf("BUG: unknown section: %d", sectionID))
+	}
+}
@@ -0,0 +1,212 @@
+package wasm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// CoreFeatures is a bit flag of WebAssembly Core specification features. See
+// https://github.com/WebAssembly/proposals for proposals and their status.
+//
+// Constants define individual features, such as CoreFeatureMultiValue, or
+// groups of "finished" features, assigned to a WebAssembly Core Specification
+// version, e.g. CoreFeaturesV1 or CoreFeaturesV2.
+//
+// Note: Numeric values are not intended to be interpreted except as bit flags.
+type CoreFeatures uint64
+
+// CoreFeaturesV1 are features included in the WebAssembly Core Specification
+// 1.0. As of late 2022, this is the only version that is a Web Standard (W3C
+// Recommendation). This group consists solely of CoreFeatureMutableGlobal.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/
+const CoreFeaturesV1 = CoreFeatureMutableGlobal
+
+// CoreFeaturesV2 are features included in the WebAssembly Core Specification
+// 2.0 (20220419). As of late 2022, version 2.0 is a W3C working draft, not yet
+// a Web Standard (W3C Recommendation). This group is CoreFeaturesV1 combined
+// with the individual features listed below.
+//
+// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#release-1-1
+const CoreFeaturesV2 = CoreFeaturesV1 |
+	CoreFeatureBulkMemoryOperations |
+	CoreFeatureMultiValue |
+	CoreFeatureNonTrappingFloatToIntConversion |
+	CoreFeatureReferenceTypes |
+	CoreFeatureSignExtensionOps |
+	CoreFeatureSIMD
+
+const (
+	// CoreFeatureBulkMemoryOperations adds instructions that modify ranges of
+	// memory or table entries ("bulk-memory-operations"). This is included in
+	// CoreFeaturesV2, but not CoreFeaturesV1.
+	//
+	// Here are the notable effects:
+	//   - Adds `memory.fill`, `memory.init`, `memory.copy` and `data.drop`
+	//     instructions.
+	//   - Adds `table.init`, `table.copy` and `elem.drop` instructions.
+	//   - Introduces a "passive" form of element and data segments.
+	//   - Stops checking "active" element and data segment boundaries at
+	//     compile-time, meaning they can error at runtime.
+	//
+	// Note: "bulk-memory-operations" is mixed with the "reference-types"
+	// proposal due to the WebAssembly Working Group merging them
+	// "mutually dependent". Therefore, enabling this feature requires enabling
+	// CoreFeatureReferenceTypes, and vice-versa.
+	//
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md
+	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md and
+	// https://github.com/WebAssembly/spec/pull/1287
+	CoreFeatureBulkMemoryOperations CoreFeatures = 1 << iota
+
+	// CoreFeatureMultiValue enables multiple values ("multi-value"). This is
+	// included in CoreFeaturesV2, but not CoreFeaturesV1.
+	//
+	// Here are the notable effects:
+	//   - Function (`func`) types allow more than one result.
+	//   - Block types (`block`, `loop` and `if`) can be arbitrary function
+	//     types.
+	//
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/multi-value/Overview.md
+	CoreFeatureMultiValue
+
+	// CoreFeatureMutableGlobal allows globals to be mutable. This is included
+	// in both CoreFeaturesV1 and CoreFeaturesV2.
+	//
+	// When false, an api.Global can never be cast to an api.MutableGlobal, and
+	// any wasm that includes global vars will fail to parse.
+	CoreFeatureMutableGlobal
+
+	// CoreFeatureNonTrappingFloatToIntConversion enables non-trapping
+	// float-to-int conversions ("nontrapping-float-to-int-conversion"). This
+	// is included in CoreFeaturesV2, but not CoreFeaturesV1.
+	//
+	// The only effect of enabling is allowing the following instructions,
+	// which return 0 on NaN instead of panicking.
+	//   - `i32.trunc_sat_f32_s`
+	//   - `i32.trunc_sat_f32_u`
+	//   - `i32.trunc_sat_f64_s`
+	//   - `i32.trunc_sat_f64_u`
+	//   - `i64.trunc_sat_f32_s`
+	//   - `i64.trunc_sat_f32_u`
+	//   - `i64.trunc_sat_f64_s`
+	//   - `i64.trunc_sat_f64_u`
+	//
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/nontrapping-float-to-int-conversion/Overview.md
+	CoreFeatureNonTrappingFloatToIntConversion
+
+	// CoreFeatureReferenceTypes enables various instructions and features
+	// related to table and new reference types. This is included in
+	// CoreFeaturesV2, but not CoreFeaturesV1.
+	//
+	//   - Introduction of new value types: `funcref` and `externref`.
+	//   - Support for the following new instructions:
+	//     - `ref.null`
+	//     - `ref.func`
+	//     - `ref.is_null`
+	//     - `table.fill`
+	//     - `table.get`
+	//     - `table.grow`
+	//     - `table.set`
+	//     - `table.size`
+	//   - Support for multiple tables per module:
+	//     - `call_indirect`, `table.init`, `table.copy` and `elem.drop`
+	//   - Support for instructions that can take a non-zero table index.
+	//     - Element segments can take non-zero table index.
+	//
+	// Note: "reference-types" is mixed with the "bulk-memory-operations"
+	// proposal due to the WebAssembly Working Group merging them
+	// "mutually dependent". Therefore, enabling this feature requires enabling
+	// CoreFeatureBulkMemoryOperations, and vice-versa.
+	//
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md
+	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md and
+	// https://github.com/WebAssembly/spec/pull/1287
+	CoreFeatureReferenceTypes
+
+	// CoreFeatureSignExtensionOps enables sign extension instructions
+	// ("sign-extension-ops"). This is included in CoreFeaturesV2, but not
+	// CoreFeaturesV1.
+	//
+	// Adds instructions:
+	//   - `i32.extend8_s`
+	//   - `i32.extend16_s`
+	//   - `i64.extend8_s`
+	//   - `i64.extend16_s`
+	//   - `i64.extend32_s`
+	//
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/sign-extension-ops/Overview.md
+	CoreFeatureSignExtensionOps
+
+	// CoreFeatureSIMD enables the vector value type and vector instructions
+	// (aka SIMD). This is included in CoreFeaturesV2, but not CoreFeaturesV1.
+	//
+	// Note: The instruction list is too long to enumerate in godoc.
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md
+	CoreFeatureSIMD
+)
+
+// SetEnabled returns a copy of f with the feature (or group of features)
+// switched on when val is true, or switched off when val is false.
+func (f CoreFeatures) SetEnabled(feature CoreFeatures, val bool) CoreFeatures {
+	if !val {
+		return f &^ feature
+	}
+	return f | feature
+}
+
+// IsEnabled reports whether f shares at least one bit with feature. For a
+// group of features, that means any member of the group is enabled.
+func (f CoreFeatures) IsEnabled(feature CoreFeatures) bool {
+	overlap := f & feature
+	return overlap != 0
+}
+
+// RequireEnabled returns nil when f shares at least one bit with feature, and
+// otherwise an error naming the disabled feature (or group of features).
+func (f CoreFeatures) RequireEnabled(feature CoreFeatures) error {
+	if f&feature != 0 {
+		return nil
+	}
+	return fmt.Errorf("feature %q is disabled", feature)
+}
+
+// String implements fmt.Stringer by returning the name of each enabled
+// feature, in ascending bit order, joined by '|'. Bits without a known name
+// are omitted.
+func (f CoreFeatures) String() string {
+	var names []string
+	// Walk every bit rather than listing features, to reduce code and maintenance.
+	for bit := 0; bit < 64; bit++ {
+		flag := CoreFeatures(1 << bit)
+		if !f.IsEnabled(flag) {
+			continue
+		}
+		if name := featureName(flag); name != "" {
+			names = append(names, name)
+		}
+	}
+	return strings.Join(names, "|")
+}
+
+func featureName(f CoreFeatures) string {
+	switch f {
+	case CoreFeatureMutableGlobal:
+		// match https://github.com/WebAssembly/mutable-global
+		return "mutable-global"
+	case CoreFeatureSignExtensionOps:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/sign-extension-ops/Overview.md
+		return "sign-extension-ops"
+	case CoreFeatureMultiValue:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/multi-value/Overview.md
+		return "multi-value"
+	case CoreFeatureNonTrappingFloatToIntConversion:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/nontrapping-float-to-int-conversion/Overview.md
+		return "nontrapping-float-to-int-conversion"
+	case CoreFeatureBulkMemoryOperations:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md
+		return "bulk-memory-operations"
+	case CoreFeatureReferenceTypes:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md
+		return "reference-types"
+	case CoreFeatureSIMD:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md
+		return "simd"
+	}
+	return ""
+}
@@ -0,0 +1,45 @@
+package wasm
+
+import (
+	"fmt"
+)
+
+const (
+	// MemoryPageSize is the unit of memory length in WebAssembly,
+	// and is defined as 2^16 = 65536 bytes.
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-instances%E2%91%A0
+	MemoryPageSize = uint32(65536)
+	// MemoryPageSizeInBits is the shift amount that converts between pages and bytes.
+	// It satisfies the relation: "1 << MemoryPageSizeInBits == MemoryPageSize".
+	MemoryPageSizeInBits = 16
+)
+
+// MemoryPagesToBytesNum returns the number of bytes contained in the given
+// number of pages. The page count is widened to 64 bits before it is shifted
+// by MemoryPageSizeInBits.
+func MemoryPagesToBytesNum(pages uint32) (bytesNum uint64) {
+	bytesNum = uint64(pages)
+	bytesNum <<= MemoryPageSizeInBits
+	return bytesNum
+}
+
+// PagesToUnitOfBytes converts the pages to a human-readable form similar to what's specified. Ex. 1 -> "64 Ki"
+//
+// Each page is 64 Ki. The size is reported in the largest of Ki, Mi, Gi or Ti
+// for which the value is below 1024 (Ti has no upper bound), and any remainder
+// is truncated. Ex. 24 (1536 Ki) -> "1 Mi"
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-instances%E2%91%A0
+func PagesToUnitOfBytes(pages uint32) string {
+	// Widen before multiplying: in uint32, pages*64 wraps around once
+	// pages >= 1<<26, which would report e.g. "0 Ki" instead of "4 Ti".
+	size := uint64(pages) * 64
+	for _, unit := range [...]string{"Ki", "Mi", "Gi"} {
+		if size < 1024 {
+			return fmt.Sprintf("%d %s", size, unit)
+		}
+		size /= 1024
+	}
+	return fmt.Sprintf("%d Ti", size)
+}
+
+// Below are raw functions used to implement the api.Memory API:
+
+// memoryBytesNumToPages returns the number of whole pages contained in
+// bytesNum. The page count is truncated to 32 bits.
+func memoryBytesNumToPages(bytesNum uint64) (pages uint32) {
+	wholePages := bytesNum >> MemoryPageSizeInBits
+	return uint32(wholePages)
+}
@@ -0,0 +1,496 @@
+package wasm
+
+import (
+	"bytes"
+	"fmt"
+)
+
+// DecodeModule parses the WebAssembly Binary Format (%.wasm) into a Module. This function returns when the input is
+// exhausted or an error occurs. The result can be initialized for use via Store.Instantiate.
+//
+// Here's a description of the parameters:
+// * wasm is the encoded module to parse.
+// * features are the CoreFeatures given to the decoder.
+// * memorySizer receives the minPages and maxPages of a memory and returns the min, capacity and max to use.
+//   NOTE(review): maxPages is presumably nil when no maximum was encoded — confirm against implementations.
+//
+// Here's a description of the return values:
+// * result is the module parsed or nil on error
+// * err is a FormatError invoking the parser, dangling block comments or unexpected characters.
+//
+// See binary.DecodeModule and text.DecodeModule
+type DecodeModule func(
+	wasm []byte,
+	features CoreFeatures,
+	memorySizer func(minPages uint32, maxPages *uint32) (min, capacity, max uint32),
+) (result *Module, err error)
+
+// EncodeModule encodes the given module into a byte slice. The output format depends on the implementation.
+//
+// See binary.EncodeModule
+type EncodeModule func(m *Module) (bytes []byte)
+
+// These are wazero-specific limits, described in RATIONALE.md.
+// TL;DR: indexes are multiplied by 8 (to get offsets in bytes), and the result of that multiplication must be less
+// than the 32-bit maximum. Each limit is therefore 1 << 27.
+const (
+	MaximumGlobals       = uint32(1 << 27)
+	MaximumFunctionIndex = uint32(1 << 27)
+	MaximumTableIndex    = uint32(1 << 27)
+)
+
+// Module is a WebAssembly binary representation.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#modules%E2%91%A8
+//
+// Differences from the specification:
+// * NameSection is the only key ("name") decoded from the SectionIDCustom.
+// * ExportSection is represented as a map for lookup convenience.
+// * Code.GoFunc contains any go `func`. It may be present when Code.Body is not.
+//
+// NOTE(review): as declared in this file, ExportSection is a slice and Code has no GoFunc field — confirm the
+// last two bullets are still current.
+type Module struct {
+	// TypeSection contains the unique FunctionType of functions imported or defined in this module.
+	//
+	// Note: Currently, there is no type ambiguity in the index as WebAssembly 1.0 only defines function type.
+	// In the future, other types may be introduced to support CoreFeatures such as module linking.
+	//
+	// Note: In the Binary Format, this is SectionIDType.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#types%E2%91%A0%E2%91%A0
+	TypeSection []*FunctionType
+
+	// ImportSection contains imported functions, tables, memories or globals required for instantiation
+	// (Store.Instantiate).
+	//
+	// Note: there are no unique constraints relating to the two-level namespace of Import.Module and Import.Name.
+	//
+	// Note: In the Binary Format, this is SectionIDImport.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#import-section%E2%91%A0
+	ImportSection []*Import
+
+	// FunctionSection contains the index in TypeSection of each function defined in this module.
+	//
+	// Note: The function Index namespace begins with imported functions and ends with those defined in this module.
+	// For example, if there are two imported functions and one defined in this module, the function Index 2 is defined
+	// in this module at FunctionSection[0].
+	//
+	// Note: FunctionSection is index correlated with the CodeSection. If given the same position, ex. 2, a function
+	// type is at TypeSection[FunctionSection[2]], while its locals and body are at CodeSection[2].
+	//
+	// Note: In the Binary Format, this is SectionIDFunction.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#function-section%E2%91%A0
+	FunctionSection []Index
+
+	// TableSection contains each table defined in this module.
+	//
+	// Note: The table Index namespace begins with imported tables and ends with those defined in this module.
+	// For example, if there are two imported tables and one defined in this module, the table Index 2 is defined in
+	// this module at TableSection[0].
+	//
+	// Note: Version 1.0 (20191205) of the WebAssembly spec allows at most one table definition per module, so the
+	// length of the TableSection can be zero or one, and can only be one if there is no imported table.
+	//
+	// Note: In the Binary Format, this is SectionIDTable.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#table-section%E2%91%A0
+	TableSection []*Table
+
+	// MemorySection contains the memory defined in this module, or nil if it defines none.
+	//
+	// Note: The memory Index namespace begins with imported memories and ends with those defined in this module.
+	// For example, if there are two imported memories and one defined in this module, the memory Index 2 is the one
+	// defined in this module by MemorySection.
+	//
+	// Note: Version 1.0 (20191205) of the WebAssembly spec allows at most one memory definition per module, so the
+	// MemorySection can be absent or present, and can only be present if there is no imported memory.
+	//
+	// Note: In the Binary Format, this is SectionIDMemory.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-section%E2%91%A0
+	MemorySection *Memory
+
+	// GlobalSection contains each global defined in this module.
+	//
+	// Global indexes are offset by any imported globals because the global index space begins with imports, followed by
+	// ones defined in this module. For example, if there are two imported globals and three defined in this module, the
+	// global at index 2 is defined in this module at GlobalSection[0].
+	//
+	// Note: In the Binary Format, this is SectionIDGlobal.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#global-section%E2%91%A0
+	GlobalSection []*Global
+
+	// ExportSection contains each export defined in this module.
+	//
+	// Note: In the Binary Format, this is SectionIDExport.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0
+	ExportSection []*Export
+
+	// StartSection is the index of a function to call before returning from Store.Instantiate.
+	//
+	// Note: The index here is not the position in the FunctionSection, rather in the function index namespace, which
+	// begins with imported functions.
+	//
+	// Note: In the Binary Format, this is SectionIDStart.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#start-section%E2%91%A0
+	StartSection *Index
+
+	// ElementSection contains each ElementSegment defined in this module.
+	//
+	// Note: In the Binary Format, this is SectionIDElement.
+	ElementSection []*ElementSegment
+
+	// CodeSection is index-correlated with FunctionSection and contains each
+	// function's locals and body.
+	//
+	// When present, the HostFunctionSection of the same index must be nil.
+	// NOTE(review): no HostFunctionSection field is declared on Module here — confirm this sentence is still current.
+	//
+	// Note: In the Binary Format, this is SectionIDCode.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#code-section%E2%91%A0
+	CodeSection []*Code
+
+	// DataSection contains each DataSegment defined in this module.
+	//
+	// Note: In the Binary Format, this is SectionIDData.
+	DataSection []*DataSegment
+
+	// DataCountSection is an optional section that holds the number of data segments in the data section.
+	//
+	// Note: This may exist in WebAssembly 2.0 or WebAssembly 1.0 with CoreFeatureBulkMemoryOperations.
+	// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#data-count-section
+	// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions
+	DataCountSection *uint32
+
+	// NameSection is set when the SectionIDCustom "name" was successfully decoded from the binary format.
+	//
+	// Note: This is the only SectionIDCustom defined in the WebAssembly Binary Format.
+	// Others are read into CustomSections.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#name-section%E2%91%A0
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#custom-section%E2%91%A0
+	NameSection *NameSection
+
+	// CustomSections are set when SectionIDCustom sections other than "name" were successfully decoded from the
+	// binary format.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#custom-section%E2%91%A0
+	CustomSections []*CustomSection
+}
+
+// Index is the offset in an index namespace, not necessarily an absolute position in a Module section. This is because
+// index namespaces are often preceded by a corresponding type in the Module.ImportSection.
+//
+// For example, the function index namespace starts with any ExternTypeFunc in the Module.ImportSection followed by
+// the Module.FunctionSection.
+//
+// Note: This is a type alias of uint32.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-index
+type Index = uint32
+
+// FunctionType is a function signature: the value types a function accepts
+// and the value types it returns. Either sequence may be empty.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#function-types%E2%91%A0
+type FunctionType struct {
+	// Params are the possibly empty sequence of value types accepted by a function with this signature.
+	Params []ValueType
+
+	// Results are the possibly empty sequence of value types returned by a function with this signature.
+	//
+	// Note: In WebAssembly 1.0 (20191205), there can be at most one result.
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#result-types%E2%91%A0
+	Results []ValueType
+}
+
+// EqualsSignature reports whether this function type's Params and Results
+// match the given params and results, compared element by element.
+func (f *FunctionType) EqualsSignature(params []ValueType, results []ValueType) bool {
+	if !bytes.Equal(f.Params, params) {
+		return false
+	}
+	return bytes.Equal(f.Results, results)
+}
+
+// Import is the binary representation of an import indicated by Type.
+// Only the Desc* field that corresponds to Type is meaningful.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-import
+type Import struct {
+	// Type selects which of the Desc* fields below applies.
+	Type ExternType
+	// Module is the possibly empty primary namespace of this import
+	Module string
+	// Name is the possibly empty secondary namespace of this import
+	Name string
+	// DescFunc is the index in Module.TypeSection when Type equals ExternTypeFunc
+	DescFunc Index
+	// DescTable is the inlined Table when Type equals ExternTypeTable
+	DescTable *Table
+	// DescMem is the inlined Memory when Type equals ExternTypeMemory
+	DescMem *Memory
+	// DescGlobal is the inlined GlobalType when Type equals ExternTypeGlobal
+	DescGlobal *GlobalType
+}
+
+// Memory describes the limits of pages (64KB) in a memory.
+type Memory struct {
+	// Min and Max are the limits, counted in pages.
+	Min, Max uint32
+	// IsMaxEncoded true if the Max is encoded in the original source (binary or text).
+	IsMaxEncoded bool
+}
+
+// Table describes the limits of elements and its type in a table.
+type Table struct {
+	// Min is the lower limit, counted in elements.
+	Min  uint32
+	// Max is the upper limit, counted in elements.
+	// NOTE(review): presumably nil when the table declares no maximum — confirm against the decoder.
+	Max  *uint32
+	// Type is the RefType of the elements in this table.
+	Type RefType
+}
+
+// RefType is either RefTypeFuncref or RefTypeExternref as of WebAssembly core 2.0.
+//
+// Note: This is a type alias of byte, and its values are the corresponding ValueType constants.
+type RefType = byte
+
+const (
+	// RefTypeFuncref represents a reference to a function.
+	RefTypeFuncref = ValueTypeFuncref
+	// RefTypeExternref represents a reference to a host object, which is not currently supported in wazero.
+	RefTypeExternref = ValueTypeExternref
+)
+
+// RefTypeName returns the name of the given RefType: "funcref" for
+// RefTypeFuncref and "externref" for RefTypeExternref. Any other value is
+// rendered as "unknown(0x..)" with the value in hexadecimal.
+func RefTypeName(t RefType) (ret string) {
+	if t == RefTypeFuncref {
+		return "funcref"
+	}
+	if t == RefTypeExternref {
+		return "externref"
+	}
+	return fmt.Sprintf("unknown(0x%x)", t)
+}
+
+// ElementMode represents the mode of an element segment, which is either active, passive or declarative.
+//
+// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/syntax/modules.html#element-segments
+type ElementMode = byte
+
+const (
+	// ElementModeActive is the mode which requires the runtime to initialize a table with the contents of the
+	// .Init field combined with OffsetExpr.
+	ElementModeActive ElementMode = iota
+	// ElementModePassive is the mode which doesn't require the runtime to initialize a table, and is only used
+	// with OpcodeTableInitName.
+	ElementModePassive
+	// ElementModeDeclarative is introduced in the reference-types proposal, and can be used to declare function
+	// indexes used by OpcodeRefFunc.
+	ElementModeDeclarative
+)
+
+// ElementSegment holds initialization instructions for a TableInstance.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#syntax-elem
+type ElementSegment struct {
+	// OffsetExpr returns the table element offset to apply to Init indices.
+	// Note: This can be validated prior to instantiation unless it includes OpcodeGlobalGet (an imported global).
+	// Note: This is only set when Mode is active.
+	OffsetExpr *ConstantExpression
+
+	// TableIndex is the table's index to which this element segment is applied.
+	// Note: This is used if and only if the Mode is active.
+	TableIndex Index
+
+	// The following fields are set and used regardless of the Mode.
+
+	// Init indices are (nullable) table elements, where each index is the function index with which the module
+	// initializes the table.
+	Init []*Index
+
+	// Type holds the type of this element segment, which is the RefType in WebAssembly 2.0.
+	Type RefType
+
+	// Mode is the mode of this element segment.
+	Mode ElementMode
+}
+
+// TableInstance represents a table of (RefTypeFuncref) elements in a module.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#table-instances%E2%91%A0
+type TableInstance struct {
+	// References holds references whose type is either RefTypeFuncref or RefTypeExternref (unsupported).
+	//
+	// Currently, only function references are supported.
+	References []Reference
+
+	// Min is the minimum (function) elements in this table and cannot grow to accommodate ElementSegment.
+	Min uint32
+
+	// Max if present is the maximum (function) elements in this table, or nil if unbounded.
+	Max *uint32
+
+	// Type is either RefTypeFuncref or RefTypeExternref.
+	Type RefType
+}
+
+// ElementInstance represents an element instance in a module: a list of references together with their type.
+//
+// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/exec/runtime.html#element-instances
+type ElementInstance struct {
+	// References holds references whose type is either RefTypeFuncref or RefTypeExternref (unsupported).
+	References []Reference
+	// Type is the RefType of the references in this instance's References.
+	Type RefType
+}
+
+// Reference is the runtime representation of RefType which is either RefTypeFuncref or RefTypeExternref.
+//
+// Note: This is a type alias of uintptr.
+type Reference = uintptr
+
+// GlobalType pairs the value type of a global with whether it is mutable.
+type GlobalType struct {
+	// ValType is the ValueType of the global.
+	ValType ValueType
+	// Mutable is true when the global is declared mutable.
+	Mutable bool
+}
+
+// Global is an entry in the Module.GlobalSection.
+type Global struct {
+	// Type is the value type and mutability of this global.
+	Type *GlobalType
+	// Init is a constant expression.
+	// NOTE(review): presumably this produces the global's initial value — confirm against the instantiation code.
+	Init *ConstantExpression
+}
+
+// ConstantExpression is an Opcode together with its raw Data bytes.
+//
+// NOTE(review): how Data is encoded for each Opcode is not visible in this file — confirm with the decoder.
+type ConstantExpression struct {
+	Opcode Opcode
+	Data   []byte
+}
+
+// Export is the binary representation of an export indicated by Type.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-export
+type Export struct {
+	// Type selects the index namespace that Index refers to.
+	Type ExternType
+
+	// Name is what the host refers to this definition as.
+	Name string
+
+	// Index is the index of the definition to export. The index namespace is determined by Type.
+	// Ex. If ExternTypeFunc, this is a position in the function index namespace.
+	Index Index
+}
+
+// Code is an entry in the Module.CodeSection containing the locals and body of the function.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-code
+type Code struct {
+	// LocalTypes are the types of any function-scoped variables, in insertion order.
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-local
+	LocalTypes []ValueType
+
+	// Body is a sequence of expressions ending in OpcodeEnd.
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-expr
+	Body []byte
+}
+
+// DataSegment is an entry in the Module.DataSection.
+type DataSegment struct {
+	// OffsetExpression is a constant expression.
+	// NOTE(review): presumably the memory offset at which Init is written — confirm against the instantiation code.
+	OffsetExpression *ConstantExpression
+	// Init holds the raw bytes of this segment.
+	Init             []byte
+}
+
+// NameSection represents the known custom name subsections defined in the WebAssembly Binary Format.
+//
+// Note: This can be nil if no names were decoded for any reason including configuration.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#name-section%E2%91%A0
+type NameSection struct {
+	// ModuleName is the symbolic identifier for a module. Ex. math
+	//
+	// Note: This can be empty for any reason including configuration.
+	ModuleName string
+
+	// FunctionNames is an association of a function index to its symbolic identifier. Ex. add
+	//
+	// * the key (idx) is in the function namespace, where module defined functions are preceded by imported ones.
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#functions%E2%91%A7
+	//
+	// Ex. Assuming the below text format is the second import, you would expect FunctionNames[1] = "mul"
+	//	(import "Math" "Mul" (func $mul (param $x f32) (param $y f32) (result f32)))
+	//
+	// Note: FunctionNames are only used for debugging. At runtime, functions are called based on raw numeric index.
+	// Note: This can be nil for any reason including configuration.
+	FunctionNames NameMap
+
+	// LocalNames contains symbolic names for function parameters or locals that have one.
+	//
+	// Note: In the Text Format, function local names can inherit parameter names from their type. Ex.
+	//  * (module (import (func (param $x i32) (param i32))) (func (type 0))) = [{0, {x,0}}]
+	//  * (module (import (func (param i32) (param $y i32))) (func (type 0) (local $z i32))) = [0, [{y,1},{z,2}]]
+	//  * (module (func (param $x i32) (local $y i32) (local $z i32))) = [{x,0},{y,1},{z,2}]
+	//
+	// Note: LocalNames are only used for debugging. At runtime, locals are called based on raw numeric index.
+	// Note: This can be nil for any reason including configuration.
+	LocalNames IndirectNameMap
+}
+
+// CustomSection contains the name and raw data of a custom section.
+type CustomSection struct {
+	// Name is the name of the custom section.
+	Name string
+	// Data is the raw content of the custom section.
+	Data []byte
+}
+
+// NameMap associates an index with any associated names.
+//
+// Note: Often the index namespace bridges multiple sections. For example, the function index namespace starts with any
+// ExternTypeFunc in the Module.ImportSection followed by the Module.FunctionSection.
+//
+// Note: NameMap is unique by NameAssoc.Index, but NameAssoc.Name needn't be unique.
+// Note: When encoding in the Binary format, this must be ordered by NameAssoc.Index
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-namemap
+type NameMap []*NameAssoc
+
+// NameAssoc is a single entry of a NameMap: an Index and the Name associated with it.
+type NameAssoc struct {
+	Index Index
+	Name  string
+}
+
+// IndirectNameMap associates an index with an association of names.
+//
+// Note: IndirectNameMap is unique by NameMapAssoc.Index, but NameMapAssoc.NameMap needn't be unique.
+// Note: When encoding in the Binary format, this must be ordered by NameMapAssoc.Index
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-indirectnamemap
+type IndirectNameMap []*NameMapAssoc
+
+// NameMapAssoc is a single entry of an IndirectNameMap: an Index and the NameMap associated with it.
+type NameMapAssoc struct {
+	Index   Index
+	NameMap NameMap
+}
+
+// SectionID identifies the sections of a Module in the WebAssembly Binary Format.
+//
+// Note: these are defined in the wasm package, instead of the binary package, as a key per section is needed regardless
+// of format, and deferring to the binary type avoids confusion.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#sections%E2%91%A0
+type SectionID = byte
+
+// The identifiers below are numbered consecutively from zero in declaration order.
+const (
+	// SectionIDCustom includes the standard defined NameSection and possibly others not defined in the standard.
+	SectionIDCustom SectionID = iota // don't add anything not in https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#sections%E2%91%A0
+	SectionIDType
+	SectionIDImport
+	SectionIDFunction
+	SectionIDTable
+	SectionIDMemory
+	SectionIDGlobal
+	SectionIDExport
+	SectionIDStart
+	SectionIDElement
+	SectionIDCode
+	SectionIDData
+
+	// SectionIDDataCount may exist in WebAssembly 2.0 or WebAssembly 1.0 with CoreFeatureBulkMemoryOperations enabled.
+	//
+	// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/binary/modules.html#data-count-section
+	// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#bulk-memory-and-table-instructions
+	SectionIDDataCount
+)
+
+// SectionIDName returns the canonical name of a module section, or "unknown"
+// when sectionID is not one of the SectionID constants.
+// https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#sections%E2%91%A0
+func SectionIDName(sectionID SectionID) string {
+	// The SectionID constants are consecutive from zero, so they can index a table directly.
+	names := [...]string{
+		SectionIDCustom:    "custom",
+		SectionIDType:      "type",
+		SectionIDImport:    "import",
+		SectionIDFunction:  "function",
+		SectionIDTable:     "table",
+		SectionIDMemory:    "memory",
+		SectionIDGlobal:    "global",
+		SectionIDExport:    "export",
+		SectionIDStart:     "start",
+		SectionIDElement:   "element",
+		SectionIDCode:      "code",
+		SectionIDData:      "data",
+		SectionIDDataCount: "data_count",
+	}
+	if int(sectionID) < len(names) {
+		return names[sectionID]
+	}
+	return "unknown"
+}
@@ -0,0 +1,180 @@
+package wasm
+
+import (
+	"fmt"
+	"math"
+)
+
+// ValueType describes a numeric type used in Web Assembly 1.0 (20191205). For example, Function parameters and results are
+// only definable as a value type.
+//
+// The following describes how to convert between Wasm and Golang types:
+//
+//   - ValueTypeI32 - uint64(uint32,int32)
+//   - ValueTypeI64 - uint64(int64)
+//   - ValueTypeF32 - EncodeF32 DecodeF32 from float32
+//   - ValueTypeF64 - EncodeF64 DecodeF64 from float64
+//   - ValueTypeExternref - uintptr(unsafe.Pointer(p)) where p is any pointer type in Go (e.g. *string)
+//
+// Ex. Given a Text Format type use (param i64) (result i64), no conversion is necessary.
+//
+//	results, _ := fn(ctx, input)
+//	result := results[0]
+//
+// Ex. Given a Text Format type use (param f64) (result f64), conversion is necessary.
+//
+//	results, _ := fn(ctx, api.EncodeF64(input))
+//	result := api.DecodeF64(results[0])
+//
+// Note: This is a type alias as it is easier to encode and decode in the binary format.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-valtype
+type ValueType = byte
+
+const (
+	// ValueTypeI32 is a 32-bit integer.
+	ValueTypeI32 ValueType = 0x7f
+	// ValueTypeI64 is a 64-bit integer.
+	ValueTypeI64 ValueType = 0x7e
+	// ValueTypeF32 is a 32-bit floating point number.
+	ValueTypeF32 ValueType = 0x7d
+	// ValueTypeF64 is a 64-bit floating point number.
+	ValueTypeF64 ValueType = 0x7c
+
+	// ValueTypeExternref is an externref type.
+	//
+	// Note: in wazero, externref type value are opaque raw 64-bit pointers,
+	// and the ValueTypeExternref type in the signature will be translated as
+	// uintptr in wazero's API level.
+	//
+	// For example, given the import function:
+	//	(func (import "env" "f") (param externref) (result externref))
+	//
+	// This can be defined in Go as:
+	//  r.NewModuleBuilder("env").ExportFunctions(map[string]interface{}{
+	//    "f": func(externref uintptr) (resultExternRef uintptr) { return },
+	//  })
+	//
+	// Note: The usage of this type is toggled with WithFeatureBulkMemoryOperations.
+	ValueTypeExternref ValueType = 0x6f
+
+	// ValueTypeV128 is the v128 type, named "v128" by ValueTypeName.
+	ValueTypeV128    ValueType = 0x7b
+	// ValueTypeFuncref is the funcref type, a reference to a function. RefTypeFuncref is defined as this value.
+	ValueTypeFuncref ValueType = 0x70
+)
+
+// ValueTypeName returns the name of the given ValueType, spelled as in the
+// WebAssembly text format.
+//
+// Note: The result is "unknown" when t is not one of the ValueType constants.
+func ValueTypeName(t ValueType) string {
+	name := "unknown"
+	switch t {
+	case ValueTypeI32:
+		name = "i32"
+	case ValueTypeI64:
+		name = "i64"
+	case ValueTypeF32:
+		name = "f32"
+	case ValueTypeF64:
+		name = "f64"
+	case ValueTypeV128:
+		name = "v128"
+	case ValueTypeFuncref:
+		name = "funcref"
+	case ValueTypeExternref:
+		name = "externref"
+	}
+	return name
+}
+
+// ExternType classifies imports and exports with their respective types.
+//
+// Note: This is a type alias of byte.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#external-types%E2%91%A0
+type ExternType = byte
+
+// Each constant below has a matching text name, returned by ExternTypeName.
+const (
+	ExternTypeFunc   ExternType = 0x00
+	ExternTypeTable  ExternType = 0x01
+	ExternTypeMemory ExternType = 0x02
+	ExternTypeGlobal ExternType = 0x03
+)
+
+// The names below are exported to consolidate parsing behavior for external types.
+// ExternTypeName returns them for the corresponding ExternType.
+const (
+	// ExternTypeFuncName is the name of the WebAssembly Text Format field for ExternTypeFunc.
+	ExternTypeFuncName = "func"
+	// ExternTypeTableName is the name of the WebAssembly Text Format field for ExternTypeTable.
+	ExternTypeTableName = "table"
+	// ExternTypeMemoryName is the name of the WebAssembly Text Format field for ExternTypeMemory.
+	ExternTypeMemoryName = "memory"
+	// ExternTypeGlobalName is the name of the WebAssembly Text Format field for ExternTypeGlobal.
+	ExternTypeGlobalName = "global"
+)
+
+// ExternTypeName returns the WebAssembly Text Format field name for the given
+// type. A value that is not one of the ExternType constants is rendered in
+// hexadecimal instead, ex. 0x4.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A4
+func ExternTypeName(et ExternType) string {
+	// The ExternType constants are 0x00 through 0x03, so they can index a table directly.
+	known := [...]string{
+		ExternTypeFunc:   ExternTypeFuncName,
+		ExternTypeTable:  ExternTypeTableName,
+		ExternTypeMemory: ExternTypeMemoryName,
+		ExternTypeGlobal: ExternTypeGlobalName,
+	}
+	if int(et) < len(known) {
+		return known[et]
+	}
+	return fmt.Sprintf("%#x", et)
+}
+
+// EncodeI32 encodes the input as a ValueTypeI32. The 32-bit pattern of the
+// input is kept and the upper 32 bits of the result are zero.
+func EncodeI32(input int32) uint64 {
+	bits := uint32(input) // reinterpret without sign extension
+	return uint64(bits)
+}
+
+// EncodeI64 encodes the input as a ValueTypeI64 by reinterpreting its 64 bits
+// as unsigned.
+func EncodeI64(input int64) uint64 {
+	encoded := uint64(input)
+	return encoded
+}
+
+// EncodeF32 encodes the input as a ValueTypeF32: its IEEE 754 bit pattern,
+// widened to 64 bits.
+//
+// See DecodeF32
+func EncodeF32(input float32) uint64 {
+	bits := math.Float32bits(input)
+	return uint64(bits)
+}
+
+// DecodeF32 decodes the input as a ValueTypeF32. Only the lower 32 bits of the
+// input are used.
+//
+// See EncodeF32
+func DecodeF32(input uint64) float32 {
+	lower := uint32(input)
+	return math.Float32frombits(lower)
+}
+
+// EncodeF64 encodes the input as a ValueTypeF64: its IEEE 754 bit pattern.
+//
+// See DecodeF64
+func EncodeF64(input float64) uint64 {
+	bits := math.Float64bits(input)
+	return bits
+}
+
+// DecodeF64 decodes the input as a ValueTypeF64 by interpreting it as an
+// IEEE 754 bit pattern.
+//
+// See EncodeF64
+func DecodeF64(input uint64) float64 {
+	value := math.Float64frombits(input)
+	return value
+}
+
+// EncodeExternref encodes the input as a ValueTypeExternref by converting the
+// uintptr to uint64.
+//
+// See DecodeExternref
+func EncodeExternref(input uintptr) uint64 {
+	encoded := uint64(input)
+	return encoded
+}
+
+// DecodeExternref decodes the input as a ValueTypeExternref by converting the
+// uint64 to uintptr.
+//
+// See EncodeExternref
+func DecodeExternref(input uint64) uintptr {
+	ref := uintptr(input)
+	return ref
+}
@@ -0,0 +1,7 @@
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
@@ -0,0 +1,2 @@
+# Improves experience of commands like `make format` on Windows
+* text=auto eol=lf
@@ -0,0 +1,46 @@
+# If you prefer the allow list template instead of the deny list, see community template:
+# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
+#
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+/wazero
+build
+dist
+
+# Test binary, built with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Dependency directories (remove the comment below to include it)
+# vendor/
+
+# Go workspace file
+go.work
+
+# Goland
+.idea
+
+# AssemblyScript
+node_modules
+package-lock.json
+
+# codecov.io
+/coverage.txt
+
+.vagrant
+
+zig-cache/
+.zig-cache/
+zig-out/
+
+.DS_Store
+
+# Ignore compiled stdlib test cases.
+/internal/integration_test/stdlibs/testdata
+/internal/integration_test/libsodium/testdata
@@ -0,0 +1,3 @@
+[submodule "site/themes/hello-friend"]
+	path = site/themes/hello-friend
+	url = https://github.com/panr/hugo-theme-hello-friend.git
@@ -0,0 +1,75 @@
+# Contributing
+
+We welcome contributions from the community. Please read the following guidelines carefully to maximize the chances of your PR being merged.
+
+## Coding Style
+
+- To ensure your change passes format checks, run `make check`. To format your files, you can run `make format`.
+- We follow standard Go table-driven tests and use an internal [testing library](./internal/testing/require) to assert correctness. To verify all tests pass, you can run `make test`.
+
+## DCO
+
+We require a DCO sign-off line in every commit to this repo.
+
+The sign-off is a simple line at the end of the explanation for the
+patch, which certifies that you wrote it or otherwise have the right to
+pass it on as an open-source patch. The rules are pretty simple: if you
+can certify the below (from
+[developercertificate.org](https://developercertificate.org/)):
+
+```
+Developer Certificate of Origin
+Version 1.1
+Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
+660 York Street, Suite 102,
+San Francisco, CA 94110 USA
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+Developer's Certificate of Origin 1.1
+By making a contribution to this project, I certify that:
+(a) The contribution was created in whole or in part by me and I
+    have the right to submit it under the open source license
+    indicated in the file; or
+(b) The contribution is based upon previous work that, to the best
+    of my knowledge, is covered under an appropriate open source
+    license and I have the right under that license to submit that
+    work with modifications, whether created in whole or in part
+    by me, under the same open source license (unless I am
+    permitted to submit under a different license), as indicated
+    in the file; or
+(c) The contribution was provided directly to me by some other
+    person who certified (a), (b) or (c) and I have not modified
+    it.
+(d) I understand and agree that this project and the contribution
+    are public and that a record of the contribution (including all
+    personal information I submit with it, including my sign-off) is
+    maintained indefinitely and may be redistributed consistent with
+    this project or the open source license(s) involved.
+```
+
+then you just add a line to every git commit message:
+
+    Signed-off-by: Joe Smith <joe@gmail.com>
+
+using your real name (sorry, no pseudonyms or anonymous contributions).
+
+You can add the sign off when creating the git commit via `git commit -s`.
+
+## Code Reviews
+
+* The pull request title should describe what the change does and not embed issue numbers.
+The pull request description should only be blank when the change is minor. Any feature should include
+a description of the change and what motivated it. If the change or design changes through
+review, please keep the title and description updated accordingly.
+* A single approval is sufficient to merge. If a reviewer asks for
+changes in a PR they should be addressed before the PR is merged,
+even if another reviewer has already approved the PR.
+* During the review, address the comments and commit the changes
+_without_ squashing the commits. This facilitates incremental reviews
+since the reviewer does not go through all the code again to find out
+what has changed since the last review. When a change goes out of sync with main,
+please rebase and force push, keeping the original commits where practical.
+* Commits are squashed prior to merging a pull request, using the title
+as commit message by default. Maintainers may request contributors to
+edit the pull request title to ensure that it remains descriptive as a
+commit message. Alternatively, maintainers may change the commit message directly.
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2020-2023 wazero authors
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
@@ -0,0 +1,2 @@
+wazero
+Copyright 2020-2023 wazero authors
@@ -0,0 +1,135 @@
+# wazero: the zero dependency WebAssembly runtime for Go developers
+
+[![Go Reference](https://pkg.go.dev/badge/github.com/tetratelabs/wazero.svg)](https://pkg.go.dev/github.com/tetratelabs/wazero) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+
+WebAssembly is a way to safely run code compiled in other languages. Runtimes
+execute WebAssembly Modules (Wasm), which are most often binaries with a `.wasm`
+extension.
+
+wazero is a WebAssembly Core Specification [1.0][1] and [2.0][2] compliant
+runtime written in Go. It has *zero dependencies*, and doesn't rely on CGO.
+This means you can run applications in other languages and still keep cross
+compilation.
+
+Import wazero and extend your Go application with code written in any language!
+
+## Example
+
+The best way to learn wazero is by trying one of our [examples](examples/README.md). The
+most [basic example](examples/basic) extends a Go application with an addition
+function defined in WebAssembly.
+
+## Runtime
+
+There are two runtime configurations supported in wazero: _Compiler_ (the default) and _Interpreter_.
+
+By default, e.g. `wazero.NewRuntime(ctx)`, the Compiler is used if supported. You
+can also force the interpreter like so:
+```go
+r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigInterpreter())
+```
+
+### Interpreter
+Interpreter is a naive interpreter-based implementation of Wasm virtual
+machine. Its implementation doesn't have any platform (GOARCH, GOOS) specific
+code, therefore _interpreter_ can be used for any compilation target available
+for Go (such as `riscv64`).
+
+### Compiler
+Compiler compiles WebAssembly modules into machine code ahead of time (AOT),
+during `Runtime.CompileModule`. This means your WebAssembly functions execute
+natively at runtime. Compiler is faster than Interpreter, often by an order of
+magnitude (10x) or more. This is done without host-specific dependencies.
+
+### Conformance
+
+Both runtimes pass WebAssembly Core [1.0][3] and [2.0][4] specification tests
+on supported platforms:
+
+|   Runtime   |                 Usage                  | amd64 | arm64 | others |
+|:-----------:|:--------------------------------------:|:-----:|:-----:|:------:|
+| Interpreter | `wazero.NewRuntimeConfigInterpreter()` |   ✅   |   ✅   |   ✅    |
+|  Compiler   |  `wazero.NewRuntimeConfigCompiler()`   |   ✅   |   ✅   |   ❌    |
+
+## Support Policy
+
+The below support policy focuses on compatibility concerns of those embedding
+wazero into their Go applications.
+
+### wazero
+
+wazero's [1.0 release][8] happened in March 2023, and is [in use][9] by many
+projects and production sites.
+
+We offer an API stability promise with semantic versioning. In other words, we
+promise to not break any exported function signature without incrementing the
+major version. This does not mean no innovation: New features and behaviors
+happen with a minor version increment, e.g. 1.0.11 to 1.2.0. We also fix bugs
+or change internal details with a patch version, e.g. 1.0.0 to 1.0.1.
+
+You can get the latest version of wazero like this.
+```bash
+go get github.com/tetratelabs/wazero@latest
+```
+
+Please give us a [star][10] if you end up using wazero!
+
+### Go
+
+wazero has no dependencies except Go and [`x/sys`][12], so the only source of
+conflict in your project's use of wazero is the Go version.
+
+wazero follows the same version policy as Go's [Release Policy][5]: two
+versions. wazero will ensure these versions work and bugs are valid if there's
+an issue with a current Go version.
+
+### Platform
+
+wazero has two runtime modes: Interpreter and Compiler. The only supported operating
+systems are ones we test, but that doesn't necessarily mean other operating
+system versions won't work.
+
+We currently test Linux (Ubuntu and scratch), macOS and Windows as packaged by
+[GitHub Actions][6], as well as nested VMs running on Linux for FreeBSD, NetBSD,
+OpenBSD, DragonFly BSD, illumos and Solaris.
+
+We also test cross compilation for many `GOOS` and `GOARCH` combinations.
+
+* Interpreter
+  * Linux is tested on amd64 and arm64 (native) as well as riscv64 via emulation.
+  * Windows, FreeBSD, NetBSD, OpenBSD, DragonFly BSD, illumos and Solaris are
+    tested only on amd64.
+  * macOS is tested only on arm64.
+* Compiler
+  * Linux is tested on amd64 and arm64.
+  * Windows, FreeBSD, NetBSD, DragonFly BSD, illumos and Solaris are
+    tested only on amd64.
+  * macOS is tested only on arm64.
+
+wazero has no dependencies and doesn't require CGO. This means it can also be
+embedded in an application that doesn't use an operating system. This is a main
+differentiator between wazero and alternatives.
+
+We verify zero dependencies by running tests in Docker's [scratch image][7].
+This approach ensures compatibility with any parent image.
+
+### macOS code-signing entitlements
+
+If you're developing for macOS and need to code-sign your application,
+please read issue [#2393][11].
+
+-----
+wazero is a registered trademark of Tetrate.io, Inc. in the United States and/or other countries
+
+[1]: https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/
+[2]: https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/
+[3]: https://github.com/WebAssembly/spec/tree/wg-1.0/test/core
+[4]: https://github.com/WebAssembly/spec/tree/d39195773112a22b245ffbe864bab6d1182ccb06/test/core
+[5]: https://go.dev/doc/devel/release
+[6]: https://github.com/actions/virtual-environments
+[7]: https://docs.docker.com/develop/develop-images/baseimages/#create-a-simple-parent-image-using-scratch
+[8]: https://tetrate.io/blog/introducing-wazero-from-tetrate/
+[9]: https://wazero.io/community/users/
+[10]: https://github.com/wazero/wazero/stargazers
+[11]: https://github.com/wazero/wazero/issues/2393
+[12]: https://pkg.go.dev/golang.org/x/sys
@@ -0,0 +1,214 @@
+package api
+
+import (
+	"fmt"
+	"strings"
+)
+
+// CoreFeatures is a bit flag of WebAssembly Core specification features. See
+// https://github.com/WebAssembly/proposals for proposals and their status.
+//
+// Constants define individual features, such as CoreFeatureMultiValue, or
+// groups of "finished" features, assigned to a WebAssembly Core Specification
+// version, e.g. CoreFeaturesV1 or CoreFeaturesV2.
+//
+// The zero value has no feature enabled. Use SetEnabled to derive a modified
+// copy and IsEnabled or RequireEnabled to query one.
+//
+// Note: Numeric values are not intended to be interpreted except as bit flags.
+type CoreFeatures uint64
+
+// CoreFeaturesV1 are features included in the WebAssembly Core Specification
+// 1.0. As of late 2022, this is the only version that is a Web Standard (W3C
+// Recommendation).
+//
+// This group consists solely of CoreFeatureMutableGlobal.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/
+const CoreFeaturesV1 = CoreFeatureMutableGlobal
+
+// CoreFeaturesV2 are features included in the WebAssembly Core Specification
+// 2.0 (20220419). As of late 2022, version 2.0 is a W3C working draft, not yet
+// a Web Standard (W3C Recommendation).
+//
+// This group is CoreFeaturesV1 combined with each feature flag OR-ed in below.
+//
+// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#release-1-1
+const CoreFeaturesV2 = CoreFeaturesV1 |
+	CoreFeatureBulkMemoryOperations |
+	CoreFeatureMultiValue |
+	CoreFeatureNonTrappingFloatToIntConversion |
+	CoreFeatureReferenceTypes |
+	CoreFeatureSignExtensionOps |
+	CoreFeatureSIMD
+
+// Individual feature flags. The first constant is declared as `1 << iota` and
+// the remaining ones implicitly repeat that expression, so each constant is a
+// distinct single bit whose position is fixed by its declaration order.
+const (
+	// CoreFeatureBulkMemoryOperations adds instructions that modify ranges of
+	// memory or table entries ("bulk-memory-operations"). This is included in
+	// CoreFeaturesV2, but not CoreFeaturesV1.
+	//
+	// Here are the notable effects:
+	//   - Adds `memory.fill`, `memory.init`, `memory.copy` and `data.drop`
+	//     instructions.
+	//   - Adds `table.init`, `table.copy` and `elem.drop` instructions.
+	//   - Introduces a "passive" form of element and data segments.
+	//   - Stops checking "active" element and data segment boundaries at
+	//     compile-time, meaning they can error at runtime.
+	//
+	// Note: "bulk-memory-operations" is mixed with the "reference-types"
+	// proposal due to the WebAssembly Working Group merging them as
+	// "mutually dependent". Therefore, enabling this feature requires enabling
+	// CoreFeatureReferenceTypes, and vice-versa.
+	//
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md
+	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md and
+	// https://github.com/WebAssembly/spec/pull/1287
+	CoreFeatureBulkMemoryOperations CoreFeatures = 1 << iota
+
+	// CoreFeatureMultiValue enables multiple values ("multi-value"). This is
+	// included in CoreFeaturesV2, but not CoreFeaturesV1.
+	//
+	// Here are the notable effects:
+	//   - Function (`func`) types allow more than one result.
+	//   - Block types (`block`, `loop` and `if`) can be arbitrary function
+	//     types.
+	//
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/multi-value/Overview.md
+	CoreFeatureMultiValue
+
+	// CoreFeatureMutableGlobal allows globals to be mutable. This is included
+	// in both CoreFeaturesV1 and CoreFeaturesV2.
+	//
+	// When false, an api.Global can never be cast to an api.MutableGlobal, and
+	// any wasm that includes global vars will fail to parse.
+	CoreFeatureMutableGlobal
+
+	// CoreFeatureNonTrappingFloatToIntConversion enables non-trapping
+	// float-to-int conversions ("nontrapping-float-to-int-conversion"). This
+	// is included in CoreFeaturesV2, but not CoreFeaturesV1.
+	//
+	// The only effect of enabling is allowing the following instructions,
+	// which return 0 on NaN instead of panicking.
+	//   - `i32.trunc_sat_f32_s`
+	//   - `i32.trunc_sat_f32_u`
+	//   - `i32.trunc_sat_f64_s`
+	//   - `i32.trunc_sat_f64_u`
+	//   - `i64.trunc_sat_f32_s`
+	//   - `i64.trunc_sat_f32_u`
+	//   - `i64.trunc_sat_f64_s`
+	//   - `i64.trunc_sat_f64_u`
+	//
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/nontrapping-float-to-int-conversion/Overview.md
+	CoreFeatureNonTrappingFloatToIntConversion
+
+	// CoreFeatureReferenceTypes enables various instructions and features
+	// related to table and new reference types. This is included in
+	// CoreFeaturesV2, but not CoreFeaturesV1.
+	//
+	//   - Introduction of new value types: `funcref` and `externref`.
+	//   - Support for the following new instructions:
+	//     - `ref.null`
+	//     - `ref.func`
+	//     - `ref.is_null`
+	//     - `table.fill`
+	//     - `table.get`
+	//     - `table.grow`
+	//     - `table.set`
+	//     - `table.size`
+	//   - Support for multiple tables per module:
+	//     - `call_indirect`, `table.init`, `table.copy` and `elem.drop`
+	//   - Support for instructions that can take a non-zero table index.
+	//     - Element segments can take non-zero table index.
+	//
+	// Note: "reference-types" is mixed with the "bulk-memory-operations"
+	// proposal due to the WebAssembly Working Group merging them as
+	// "mutually dependent". Therefore, enabling this feature requires enabling
+	// CoreFeatureBulkMemoryOperations, and vice-versa.
+	//
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md
+	// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md and
+	// https://github.com/WebAssembly/spec/pull/1287
+	CoreFeatureReferenceTypes
+
+	// CoreFeatureSignExtensionOps enables sign extension instructions
+	// ("sign-extension-ops"). This is included in CoreFeaturesV2, but not
+	// CoreFeaturesV1.
+	//
+	// Adds instructions:
+	//   - `i32.extend8_s`
+	//   - `i32.extend16_s`
+	//   - `i64.extend8_s`
+	//   - `i64.extend16_s`
+	//   - `i64.extend32_s`
+	//
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/sign-extension-ops/Overview.md
+	CoreFeatureSignExtensionOps
+
+	// CoreFeatureSIMD enables the vector value type and vector instructions
+	// (aka SIMD). This is included in CoreFeaturesV2, but not CoreFeaturesV1.
+	//
+	// Note: The instruction list is too long to enumerate in godoc.
+	// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md
+	CoreFeatureSIMD
+
+	// Update experimental/features.go when adding elements here.
+)
+
+// SetEnabled returns a copy of f with the given feature (or group of
+// features) switched on when val is true, or switched off when val is false.
+// The receiver itself is not modified.
+func (f CoreFeatures) SetEnabled(feature CoreFeatures, val bool) CoreFeatures {
+	if !val {
+		// Clear every bit that is set in feature.
+		return f &^ feature
+	}
+	return f | feature
+}
+
+// IsEnabled reports whether the feature is enabled in f. When feature is a
+// group of features, this returns true if at least one of them is enabled.
+func (f CoreFeatures) IsEnabled(feature CoreFeatures) bool {
+	overlap := f & feature
+	return overlap != 0
+}
+
+// RequireEnabled returns nil when the feature (or at least one feature of the
+// group) is enabled in f. Otherwise, it returns an error naming the disabled
+// feature.
+func (f CoreFeatures) RequireEnabled(feature CoreFeatures) error {
+	if f&feature != 0 {
+		return nil
+	}
+	return fmt.Errorf("feature %q is disabled", feature)
+}
+
+// String implements fmt.Stringer by joining the name of each enabled feature
+// with '|', ordered from the lowest bit to the highest. Bits without a known
+// name are left out.
+func (f CoreFeatures) String() string {
+	var sb strings.Builder
+	// Visit all 64 bits so that new flags need no change here.
+	for bit := 0; bit < 64; bit++ {
+		flag := CoreFeatures(1 << bit)
+		if !f.IsEnabled(flag) {
+			continue
+		}
+		name := featureName(flag)
+		if name == "" {
+			continue
+		}
+		if sb.Len() > 0 {
+			sb.WriteByte('|')
+		}
+		sb.WriteString(name)
+	}
+	return sb.String()
+}
+
+// featureName returns the proposal name for f when it equals one of the
+// individual feature constants, or "" for any other value.
+func featureName(f CoreFeatures) string {
+	switch f {
+	case CoreFeatureBulkMemoryOperations:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md
+		return "bulk-memory-operations"
+	case CoreFeatureMultiValue:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/multi-value/Overview.md
+		return "multi-value"
+	case CoreFeatureMutableGlobal:
+		// match https://github.com/WebAssembly/mutable-global
+		return "mutable-global"
+	case CoreFeatureNonTrappingFloatToIntConversion:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/nontrapping-float-to-int-conversion/Overview.md
+		return "nontrapping-float-to-int-conversion"
+	case CoreFeatureReferenceTypes:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md
+		return "reference-types"
+	case CoreFeatureSignExtensionOps:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/sign-extension-ops/Overview.md
+		return "sign-extension-ops"
+	case CoreFeatureSIMD:
+		// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md
+		return "simd"
+	default:
+		return ""
+	}
+}
@@ -0,0 +1,766 @@
+// Package api includes constants and interfaces used by both end-users and internal implementations.
+package api
+
+import (
+	"context"
+	"fmt"
+	"math"
+
+	"github.com/tetratelabs/wazero/internal/internalapi"
+)
+
+// ExternType classifies imports and exports with their respective types.
+//
+// Note: This is declared as an alias of byte, so values can be used wherever
+// a byte is expected without conversion.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#external-types%E2%91%A0
+type ExternType = byte
+
+const (
+	// ExternTypeFunc classifies a function. See ExternTypeFuncName.
+	ExternTypeFunc   ExternType = 0x00
+	// ExternTypeTable classifies a table. See ExternTypeTableName.
+	ExternTypeTable  ExternType = 0x01
+	// ExternTypeMemory classifies a memory. See ExternTypeMemoryName.
+	ExternTypeMemory ExternType = 0x02
+	// ExternTypeGlobal classifies a global. See ExternTypeGlobalName.
+	ExternTypeGlobal ExternType = 0x03
+)
+
+// The below are exported to consolidate parsing behavior for external types.
+// ExternTypeName returns these for the corresponding ExternType.
+const (
+	// ExternTypeFuncName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeFunc.
+	ExternTypeFuncName = "func"
+	// ExternTypeTableName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeTable.
+	ExternTypeTableName = "table"
+	// ExternTypeMemoryName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeMemory.
+	ExternTypeMemoryName = "memory"
+	// ExternTypeGlobalName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeGlobal.
+	ExternTypeGlobalName = "global"
+)
+
+// ExternTypeName returns the name of the WebAssembly 1.0 (20191205) Text Format field of the given type.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A4
+func ExternTypeName(et ExternType) string {
+	switch et {
+	case ExternTypeFunc:
+		return ExternTypeFuncName
+	case ExternTypeTable:
+		return ExternTypeTableName
+	case ExternTypeMemory:
+		return ExternTypeMemoryName
+	case ExternTypeGlobal:
+		return ExternTypeGlobalName
+	}
+	return fmt.Sprintf("%#x", et)
+}
+
+// ValueType describes a parameter or result type mapped to a WebAssembly
+// function signature.
+//
+// The following describes how to convert between Wasm and Golang types:
+//
+//   - ValueTypeI32 - EncodeU32 DecodeU32 for uint32 / EncodeI32 DecodeI32 for int32
+//   - ValueTypeI64 - uint64(int64)
+//   - ValueTypeF32 - EncodeF32 DecodeF32 from float32
+//   - ValueTypeF64 - EncodeF64 DecodeF64 from float64
+//   - ValueTypeExternref - uintptr(unsafe.Pointer(p)) where p is any pointer
+//     type in Go (e.g. *string)
+//
+// e.g. Given a Text Format type use (param i64) (result i64), no conversion is
+// necessary.
+//
+//	results, _ := fn(ctx, input)
+//	result := results[0]
+//
+// e.g. Given a Text Format type use (param f64) (result f64), conversion is
+// necessary.
+//
+//	results, _ := fn(ctx, api.EncodeF64(input))
+//	result := api.DecodeF64(results[0])
+//
+// Note: This is a type alias as it is easier to encode and decode in the
+// binary format.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-valtype
+type ValueType = byte
+
+const (
+	// ValueTypeI32 is a 32-bit integer.
+	ValueTypeI32 ValueType = 0x7f
+	// ValueTypeI64 is a 64-bit integer.
+	ValueTypeI64 ValueType = 0x7e
+	// ValueTypeF32 is a 32-bit floating point number.
+	ValueTypeF32 ValueType = 0x7d
+	// ValueTypeF64 is a 64-bit floating point number.
+	ValueTypeF64 ValueType = 0x7c
+
+	// ValueTypeExternref is an externref type.
+	//
+	// Note: in wazero, externref type values are opaque raw 64-bit pointers,
+	// and the ValueTypeExternref type in the signature will be translated as
+	// uintptr at wazero's API level.
+	//
+	// For example, given the import function:
+	//	(func (import "env" "f") (param externref) (result externref))
+	//
+	// This can be defined in Go as:
+	//	r.NewHostModuleBuilder("env").
+	//		NewFunctionBuilder().
+	//		WithFunc(func(_ context.Context, _ uintptr) (_ uintptr) { return }).
+	//		Export("f")
+	//
+	// Note: The usage of this type is toggled with api.CoreFeatureBulkMemoryOperations.
+	ValueTypeExternref ValueType = 0x6f
+)
+
+// ValueTypeName returns the name of the given ValueType as written in the
+// WebAssembly text format, e.g. "i32" for ValueTypeI32.
+//
+// Note: This returns "unknown" when the value is not a defined ValueType.
+func ValueTypeName(t ValueType) string {
+	name := "unknown"
+	switch t {
+	case ValueTypeI32:
+		name = "i32"
+	case ValueTypeI64:
+		name = "i64"
+	case ValueTypeF32:
+		name = "f32"
+	case ValueTypeF64:
+		name = "f64"
+	case ValueTypeExternref:
+		name = "externref"
+	}
+	return name
+}
+
+// Module is a sandboxed, ready to execute Wasm module. This can be used to get exported functions, etc.
+//
+// In WebAssembly terminology, this corresponds to a "Module Instance". wazero calls a pre-instantiation module a
+// "Compiled Module", as in wazero.CompiledModule, therefore we call this post-instantiation module simply "Module".
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#module-instances%E2%91%A0
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+//   - Closing the wazero.Runtime closes any Module it instantiated.
+type Module interface {
+	fmt.Stringer
+
+	// Name is the name this module was instantiated with. Exported functions can be imported with this name.
+	Name() string
+
+	// Memory returns a memory defined in this module or nil if there are none.
+	Memory() Memory
+
+	// ExportedFunction returns a function exported from this module or nil if it wasn't.
+	//
+	// # Notes
+	//
+	//   - The default wazero.ModuleConfig attempts to invoke `_start`, which
+	//     in rare cases can close the module. When in doubt, check IsClosed prior
+	//     to invoking a function export after instantiation.
+	//   - The semantics of host functions assumes the existence of an "importing module" because, for example, the host function needs access to
+	//     the memory of the importing module. Therefore, direct use of ExportedFunction is forbidden for host modules.
+	//     Practically speaking, it is usually meaningless to directly call a host function from Go code as it is already somewhere in Go code.
+	ExportedFunction(name string) Function
+
+	// ExportedFunctionDefinitions returns all the exported function
+	// definitions in this module, keyed on export name.
+	ExportedFunctionDefinitions() map[string]FunctionDefinition
+
+	// TODO: Table
+
+	// ExportedMemory returns a memory exported from this module or nil if it wasn't.
+	//
+	// WASI modules require exporting a Memory named "memory". This means that a module successfully initialized
+	// as a WASI Command or Reactor will never return nil for this name.
+	//
+	// See https://github.com/WebAssembly/WASI/blob/snapshot-01/design/application-abi.md#current-unstable-abi
+	ExportedMemory(name string) Memory
+
+	// ExportedMemoryDefinitions returns all the exported memory definitions
+	// in this module, keyed on export name.
+	//
+	// Note: As of WebAssembly Core Specification 2.0, there can be at most one
+	// memory.
+	ExportedMemoryDefinitions() map[string]MemoryDefinition
+
+	// ExportedGlobal returns a global exported from this module or nil if it wasn't.
+	ExportedGlobal(name string) Global
+
+	// CloseWithExitCode releases resources allocated for this Module. Use a non-zero exitCode parameter to indicate a
+	// failure to ExportedFunction callers.
+	//
+	// The error returned here, if present, is about resource de-allocation (such as I/O errors). Only the last error is
+	// returned, so a non-nil return means at least one error happened. Regardless of error, this Module will
+	// be removed, making its name available again.
+	//
+	// Calling this inside a host function is safe, and may cause ExportedFunction callers to receive a sys.ExitError
+	// with the exitCode.
+	CloseWithExitCode(ctx context.Context, exitCode uint32) error
+
+	// Closer closes this module by delegating to CloseWithExitCode with an exit code of zero.
+	Closer
+
+	// IsClosed returns true if the module is closed, so no longer usable.
+	//
+	// This can happen for the following reasons:
+	//   - Closer was called directly.
+	//   - A guest function called Closer indirectly, such as `_start` calling
+	//     `proc_exit`, which internally closed the module.
+	//   - wazero.RuntimeConfig `WithCloseOnContextDone` was enabled and a
+	//     context completion closed the module.
+	//
+	// Where any of the above are possible, check this value before calling an
+	// ExportedFunction, even if you didn't formerly receive a sys.ExitError.
+	// sys.ExitError is only returned on a non-zero code, so something that
+	// closes the module successfully will not result in one.
+	IsClosed() bool
+
+	internalapi.WazeroOnly
+}
+
+// Closer closes a resource. For example, Module embeds this so that it can be
+// closed with an exit code of zero.
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type Closer interface {
+	// Close closes the resource and returns any error that happened while
+	// doing so.
+	//
+	// Note: The context parameter is used for value lookup, such as for
+	// logging. A canceled or otherwise done context will not prevent Close
+	// from succeeding.
+	Close(context.Context) error
+}
+
+// ExportDefinition is a WebAssembly type exported in a module
+// (wazero.CompiledModule). MemoryDefinition and FunctionDefinition both embed
+// this.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type ExportDefinition interface {
+	// ModuleName is the possibly empty name of the module defining this
+	// export.
+	//
+	// Note: This may be different from Module.Name, because a compiled module
+	// can be instantiated multiple times under different names.
+	ModuleName() string
+
+	// Index is the position in the module's index, imports first.
+	Index() uint32
+
+	// Import returns true with the module and name when this was imported.
+	// Otherwise, it returns false.
+	//
+	// Note: Empty string is valid for both names in the WebAssembly Core
+	// Specification, so "" "" is possible. Check isImport rather than
+	// comparing the names against the empty string.
+	Import() (moduleName, name string, isImport bool)
+
+	// ExportNames include all exported names.
+	//
+	// Note: The empty name is allowed in the WebAssembly Core Specification,
+	// so "" is possible.
+	ExportNames() []string
+
+	internalapi.WazeroOnly
+}
+
+// MemoryDefinition is a WebAssembly memory exported in a module
+// (wazero.CompiledModule). Units are in pages (64KB).
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type MemoryDefinition interface {
+	ExportDefinition
+
+	// Min returns the possibly zero initial count of 64KB pages.
+	Min() uint32
+
+	// Max returns the possibly zero max count of 64KB pages. The boolean
+	// result is false if the memory is unbounded.
+	Max() (uint32, bool)
+
+	internalapi.WazeroOnly
+}
+
+// FunctionDefinition is a WebAssembly function exported in a module
+// (wazero.CompiledModule).
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type FunctionDefinition interface {
+	ExportDefinition
+
+	// Name is the module-defined name of the function, which is not necessarily
+	// the same as its export name.
+	Name() string
+
+	// DebugName identifies this function based on its Index or Name in the
+	// module. This is used for errors and stack traces. e.g. "env.abort".
+	//
+	// When the function name is empty, a substitute name is generated by
+	// prefixing '$' to its position in the index. e.g. ".$0" is the
+	// first function (possibly imported) in an unnamed module.
+	//
+	// The format is dot-delimited module and function name, but there are no
+	// restrictions on the module and function name. This means either can be
+	// empty or include dots. e.g. "x.x.x" could mean module "x" and name "x.x",
+	// or it could mean module "x.x" and name "x".
+	//
+	// Note: This name is stable regardless of import or export. For example,
+	// if Import returns true, the value is still based on the Name or Index
+	// and not the imported function name.
+	DebugName() string
+
+	// GoFunction is non-nil when implemented by the embedder instead of a wasm
+	// binary, e.g. via wazero.HostModuleBuilder.
+	//
+	// The expected results are nil, GoFunction or GoModuleFunction.
+	GoFunction() interface{}
+
+	// ParamTypes are the possibly empty sequence of value types accepted by a
+	// function with this signature.
+	//
+	// See ValueType documentation for encoding rules.
+	ParamTypes() []ValueType
+
+	// ParamNames are index-correlated with ParamTypes or nil if not available
+	// for one or more parameters.
+	ParamNames() []string
+
+	// ResultTypes are the results of the function.
+	//
+	// In WebAssembly 1.0 (20191205), there can be at most one result.
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#result-types%E2%91%A0
+	//
+	// See ValueType documentation for encoding rules.
+	ResultTypes() []ValueType
+
+	// ResultNames are index-correlated with ResultTypes or nil if not
+	// available for one or more results.
+	ResultNames() []string
+
+	internalapi.WazeroOnly
+}
+
+// Function is a WebAssembly function exported from an instantiated module
+// (wazero.Runtime InstantiateModule).
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#syntax-func
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type Function interface {
+	// Definition is metadata about this function from its defining module.
+	Definition() FunctionDefinition
+
+	// Call invokes the function with the given parameters and returns any
+	// results or an error for any failure looking up or invoking the function.
+	//
+	// Encoding is described in Definition, and supplying an incorrect count of
+	// parameters vs FunctionDefinition.ParamTypes is an error.
+	//
+	// If the exporting Module was closed during this call, the error returned
+	// may be a sys.ExitError. See Module.CloseWithExitCode for details.
+	//
+	// Call is not goroutine-safe, therefore it is recommended to create
+	// another Function if you want to invoke the same function concurrently.
+	// On the other hand, sequential invocations of Call are allowed.
+	// However, this should not be called again until the previous Call returns.
+	//
+	// To safely encode/decode params/results expressed as uint64, users are encouraged to
+	// use api.EncodeXXX or DecodeXXX functions. See the docs on api.ValueType.
+	//
+	// When RuntimeConfig.WithCloseOnContextDone is toggled, the invocation of this Call method is ensured to be closed
+	// whenever one of the three conditions is met. In the event of close, sys.ExitError will be returned and
+	// the api.Module from which this api.Function is derived will be closed. See the documentation of
+	// WithCloseOnContextDone on wazero.RuntimeConfig for detail. See examples in context_done_example_test.go for
+	// the end-to-end demonstrations of how these terminations can be performed.
+	Call(ctx context.Context, params ...uint64) ([]uint64, error)
+
+	// CallWithStack is an optimized variation of Call that saves memory
+	// allocations when the stack slice is reused across calls.
+	//
+	// Stack length must be at least the max of parameter or result length.
+	// The caller adds parameters in order to the stack, and reads any results
+	// in order from the stack, except in the error case.
+	//
+	// For example, the following reuses the same stack slice to call searchFn
+	// repeatedly saving one allocation per iteration:
+	//
+	//	stack := make([]uint64, 4)
+	//	for i, search := range searchParams {
+	//		// copy the next params to the stack
+	//		copy(stack, search)
+	//		if err := searchFn.CallWithStack(ctx, stack); err != nil {
+	//			return err
+	//		} else if stack[0] == 1 { // found
+	//			return i // searchParams[i] matched!
+	//		}
+	//	}
+	//
+	// # Notes
+	//
+	//   - This is similar to GoModuleFunction, except it is used for calling
+	//     functions instead of implementing them. Moreover, this is used
+	//     regardless of whether the callee is a host or wasm defined function.
+	CallWithStack(ctx context.Context, stack []uint64) error
+
+	internalapi.WazeroOnly
+}
+
+// GoModuleFunction is a Function implemented in Go instead of a wasm binary.
+// The Module parameter is the calling module, used to access memory or
+// exported functions. See GoModuleFunc for an example.
+//
+// The stack includes any parameters encoded according to their ValueType.
+// Its length is the max of parameter or result length. When there are results,
+// write them in order beginning at index zero. Do not use the stack after the
+// function returns.
+//
+// Here's a typical way to read two parameters and write back one.
+//
+//	// read parameters off the stack in index order
+//	argv, argvBuf := api.DecodeU32(stack[0]), api.DecodeU32(stack[1])
+//
+//	// write results back to the stack in index order
+//	stack[0] = api.EncodeU32(ErrnoSuccess)
+//
+// This function can be non-deterministic or cause side effects. It also
+// has special properties not defined in the WebAssembly Core specification.
+// Notably, this uses the caller's memory (via Module.Memory). See
+// https://www.w3.org/TR/wasm-core-1/#host-functions%E2%91%A0
+//
+// Most end users will not define functions directly with this, as they will
+// use reflection or code generators instead. These approaches are more
+// idiomatic as they can map go types to ValueType. This type is exposed for
+// those willing to trade usability and safety for performance.
+//
+// To safely decode/encode values from/to the uint64 stack, users are encouraged to use
+// api.EncodeXXX or api.DecodeXXX functions. See the docs on api.ValueType.
+type GoModuleFunction interface {
+	// Call invokes the function with the calling module and the stack, which
+	// holds the parameters on entry and receives the results.
+	Call(ctx context.Context, mod Module, stack []uint64)
+}
+
+// GoModuleFunc adapts a plain Go func to GoModuleFunction, which is convenient
+// for defining a function inline.
+//
+// For example, the following reads a uint32 from the calling module's memory,
+// at the offset given in parameter zero, and returns it:
+//
+//	api.GoModuleFunc(func(ctx context.Context, mod api.Module, stack []uint64) {
+//		offset := api.DecodeU32(stack[0]) // read the parameter from the stack
+//
+//		ret, ok := mod.Memory().ReadUint32Le(offset)
+//		if !ok {
+//			panic("out of range")
+//		}
+//
+//		stack[0] = api.EncodeU32(ret) // add the result back to the stack.
+//	})
+type GoModuleFunc func(ctx context.Context, mod Module, stack []uint64)
+
+// Call implements GoModuleFunction.Call by invoking the underlying func with
+// the same arguments.
+func (fn GoModuleFunc) Call(ctx context.Context, mod Module, stack []uint64) {
+	fn(ctx, mod, stack)
+}
+
+// GoFunction is an optimized form of GoModuleFunction which doesn't require
+// the Module parameter. See GoFunc for an example.
+//
+// Use this when the function does not need to use the importing module's
+// memory or exported functions.
+type GoFunction interface {
+	// Call invokes the function with the stack, which holds the parameters on
+	// entry and receives the results.
+	Call(ctx context.Context, stack []uint64)
+}
+
+// GoFunc adapts a plain Go func to GoFunction, which is convenient for
+// defining a function inline.
+//
+// For example, the following returns the sum of two uint32 parameters:
+//
+//	api.GoFunc(func(ctx context.Context, stack []uint64) {
+//		x, y := api.DecodeU32(stack[0]), api.DecodeU32(stack[1])
+//		stack[0] = api.EncodeU32(x + y)
+//	})
+type GoFunc func(ctx context.Context, stack []uint64)
+
+// Call implements GoFunction.Call by invoking the underlying func with the
+// same arguments.
+func (fn GoFunc) Call(ctx context.Context, stack []uint64) {
+	fn(ctx, stack)
+}
+
+// Global is a WebAssembly 1.0 (20191205) global exported from an instantiated module (wazero.Runtime InstantiateModule).
+//
+// For example, if the value is not mutable, you can read it once:
+//
+//	offset := module.ExportedGlobal("memory.offset").Get()
+//
+// Globals are allowed by specification to be mutable. However, this can be disabled by configuration. When in doubt,
+// use a type assertion to find out if the value can change. Here's an example:
+//
+//	offset := module.ExportedGlobal("memory.offset")
+//	if _, ok := offset.(api.MutableGlobal); ok {
+//		// value can change
+//	} else {
+//		// value is constant
+//	}
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#globals%E2%91%A0
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type Global interface {
+	fmt.Stringer
+
+	// Type describes the numeric type of the global.
+	Type() ValueType
+
+	// Get returns the last known value of this global.
+	//
+	// See Type for how to decode this value to a Go type.
+	Get() uint64
+}
+
+// MutableGlobal is a Global whose value can be updated at runtime (variable).
+// It adds Set to the read-only methods of Global.
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type MutableGlobal interface {
+	Global
+
+	// Set updates the value of this global.
+	//
+	// See Global.Type for how to encode this value from a Go type.
+	Set(v uint64)
+
+	internalapi.WazeroOnly
+}
+
+// Memory allows restricted access to a module's memory: reading and writing
+// values at an offset, and growing it via Grow.
+//
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#storage%E2%91%A0
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+//   - This includes all value types available in WebAssembly 1.0 (20191205) and all are encoded little-endian.
+type Memory interface {
+	// Definition is metadata about this memory from its defining module.
+	Definition() MemoryDefinition
+
+	// Size returns the memory size in bytes available.
+	// e.g. If the underlying memory has 1 page: 65536
+	//
+	// # Notes
+	//
+	//   - This overflows (returns zero) if the memory has the maximum 65536 pages.
+	//     As a workaround until wazero v2 to fix the return type, use Grow(0) to obtain the current pages and
+	//     multiply by 65536.
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefsyntax-instr-memorymathsfmemorysize%E2%91%A0
+	Size() uint32
+
+	// Grow increases memory by the delta in pages (65536 bytes per page).
+	// The return val is the previous memory size in pages, or false if the
+	// delta was ignored as it exceeds MemoryDefinition.Max.
+	//
+	// # Notes
+	//
+	//   - This is the same as the "memory.grow" instruction defined in the
+	//     WebAssembly Core Specification, except returns false instead of -1.
+	//   - When this returns true, any shared views via Read must be refreshed.
+	//
+	// See MemorySizer Read and https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#grow-mem
+	Grow(deltaPages uint32) (previousPages uint32, ok bool)
+
+	// ReadByte reads a single byte from the underlying buffer at the offset or returns false if out of range.
+	ReadByte(offset uint32) (byte, bool)
+
+	// ReadUint16Le reads a uint16 in little-endian encoding from the underlying buffer at the offset or returns
+	// false if out of range.
+	ReadUint16Le(offset uint32) (uint16, bool)
+
+	// ReadUint32Le reads a uint32 in little-endian encoding from the underlying buffer at the offset or returns
+	// false if out of range.
+	ReadUint32Le(offset uint32) (uint32, bool)
+
+	// ReadFloat32Le reads a float32 from 32 IEEE 754 little-endian encoded bits in the underlying buffer at the offset
+	// or returns false if out of range.
+	//
+	// See math.Float32bits
+	ReadFloat32Le(offset uint32) (float32, bool)
+
+	// ReadUint64Le reads a uint64 in little-endian encoding from the underlying buffer at the offset or returns false
+	// if out of range.
+	ReadUint64Le(offset uint32) (uint64, bool)
+
+	// ReadFloat64Le reads a float64 from 64 IEEE 754 little-endian encoded bits in the underlying buffer at the offset
+	// or returns false if out of range.
+	//
+	// See math.Float64bits
+	ReadFloat64Le(offset uint32) (float64, bool)
+
+	// Read reads byteCount bytes from the underlying buffer at the offset or
+	// returns false if out of range.
+	//
+	// For example, to search for a NUL-terminated string:
+	//	buf, _ = memory.Read(offset, byteCount)
+	//	n := bytes.IndexByte(buf, 0)
+	//	if n < 0 {
+	//		// Not found!
+	//	}
+	//
+	// Write-through
+	//
+	// This returns a view of the underlying memory, not a copy. This means any
+	// writes to the slice returned are visible to Wasm, and any updates from
+	// Wasm are visible reading the returned slice.
+	//
+	// For example:
+	//	buf, _ = memory.Read(offset, byteCount)
+	//	buf[1] = 'a' // writes through to memory, meaning Wasm code sees 'a'.
+	//
+	// If you don't intend to write through, make a copy of the returned slice.
+	//
+	// When to refresh Read
+	//
+	// The returned slice disconnects on any capacity change. For example,
+	// `buf = append(buf, 'a')` might result in a slice that is no longer
+	// shared. The same exists Wasm side. For example, if Wasm changes its
+	// memory capacity (e.g. via "memory.grow"), the host slice is no longer
+	// shared. Those who need a stable view must set Wasm memory min=max, or
+	// use wazero.RuntimeConfig WithMemoryCapacityPages to ensure max is always
+	// allocated.
+	Read(offset, byteCount uint32) ([]byte, bool)
+
+	// WriteByte writes a single byte to the underlying buffer at the offset or returns false if out of range.
+	WriteByte(offset uint32, v byte) bool
+
+	// WriteUint16Le writes the value in little-endian encoding to the underlying buffer at the offset or returns
+	// false if out of range.
+	WriteUint16Le(offset uint32, v uint16) bool
+
+	// WriteUint32Le writes the value in little-endian encoding to the underlying buffer at the offset or returns
+	// false if out of range.
+	WriteUint32Le(offset, v uint32) bool
+
+	// WriteFloat32Le writes the value in 32 IEEE 754 little-endian encoded bits to the underlying buffer at the offset
+	// or returns false if out of range.
+	//
+	// See math.Float32bits
+	WriteFloat32Le(offset uint32, v float32) bool
+
+	// WriteUint64Le writes the value in little-endian encoding to the underlying buffer at the offset or returns
+	// false if out of range.
+	WriteUint64Le(offset uint32, v uint64) bool
+
+	// WriteFloat64Le writes the value in 64 IEEE 754 little-endian encoded bits to the underlying buffer at the offset
+	// or returns false if out of range.
+	//
+	// See math.Float64bits
+	WriteFloat64Le(offset uint32, v float64) bool
+
+	// Write writes the slice to the underlying buffer at the offset or returns false if out of range.
+	Write(offset uint32, v []byte) bool
+
+	// WriteString writes the string to the underlying buffer at the offset or returns false if out of range.
+	WriteString(offset uint32, v string) bool
+
+	internalapi.WazeroOnly
+}
+
+// CustomSection contains the name and raw data of a custom section.
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type CustomSection interface {
+	// Name is the name of the custom section.
+	Name() string
+	// Data is the raw data of the custom section.
+	Data() []byte
+
+	internalapi.WazeroOnly
+}
+
+// EncodeExternref converts the uintptr input to the uint64 representation
+// used for a ValueTypeExternref.
+//
+// See DecodeExternref
+func EncodeExternref(input uintptr) uint64 {
+	encoded := uint64(input)
+	return encoded
+}
+
+// DecodeExternref converts the uint64 representation of a ValueTypeExternref
+// back to a uintptr.
+//
+// See EncodeExternref
+func DecodeExternref(input uint64) uintptr {
+	ref := uintptr(input)
+	return ref
+}
+
+// EncodeI32 encodes the int32 input as a ValueTypeI32. Only the lower 32 bits
+// of the result are set: negative inputs are not sign-extended to 64 bits.
+//
+// See DecodeI32
+func EncodeI32(input int32) uint64 {
+	unsigned := uint32(input) // keep the same 32 bits, dropping the sign
+	return uint64(unsigned)
+}
+
+// DecodeI32 decodes the input as a ValueTypeI32, returning it as an int32.
+// Only the lower 32 bits of the input are used.
+//
+// See EncodeI32
+func DecodeI32(input uint64) int32 {
+	lower := uint32(input) // discard the upper 32 bits
+	return int32(lower)
+}
+
+// EncodeU32 encodes the uint32 input as a ValueTypeI32 by widening it to
+// uint64.
+//
+// See DecodeU32
+func EncodeU32(input uint32) uint64 {
+	encoded := uint64(input)
+	return encoded
+}
+
+// DecodeU32 decodes the input as a ValueTypeI32, returning it as a uint32.
+// Only the lower 32 bits of the input are used.
+//
+// See EncodeU32
+func DecodeU32(input uint64) uint32 {
+	lower := input & 0xffffffff // discard the upper 32 bits
+	return uint32(lower)
+}
+
+// EncodeI64 encodes the int64 input as a ValueTypeI64 by reinterpreting its
+// bits as a uint64. Convert back with int64(v).
+func EncodeI64(input int64) uint64 {
+	encoded := uint64(input)
+	return encoded
+}
+
+// EncodeF32 encodes the float32 input as a ValueTypeF32: its IEEE 754 bits
+// are placed in the lower 32 bits of the result.
+//
+// See DecodeF32
+func EncodeF32(input float32) uint64 {
+	bits := math.Float32bits(input)
+	return uint64(bits)
+}
+
+// DecodeF32 decodes the input as a ValueTypeF32: the lower 32 bits are
+// interpreted as IEEE 754 bits and the upper 32 bits are ignored.
+//
+// See EncodeF32
+func DecodeF32(input uint64) float32 {
+	bits := uint32(input) // discard the upper 32 bits
+	return math.Float32frombits(bits)
+}
+
+// EncodeF64 encodes the float64 input as a ValueTypeF64, which is its IEEE
+// 754 bits.
+//
+// See DecodeF64
+func EncodeF64(input float64) uint64 {
+	bits := math.Float64bits(input)
+	return bits
+}
+
+// DecodeF64 decodes the input as a ValueTypeF64 by interpreting it as IEEE
+// 754 bits.
+//
+// See EncodeF64
+func DecodeF64(input uint64) float64 {
+	value := math.Float64frombits(input)
+	return value
+}
@@ -0,0 +1,367 @@
+package wazero
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// HostFunctionBuilder defines a host function (in Go), so that a
+// WebAssembly binary (e.g. %.wasm file) can import and use it.
+//
+// Here's an example of an addition function:
+//
+//	hostModuleBuilder.NewFunctionBuilder().
+//		WithFunc(func(ctx context.Context, x, y uint32) uint32 {
+//			return x + y
+//		}).
+//		Export("add")
+//
+// # Memory
+//
+// All host functions act on the importing api.Module, including any memory
+// exported in its binary (%.wasm file). If you are reading or writing memory,
+// it is sand-boxed Wasm memory defined by the guest.
+//
+// Below, `m` is the importing module, defined in Wasm. `fn` is a host function
+// added via Export. This means that `x` was read from memory defined in Wasm,
+// not arbitrary memory in the process.
+//
+//	fn := func(ctx context.Context, m api.Module, offset uint32) uint32 {
+//		x, _ := m.Memory().ReadUint32Le(ctx, offset)
+//		return x
+//	}
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type HostFunctionBuilder interface {
+	// WithGoFunction is an advanced feature for those who need higher
+	// performance than WithFunc at the cost of more complexity.
+	//
+	// Here's an example addition function:
+	//
+	//	builder.WithGoFunction(api.GoFunc(func(ctx context.Context, stack []uint64) {
+	//		x, y := api.DecodeI32(stack[0]), api.DecodeI32(stack[1])
+	//		sum := x + y
+	//		stack[0] = api.EncodeI32(sum)
+	//	}), []api.ValueType{api.ValueTypeI32, api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32})
+	//
+	// As you can see above, defining in this way implies knowledge of which
+	// WebAssembly api.ValueType is appropriate for each parameter and result.
+	//
+	// See WithGoModuleFunction if you also need to access the calling module.
+	WithGoFunction(fn api.GoFunction, params, results []api.ValueType) HostFunctionBuilder
+
+	// WithGoModuleFunction is an advanced feature for those who need higher
+	// performance than WithFunc at the cost of more complexity.
+	//
+	// Here's an example addition function that loads operands from memory:
+	//
+	//	builder.WithGoModuleFunction(api.GoModuleFunc(func(ctx context.Context, m api.Module, stack []uint64) {
+	//		mem := m.Memory()
+	//		offset := api.DecodeU32(stack[0])
+	//
+	//		x, _ := mem.ReadUint32Le(ctx, offset)
+	//		y, _ := mem.ReadUint32Le(ctx, offset + 4) // 32 bits == 4 bytes!
+	//		sum := x + y
+	//
+	//		stack[0] = api.EncodeU32(sum)
+	//	}), []api.ValueType{api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32})
+	//
+	// As you can see above, defining in this way implies knowledge of which
+	// WebAssembly api.ValueType is appropriate for each parameter and result.
+	//
+	// See WithGoFunction if you don't need access to the calling module.
+	WithGoModuleFunction(fn api.GoModuleFunction, params, results []api.ValueType) HostFunctionBuilder
+
+	// WithFunc uses reflect.Value to map a go `func` to a WebAssembly
+	// compatible Signature. An input that isn't a `func` will fail to
+	// instantiate.
+	//
+	// Here's an example of an addition function:
+	//
+	//	builder.WithFunc(func(ctx context.Context, x, y uint32) uint32 {
+	//		return x + y
+	//	})
+	//
+	// # Defining a function
+	//
+	// Except for the context.Context and optional api.Module, all parameters
+	// or result types must map to WebAssembly numeric value types. This means
+	// uint32, int32, uint64, int64, float32 or float64.
+	//
+	// api.Module may be specified as the second parameter, usually to access
+	// memory. This is important because there are only numeric types in Wasm.
+	// The only way to share other data is via writing memory and sharing
+	// offsets.
+	//
+	//	builder.WithFunc(func(ctx context.Context, m api.Module, offset uint32) uint32 {
+	//		mem := m.Memory()
+	//		x, _ := mem.ReadUint32Le(ctx, offset)
+	//		y, _ := mem.ReadUint32Le(ctx, offset + 4) // 32 bits == 4 bytes!
+	//		return x + y
+	//	})
+	//
+	// This example propagates context properly when calling other functions
+	// exported in the api.Module:
+	//
+	//	builder.WithFunc(func(ctx context.Context, m api.Module, offset, byteCount uint32) uint32 {
+	//		fn := m.ExportedFunction("__read")
+	//		results, err := fn.Call(ctx, uint64(offset), uint64(byteCount))
+	//	--snip--
+	WithFunc(interface{}) HostFunctionBuilder
+
+	// WithName defines the optional module-local name of this function, e.g.
+	// "random_get"
+	//
+	// Note: This is not required to match the Export name.
+	WithName(name string) HostFunctionBuilder
+
+	// WithParameterNames defines optional parameter names of the function
+	// signature, e.g. "buf", "buf_len"
+	//
+	// Note: When defined, names must be provided for all parameters.
+	WithParameterNames(names ...string) HostFunctionBuilder
+
+	// WithResultNames defines optional result names of the function
+	// signature, e.g. "errno"
+	//
+	// Note: When defined, names must be provided for all results.
+	WithResultNames(names ...string) HostFunctionBuilder
+
+	// Export exports this to the HostModuleBuilder as the given name, e.g.
+	// "random_get"
+	Export(name string) HostModuleBuilder
+}
+
+// HostModuleBuilder is a way to define host functions (in Go), so that a
+// WebAssembly binary (e.g. %.wasm file) can import and use them.
+//
+// Specifically, this implements the host side of an Application Binary
+// Interface (ABI) like WASI or AssemblyScript.
+//
+// For example, this defines and instantiates a module named "env" with one
+// function:
+//
+//	ctx := context.Background()
+//	r := wazero.NewRuntime(ctx)
+//	defer r.Close(ctx) // This closes everything this Runtime created.
+//
+//	hello := func() {
+//		println("hello!")
+//	}
+//	env, _ := r.NewHostModuleBuilder("env").
+//		NewFunctionBuilder().WithFunc(hello).Export("hello").
+//		Instantiate(ctx)
+//
+// If the same module may be instantiated multiple times, it is more efficient
+// to separate steps. Here's an example:
+//
+//	compiled, _ := r.NewHostModuleBuilder("env").
+//		NewFunctionBuilder().WithFunc(getRandomString).Export("get_random_string").
+//		Compile(ctx)
+//
+//	env1, _ := r.InstantiateModule(ctx, compiled, wazero.NewModuleConfig().WithName("env.1"))
+//	env2, _ := r.InstantiateModule(ctx, compiled, wazero.NewModuleConfig().WithName("env.2"))
+//
+// See HostFunctionBuilder for valid host function signatures and other details.
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+//   - HostModuleBuilder is mutable: each method returns the same instance for
+//     chaining.
+//   - Methods do not return errors, to allow chaining. Any validation errors
+//     are deferred until Compile.
+//   - Functions are indexed in order of calls to NewFunctionBuilder as
+//     insertion ordering is needed by ABI such as Emscripten (invoke_*).
+//   - The semantics of host functions assume the existence of an "importing
+//     module" because, for example, the host function needs access to the
+//     memory of the importing module. Therefore, direct use of
+//     ExportedFunction is forbidden for host modules. Practically speaking,
+//     it is usually meaningless to directly call a host function from Go code
+//     as it is already somewhere in Go code.
+type HostModuleBuilder interface {
+	// Note: until golang/go#5860, we can't use example tests to embed code in interface godocs.
+
+	// NewFunctionBuilder begins the definition of a host function.
+	NewFunctionBuilder() HostFunctionBuilder
+
+	// Compile returns a CompiledModule that can be instantiated by Runtime.
+	Compile(context.Context) (CompiledModule, error)
+
+	// Instantiate is a convenience that calls Compile, then Runtime.InstantiateModule.
+	// This can fail for reasons documented on Runtime.InstantiateModule.
+	//
+	// Here's an example:
+	//
+	//	ctx := context.Background()
+	//	r := wazero.NewRuntime(ctx)
+	//	defer r.Close(ctx) // This closes everything this Runtime created.
+	//
+	//	hello := func() {
+	//		println("hello!")
+	//	}
+	//	env, _ := r.NewHostModuleBuilder("env").
+	//		NewFunctionBuilder().WithFunc(hello).Export("hello").
+	//		Instantiate(ctx)
+	//
+	// # Notes
+	//
+	//   - Closing the Runtime has the same effect as closing the result.
+	//   - Fields in the builder are copied during instantiation: Later changes
+	//     do not affect the instantiated result.
+	//   - To avoid using configuration defaults, use Compile instead.
+	Instantiate(context.Context) (api.Module, error)
+}
+
+// hostModuleBuilder implements HostModuleBuilder
+//
+// r is the runtime that created this builder and moduleName is the name given
+// to NewHostModuleBuilder. exportNames records each distinct export name in
+// the order it was first passed to ExportHostFunc, while nameToHostFunc maps
+// an export name to the most recently exported function for that name.
+type hostModuleBuilder struct {
+	r              *runtime
+	moduleName     string
+	exportNames    []string
+	nameToHostFunc map[string]*wasm.HostFunc
+}
+
+// NewHostModuleBuilder implements Runtime.NewHostModuleBuilder
+//
+// The returned builder is bound to this runtime and starts with no exported
+// functions.
+func (r *runtime) NewHostModuleBuilder(moduleName string) HostModuleBuilder {
+	b := new(hostModuleBuilder)
+	b.r = r
+	b.moduleName = moduleName
+	b.nameToHostFunc = make(map[string]*wasm.HostFunc)
+	return b
+}
+
+// hostFunctionBuilder implements HostFunctionBuilder
+//
+// b is the module builder that Export registers the function with. fn holds
+// either the raw value passed to WithFunc or a *wasm.HostFunc created by
+// WithGoFunction/WithGoModuleFunction. name, paramNames and resultNames are
+// optional and are only applied by Export when non-empty.
+type hostFunctionBuilder struct {
+	b           *hostModuleBuilder
+	fn          interface{}
+	name        string
+	paramNames  []string
+	resultNames []string
+}
+
+// WithGoFunction implements HostFunctionBuilder.WithGoFunction
+//
+// The function is stored together with its explicit parameter and result
+// types as a *wasm.HostFunc, which Export later completes with names.
+func (h *hostFunctionBuilder) WithGoFunction(fn api.GoFunction, params, results []api.ValueType) HostFunctionBuilder {
+	hostFn := new(wasm.HostFunc)
+	hostFn.ParamTypes = params
+	hostFn.ResultTypes = results
+	hostFn.Code = wasm.Code{GoFunc: fn}
+	h.fn = hostFn
+	return h
+}
+
+// WithGoModuleFunction implements HostFunctionBuilder.WithGoModuleFunction
+//
+// The function is stored together with its explicit parameter and result
+// types as a *wasm.HostFunc, which Export later completes with names.
+func (h *hostFunctionBuilder) WithGoModuleFunction(fn api.GoModuleFunction, params, results []api.ValueType) HostFunctionBuilder {
+	hostFn := new(wasm.HostFunc)
+	hostFn.ParamTypes = params
+	hostFn.ResultTypes = results
+	hostFn.Code = wasm.Code{GoFunc: fn}
+	h.fn = hostFn
+	return h
+}
+
+// WithFunc implements HostFunctionBuilder.WithFunc
+//
+// The value is stored as-is: nothing is validated here. Export wraps it into
+// a *wasm.HostFunc unless it already is one.
+func (h *hostFunctionBuilder) WithFunc(fn interface{}) HostFunctionBuilder {
+	h.fn = fn
+	return h
+}
+
+// WithName implements HostFunctionBuilder.WithName
+//
+// The name is only applied by Export when it is non-empty.
+func (h *hostFunctionBuilder) WithName(name string) HostFunctionBuilder {
+	h.name = name
+	return h
+}
+
+// WithParameterNames implements HostFunctionBuilder.WithParameterNames
+//
+// The names are only applied by Export when at least one is given. The slice
+// is stored without copying.
+func (h *hostFunctionBuilder) WithParameterNames(names ...string) HostFunctionBuilder {
+	h.paramNames = names
+	return h
+}
+
+// WithResultNames implements HostFunctionBuilder.WithResultNames
+//
+// The names are only applied by Export when at least one is given. The slice
+// is stored without copying.
+func (h *hostFunctionBuilder) WithResultNames(names ...string) HostFunctionBuilder {
+	h.resultNames = names
+	return h
+}
+
+// Export implements HostFunctionBuilder.Export
+//
+// It resolves the function configured on this builder into a *wasm.HostFunc,
+// assigns the export name plus any optional names set on the builder, and
+// registers the result with the parent hostModuleBuilder, which is returned
+// to allow chaining.
+func (h *hostFunctionBuilder) Export(exportName string) HostModuleBuilder {
+	var hostFn *wasm.HostFunc
+	if fn, ok := h.fn.(*wasm.HostFunc); ok {
+		// The builder holds on to this *wasm.HostFunc, so naming it in place
+		// would also rename a function registered by an earlier Export of
+		// the same builder. Name a shallow copy instead, which matches the
+		// branch below that allocates a new HostFunc on every call.
+		copied := *fn
+		hostFn = &copied
+	} else {
+		hostFn = &wasm.HostFunc{Code: wasm.Code{GoFunc: h.fn}}
+	}
+
+	// Assign any names from the builder
+	hostFn.ExportName = exportName
+	if h.name != "" {
+		hostFn.Name = h.name
+	}
+	if len(h.paramNames) != 0 {
+		hostFn.ParamNames = h.paramNames
+	}
+	if len(h.resultNames) != 0 {
+		hostFn.ResultNames = h.resultNames
+	}
+
+	h.b.ExportHostFunc(hostFn)
+	return h.b
+}
+
+// ExportHostFunc implements wasm.HostFuncExporter
+//
+// The function is stored under its ExportName, replacing any function that
+// was previously exported with the same name. The name is appended to the
+// ordered list of export names only the first time it is seen.
+func (b *hostModuleBuilder) ExportHostFunc(fn *wasm.HostFunc) {
+	name := fn.ExportName
+	_, exists := b.nameToHostFunc[name]
+	b.nameToHostFunc[name] = fn
+	if !exists { // add a new name
+		b.exportNames = append(b.exportNames, name)
+	}
+}
+
+// NewFunctionBuilder implements HostModuleBuilder.NewFunctionBuilder
+//
+// Each call returns a new function builder that points back at this module
+// builder.
+func (b *hostModuleBuilder) NewFunctionBuilder() HostFunctionBuilder {
+	fb := new(hostFunctionBuilder)
+	fb.b = b
+	return fb
+}
+
+// Compile implements HostModuleBuilder.Compile
+//
+// It builds and validates a host module from the exported functions, builds
+// its function listeners, compiles it with the runtime's engine and resolves
+// its function type IDs. The first error encountered is returned as-is.
+func (b *hostModuleBuilder) Compile(ctx context.Context) (CompiledModule, error) {
+	features := b.r.enabledFeatures
+	module, err := wasm.NewHostModule(b.moduleName, b.exportNames, b.nameToHostFunc, features)
+	if err != nil {
+		return nil, err
+	}
+	if err = module.Validate(features); err != nil {
+		return nil, err
+	}
+
+	listeners, err := buildFunctionListeners(ctx, module)
+	if err != nil {
+		return nil, err
+	}
+
+	engine := b.r.store.Engine
+	if err = engine.CompileModule(ctx, module, listeners, false); err != nil {
+		return nil, err
+	}
+
+	// typeIDs are static and compile-time known.
+	typeIDs, err := b.r.store.GetFunctionTypeIDs(module.TypeSection)
+	if err != nil {
+		return nil, err
+	}
+
+	return &compiledModule{module: module, compiledEngine: engine, typeIDs: typeIDs}, nil
+}
+
+// hostModuleInstance is a wrapper around api.Module that prevents calling ExportedFunction.
+// All other methods are promoted from the embedded api.Module.
+type hostModuleInstance struct{ api.Module }
+
+// ExportedFunction implements api.Module ExportedFunction.
+//
+// It always panics, regardless of name, because host functions are not meant
+// to be called directly from Go.
+func (h hostModuleInstance) ExportedFunction(name string) api.Function {
+	panic("calling ExportedFunction is forbidden on host modules. See the note on ExportedFunction interface")
+}
+
+// Instantiate implements HostModuleBuilder.Instantiate
+//
+// The module is compiled, marked to be closed together with the instantiated
+// module, and then instantiated with the default module configuration. The
+// result is wrapped so that ExportedFunction cannot be called on it.
+func (b *hostModuleBuilder) Instantiate(ctx context.Context) (api.Module, error) {
+	compiled, err := b.Compile(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	compiled.(*compiledModule).closeWithModule = true
+	m, err := b.r.InstantiateModule(ctx, compiled, NewModuleConfig())
+	if err != nil {
+		return nil, err
+	}
+	return hostModuleInstance{m}, nil
+}
@@ -0,0 +1,123 @@
+package wazero
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+	"path"
+	"path/filepath"
+	goruntime "runtime"
+	"sync"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/internal/filecache"
+	"github.com/tetratelabs/wazero/internal/version"
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// CompilationCache reduces time spent compiling (Runtime.CompileModule) the same wasm module.
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+//   - Instances of this can be reused across multiple runtimes, if configured
+//     via RuntimeConfig.
+//   - The cache check happens before the compilation, so if multiple Goroutines are
+//     trying to compile the same module simultaneously, it is possible that they
+//     all compile the module. The design here is that the lock isn't held for the action "Compile"
+//     but only for checking and saving the compiled result. Therefore, we strongly recommend that the embedder
+//     does the centralized compilation in a single Goroutine (or multiple Goroutines per Wasm binary) to generate cache rather than
+//     trying to Compile in parallel for a single module. In other words, we always recommend producing a CompiledModule
+//     and sharing it across multiple Goroutines to avoid trying to compile the same module simultaneously.
+type CompilationCache interface{ api.Closer }
+
+// NewCompilationCache returns a new CompilationCache to be passed to RuntimeConfig.
+// The result caches in memory only and doesn't persist to the file system.
+// See wazero.NewCompilationCacheWithDir for a file-backed alternative.
+//
+// The returned CompilationCache can be used to share the in-memory compilation results across multiple instances of wazero.Runtime.
+func NewCompilationCache() CompilationCache {
+	return new(cache)
+}
+
+// NewCompilationCacheWithDir is like wazero.NewCompilationCache except the result also writes
+// state into the directory specified by the `dirname` parameter.
+//
+// If dirname doesn't exist, this creates it or returns an error.
+//
+// Those running wazero as a CLI or frequently restarting a process using the same wasm should
+// use this feature to reduce time waiting to compile the same module a second time.
+//
+// The contents written into dirname are wazero-version specific, meaning different versions of
+// wazero will duplicate entries for the same input wasm.
+//
+// Note: The embedder must safeguard this directory from external changes.
+func NewCompilationCacheWithDir(dirname string) (CompilationCache, error) {
+	c := new(cache)
+	return c, c.ensuresFileCache(dirname, version.GetWazeroVersion())
+}
+
+// cache implements the CompilationCache interface.
+type cache struct {
+	// engs holds the engine for each engine kind, indexed by engineKind. If the cache is configured, an engine is shared
+	// across multiple instances of Runtime, and its lifetime is not bound to them. Instead, it is alive until Close is called.
+	// fileCache is passed to each engine on creation, and initOnces guards the one-time creation of each entry in engs.
+	engs      [engineKindCount]wasm.Engine
+	fileCache filecache.Cache
+	initOnces [engineKindCount]sync.Once
+}
+
+// initEngine returns the engine of kind ek, creating it with ne on the first
+// call for that kind. Later calls return the existing engine and ignore ne,
+// ctx and features.
+func (c *cache) initEngine(ek engineKind, ne newEngine, ctx context.Context, features api.CoreFeatures) wasm.Engine {
+	create := func() {
+		c.engs[ek] = ne(ctx, features, c.fileCache)
+	}
+	c.initOnces[ek].Do(create)
+	return c.engs[ek]
+}
+
+// Close implements the same method on the CompilationCache interface.
+//
+// Every engine that was initialized is closed, even when closing an earlier
+// one fails, so that a single failure does not leak the remaining engines.
+// The first error encountered, if any, is returned.
+func (c *cache) Close(_ context.Context) (err error) {
+	for _, eng := range c.engs {
+		if eng == nil {
+			continue // this engine kind was never initialized
+		}
+		if closeErr := eng.Close(); closeErr != nil && err == nil {
+			err = closeErr
+		}
+	}
+	return err
+}
+
+func (c *cache) ensuresFileCache(dir string, wazeroVersion string) error {
+	// Resolve a potentially relative directory into an absolute one.
+	var err error
+	dir, err = filepath.Abs(dir)
+	if err != nil {
+		return err
+	}
+
+	// Ensure the user-supplied directory.
+	if err = mkdir(dir); err != nil {
+		return err
+	}
+
+	// Create a version-specific directory to avoid conflicts.
+	dirname := path.Join(dir, "wazero-"+wazeroVersion+"-"+goruntime.GOARCH+"-"+goruntime.GOOS)
+	if err = mkdir(dirname); err != nil {
+		return err
+	}
+
+	c.fileCache = filecache.New(dirname)
+	return nil
+}
+
+// mkdir ensures dirname exists and is a directory.
+//
+// When dirname doesn't exist, it is created along with any missing parents
+// using permission 0o700. A creation failure is wrapped so that callers can
+// still inspect the cause with errors.Is or errors.As. Any other failure to
+// stat dirname is returned as-is, and an existing non-directory is an error.
+func mkdir(dirname string) error {
+	st, err := os.Stat(dirname)
+	switch {
+	case errors.Is(err, os.ErrNotExist):
+		// If the directory not found, create the cache dir.
+		if err = os.MkdirAll(dirname, 0o700); err != nil {
+			return fmt.Errorf("create directory %s: %w", dirname, err)
+		}
+	case err != nil:
+		return err
+	case !st.IsDir():
+		return fmt.Errorf("%s is not dir", dirname)
+	}
+	return nil
+}
@@ -0,0 +1,9 @@
+# Codecov for main is visible here https://app.codecov.io/gh/tetratelabs/wazero
+
+# We use codecov only as a UI, so we disable PR comments and commit status.
+# See https://docs.codecov.com/docs/pull-request-comments
+comment: false
+coverage:
+  status:
+    project: off
+    patch: off
@@ -0,0 +1,899 @@
+package wazero
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"math"
+	"net"
+	"time"
+
+	"github.com/tetratelabs/wazero/api"
+	experimentalsys "github.com/tetratelabs/wazero/experimental/sys"
+	"github.com/tetratelabs/wazero/internal/filecache"
+	"github.com/tetratelabs/wazero/internal/internalapi"
+	"github.com/tetratelabs/wazero/internal/platform"
+	internalsock "github.com/tetratelabs/wazero/internal/sock"
+	internalsys "github.com/tetratelabs/wazero/internal/sys"
+	"github.com/tetratelabs/wazero/internal/wasm"
+	"github.com/tetratelabs/wazero/sys"
+)
+
+// RuntimeConfig controls runtime behavior, with the default implementation as
+// NewRuntimeConfig
+//
+// The example below explicitly limits to Wasm Core 1.0 features as opposed to
+// relying on defaults:
+//
+//	rConfig = wazero.NewRuntimeConfig().WithCoreFeatures(api.CoreFeaturesV1)
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+//   - RuntimeConfig is immutable. Each WithXXX function returns a new instance
+//     including the corresponding change.
+type RuntimeConfig interface {
+	// WithCoreFeatures sets the WebAssembly Core specification features this
+	// runtime supports. Defaults to api.CoreFeaturesV2.
+	//
+	// Example of disabling a specific feature:
+	//
+	//	features := api.CoreFeaturesV2.SetEnabled(api.CoreFeatureMutableGlobal, false)
+	//	rConfig = wazero.NewRuntimeConfig().WithCoreFeatures(features)
+	//
+	// # Why default to version 2.0?
+	//
+	// Many compilers that target WebAssembly require features after
+	// api.CoreFeaturesV1 by default. For example, TinyGo v0.24+ requires
+	// api.CoreFeatureBulkMemoryOperations. To avoid runtime errors, wazero
+	// defaults to api.CoreFeaturesV2, even though it is not yet a Web
+	// Standard (REC).
+	WithCoreFeatures(api.CoreFeatures) RuntimeConfig
+
+	// WithMemoryLimitPages overrides the maximum pages allowed per memory. The
+	// default is 65536, allowing 4GB total memory per instance if the maximum is
+	// not encoded in a Wasm binary. Setting a value larger than default will panic.
+	//
+	// This example reduces the largest possible memory size from 4GB to 128KB:
+	//
+	//	rConfig = wazero.NewRuntimeConfig().WithMemoryLimitPages(2)
+	//
+	// Note: Wasm has 32-bit memory and each page is 65536 (2^16) bytes. This
+	// implies a max of 65536 (2^16) addressable pages.
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#grow-mem
+	WithMemoryLimitPages(memoryLimitPages uint32) RuntimeConfig
+
+	// WithMemoryCapacityFromMax eagerly allocates max memory, unless max is
+	// not defined. The default is false, which means minimum memory is
+	// allocated and any call to grow memory results in re-allocations.
+	//
+	// This example ensures any memory.grow instruction will never re-allocate:
+	//
+	//	rConfig = wazero.NewRuntimeConfig().WithMemoryCapacityFromMax(true)
+	//
+	// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#grow-mem
+	//
+	// Note: if the memory maximum is not encoded in a Wasm binary, this
+	// results in allocating 4GB. See the doc on WithMemoryLimitPages for detail.
+	WithMemoryCapacityFromMax(memoryCapacityFromMax bool) RuntimeConfig
+
+	// WithDebugInfoEnabled toggles DWARF based stack traces in the face of
+	// runtime errors. Defaults to true.
+	//
+	// Those who wish to disable this, can like so:
+	//
+	//	r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfig().WithDebugInfoEnabled(false))
+	//
+	// When disabled, a stack trace message looks like:
+	//
+	//	wasm stack trace:
+	//		.runtime._panic(i32)
+	//		.myFunc()
+	//		.main.main()
+	//		.runtime.run()
+	//		._start()
+	//
+	// When enabled, the stack trace includes source code information:
+	//
+	//	wasm stack trace:
+	//		.runtime._panic(i32)
+	//		  0x16e2: /opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/runtime_tinygowasm.go:73:6
+	//		.myFunc()
+	//		  0x190b: /Users/XXXXX/wazero/internal/testing/dwarftestdata/testdata/main.go:19:7
+	//		.main.main()
+	//		  0x18ed: /Users/XXXXX/wazero/internal/testing/dwarftestdata/testdata/main.go:4:3
+	//		.runtime.run()
+	//		  0x18cc: /opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/scheduler_none.go:26:10
+	//		._start()
+	//		  0x18b6: /opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/runtime_wasm_wasi.go:22:5
+	//
+	// Note: This only takes effect when the original Wasm binary has the
+	// DWARF "custom sections" that are often stripped, depending on
+	// optimization flags passed to the compiler.
+	WithDebugInfoEnabled(bool) RuntimeConfig
+
+	// WithCompilationCache configures how runtime caches the compiled modules. In the default configuration, compilation results are
+	// only in-memory until Runtime.Close is called, and not shareable by multiple Runtime.
+	//
+	// Below defines the shared cache across multiple instances of Runtime:
+	//
+	//	// Creates the new Cache and the runtime configuration with it.
+	//	cache := wazero.NewCompilationCache()
+	//	defer cache.Close(context.Background())
+	//	config := wazero.NewRuntimeConfig().WithCompilationCache(cache)
+	//
+	//	// Creates two runtimes while sharing compilation caches.
+	//	foo := wazero.NewRuntimeWithConfig(context.Background(), config)
+	//	bar := wazero.NewRuntimeWithConfig(context.Background(), config)
+	//
+	// # Cache Key
+	//
+	// Cached files are keyed on the version of wazero. This is obtained from go.mod of your application,
+	// and we use it to verify the compatibility of caches against the currently-running wazero.
+	// However, if you use this in tests of a package not named as `main`, then wazero cannot obtain the correct
+	// version of wazero due to the known issue of debug.BuildInfo function: https://github.com/golang/go/issues/33976.
+	// As a consequence, your cache won't contain the correct version information and always be treated as `dev` version.
+	// To avoid this issue, you can pass -ldflags "-X github.com/tetratelabs/wazero/internal/version.version=foo" when running tests.
+	WithCompilationCache(CompilationCache) RuntimeConfig
+
+	// WithCustomSections toggles parsing of "custom sections". Defaults to false.
+	//
+	// When enabled, it is possible to retrieve custom sections from a CompiledModule:
+	//
+	//	config := wazero.NewRuntimeConfig().WithCustomSections(true)
+	//	r := wazero.NewRuntimeWithConfig(ctx, config)
+	//	c, err := r.CompileModule(ctx, wasm)
+	//	customSections := c.CustomSections()
+	WithCustomSections(bool) RuntimeConfig
+
+	// WithCloseOnContextDone ensures the executions of functions to be terminated under one of the following circumstances:
+	//
+	//   - context.Context passed to the Call method of api.Function is canceled during execution. (i.e. ctx by context.WithCancel)
+	//   - context.Context passed to the Call method of api.Function reaches timeout during execution. (i.e. ctx by context.WithTimeout or context.WithDeadline)
+	//   - Close or CloseWithExitCode of api.Module is explicitly called during execution.
+	//
+	// This is especially useful when one wants to run untrusted Wasm binaries since otherwise, any invocation of
+	// api.Function can potentially block the corresponding Goroutine forever. Moreover, it might block the
+	// entire underlying OS thread which runs the api.Function call. See "Why it's safe to execute runtime-generated
+	// machine codes against async Goroutine preemption" section in RATIONALE.md for detail.
+	//
+	// Upon the termination of the function executions, api.Module is closed.
+	//
+	// Note that this comes with a bit of extra cost when enabled. The reason is that internally this forces
+	// interpreter and compiler runtimes to insert the periodical checks on the conditions above. For that reason,
+	// this is disabled by default.
+	//
+	// See examples in context_done_example_test.go for the end-to-end demonstrations.
+	//
+	// When the invocations of api.Function are closed due to this, sys.ExitError is raised to the callers and
+	// the api.Module from which the functions are derived is closed.
+	WithCloseOnContextDone(bool) RuntimeConfig
+}
+
+// NewRuntimeConfig returns a RuntimeConfig using the compiler if it is supported in this environment,
+// or the interpreter otherwise. All other settings start from the shared defaults.
+func NewRuntimeConfig() RuntimeConfig {
+	cfg := engineLessConfig.clone()
+	cfg.engineKind = engineKindAuto
+	return cfg
+}
+
+// newEngine is a constructor of a wasm.Engine from a context, the enabled core
+// features and a file cache.
+type newEngine func(context.Context, api.CoreFeatures, filecache.Cache) wasm.Engine
+
+// runtimeConfig implements RuntimeConfig.
+//
+// Each field is set by the WithXXX method it corresponds to: enabledFeatures
+// by WithCoreFeatures, memoryLimitPages by WithMemoryLimitPages,
+// memoryCapacityFromMax by WithMemoryCapacityFromMax, dwarfDisabled by
+// WithDebugInfoEnabled (inverted), cache by WithCompilationCache,
+// storeCustomSections by WithCustomSections and ensureTermination by
+// WithCloseOnContextDone. engineKind is chosen by the NewRuntimeConfigXXX
+// constructors.
+//
+// NOTE(review): newEngine is not assigned anywhere in this part of the file;
+// presumably it is resolved from engineKind elsewhere. Confirm before
+// relying on it.
+type runtimeConfig struct {
+	enabledFeatures       api.CoreFeatures
+	memoryLimitPages      uint32
+	memoryCapacityFromMax bool
+	engineKind            engineKind
+	dwarfDisabled         bool // negative as defaults to enabled
+	newEngine             newEngine
+	cache                 CompilationCache
+	storeCustomSections   bool
+	ensureTermination     bool
+}
+
+// engineLessConfig helps avoid copy/pasting the wrong defaults. It holds the
+// defaults shared by every NewRuntimeConfigXXX constructor, each of which
+// clones it and then selects an engine kind.
+var engineLessConfig = &runtimeConfig{
+	enabledFeatures:       api.CoreFeaturesV2,
+	memoryLimitPages:      wasm.MemoryLimitPages,
+	memoryCapacityFromMax: false,
+	dwarfDisabled:         false,
+}
+
+// engineKind identifies which engine a runtimeConfig selects.
+type engineKind int
+
+// engineKindAuto is -1, so that engineKindCompiler (0) and
+// engineKindInterpreter (1) can be used directly as array indexes, and
+// engineKindCount (2) is the length of arrays indexed by engineKind.
+const (
+	engineKindAuto engineKind = iota - 1
+	engineKindCompiler
+	engineKindInterpreter
+	engineKindCount
+)
+
+// NewRuntimeConfigCompiler compiles WebAssembly modules into
+// runtime.GOARCH-specific assembly for optimal performance.
+//
+// The default implementation is AOT (Ahead of Time) compilation, applied at
+// Runtime.CompileModule. This allows consistent runtime performance, as well
+// as the ability to reduce any first request penalty.
+//
+// Note: While this is technically AOT, this does not imply any action on your
+// part. wazero automatically performs ahead-of-time compilation as needed when
+// Runtime.CompileModule is invoked.
+//
+// # Warning
+//
+//   - This panics at runtime if the runtime.GOOS or runtime.GOARCH does not
+//     support compiler. Use NewRuntimeConfig to safely detect and fallback to
+//     NewRuntimeConfigInterpreter if needed.
+//
+//   - If you are using wazero in buildmode=c-archive or c-shared, make sure that you set up the alternate signal stack
+//     by using, e.g. `sigaltstack` combined with `SA_ONSTACK` flag on `sigaction` on Linux,
+//     before calling any api.Function. This is because the Go runtime does not set up the alternate signal stack
+//     for c-archive or c-shared modes, and wazero uses the different stack than the calling Goroutine.
+//     Hence, the signal handler might get invoked on the wazero's stack, which may cause a stack overflow.
+//     https://github.com/tetratelabs/wazero/blob/2092c0a879f30d49d7b37f333f4547574b8afe0d/internal/integration_test/fuzz/fuzz/tests/sigstack.rs#L19-L36
+func NewRuntimeConfigCompiler() RuntimeConfig {
+	cfg := engineLessConfig.clone()
+	cfg.engineKind = engineKindCompiler
+	return cfg
+}
+
+// NewRuntimeConfigInterpreter interprets WebAssembly modules instead of compiling them into assembly.
+// All other settings start from the shared defaults.
+func NewRuntimeConfigInterpreter() RuntimeConfig {
+	cfg := engineLessConfig.clone()
+	cfg.engineKind = engineKindInterpreter
+	return cfg
+}
+
+// clone returns a pointer to a copy of this runtime config. The copy is
+// shallow: reference-typed fields, such as the compilation cache, are shared
+// with the original.
+func (c *runtimeConfig) clone() *runtimeConfig {
+	dup := new(runtimeConfig)
+	*dup = *c
+	return dup
+}
+
+// WithCoreFeatures implements RuntimeConfig.WithCoreFeatures
+//
+// The receiver is left untouched: the change is applied to a copy.
+func (c *runtimeConfig) WithCoreFeatures(features api.CoreFeatures) RuntimeConfig {
+	cfg := c.clone()
+	cfg.enabledFeatures = features
+	return cfg
+}
+
+// WithCloseOnContextDone implements RuntimeConfig.WithCloseOnContextDone
+//
+// The receiver is left untouched: the change is applied to a copy.
+func (c *runtimeConfig) WithCloseOnContextDone(ensure bool) RuntimeConfig {
+	cfg := c.clone()
+	cfg.ensureTermination = ensure
+	return cfg
+}
+
+// WithMemoryLimitPages implements RuntimeConfig.WithMemoryLimitPages
+//
+// The receiver is left untouched: the change is applied to a copy. A limit
+// above wasm.MemoryLimitPages panics.
+func (c *runtimeConfig) WithMemoryLimitPages(memoryLimitPages uint32) RuntimeConfig {
+	// This panics instead of returning an error as it is unlikely.
+	if memoryLimitPages > wasm.MemoryLimitPages {
+		panic(fmt.Errorf("memoryLimitPages invalid: %d > %d", memoryLimitPages, wasm.MemoryLimitPages))
+	}
+	cfg := c.clone()
+	cfg.memoryLimitPages = memoryLimitPages
+	return cfg
+}
+
+// WithCompilationCache implements RuntimeConfig.WithCompilationCache
+//
+// The receiver is left untouched: the change is applied to a copy.
+func (c *runtimeConfig) WithCompilationCache(ca CompilationCache) RuntimeConfig {
+	cfg := c.clone()
+	cfg.cache = ca
+	return cfg
+}
+
+// WithMemoryCapacityFromMax implements RuntimeConfig.WithMemoryCapacityFromMax
+//
+// The receiver is left untouched: the change is applied to a copy.
+func (c *runtimeConfig) WithMemoryCapacityFromMax(memoryCapacityFromMax bool) RuntimeConfig {
+	cfg := c.clone()
+	cfg.memoryCapacityFromMax = memoryCapacityFromMax
+	return cfg
+}
+
+// WithDebugInfoEnabled implements RuntimeConfig.WithDebugInfoEnabled
+//
+// The receiver is left untouched: the change is applied to a copy. The flag
+// is stored inverted because debug info defaults to enabled.
+func (c *runtimeConfig) WithDebugInfoEnabled(dwarfEnabled bool) RuntimeConfig {
+	cfg := c.clone()
+	cfg.dwarfDisabled = !dwarfEnabled
+	return cfg
+}
+
+// WithCustomSections implements RuntimeConfig.WithCustomSections
+//
+// The receiver is left untouched: the change is applied to a copy.
+func (c *runtimeConfig) WithCustomSections(storeCustomSections bool) RuntimeConfig {
+	cfg := c.clone()
+	cfg.storeCustomSections = storeCustomSections
+	return cfg
+}
+
+// CompiledModule is a WebAssembly module ready to be instantiated (Runtime.InstantiateModule) as an api.Module.
+//
+// In WebAssembly terminology, this is a decoded, validated, and possibly also compiled module. wazero avoids using
+// the name "Module" for both before and after instantiation as the name conflation has caused confusion.
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#semantic-phases%E2%91%A0
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+//   - Closing the wazero.Runtime closes any CompiledModule it compiled.
+type CompiledModule interface {
+	// Name returns the module name encoded into the binary or empty if not.
+	Name() string
+
+	// ImportedFunctions returns all the imported functions
+	// (api.FunctionDefinition) in this module or nil if there are none.
+	//
+	// Note: Unlike ExportedFunctions, there is no unique constraint on
+	// imports.
+	ImportedFunctions() []api.FunctionDefinition
+
+	// ExportedFunctions returns all the exported functions
+	// (api.FunctionDefinition) in this module keyed on export name.
+	ExportedFunctions() map[string]api.FunctionDefinition
+
+	// ImportedMemories returns all the imported memories
+	// (api.MemoryDefinition) in this module or nil if there are none.
+	//
+	// # Notes
+	//
+	//   - As of WebAssembly Core Specification 2.0, there can be at most one
+	//     memory.
+	//   - Unlike ExportedMemories, there is no unique constraint on imports.
+	ImportedMemories() []api.MemoryDefinition
+
+	// ExportedMemories returns all the exported memories
+	// (api.MemoryDefinition) in this module keyed on export name.
+	//
+	// Note: As of WebAssembly Core Specification 2.0, there can be at most one
+	// memory.
+	ExportedMemories() map[string]api.MemoryDefinition
+
+	// CustomSections returns all the custom sections
+	// (api.CustomSection) in this module as a slice. Use
+	// api.CustomSection.Name to tell them apart.
+	CustomSections() []api.CustomSection
+
+	// Close releases all the allocated resources for this CompiledModule.
+	//
+	// Note: It is safe to call Close while having outstanding calls from an
+	// api.Module instantiated from this.
+	Close(context.Context) error
+}
+
+// Compile-time assertion that *compiledModule satisfies CompiledModule. A typed
+// nil pointer is enough for the check and avoids constructing a value.
+var _ CompiledModule = (*compiledModule)(nil)
+
+// compiledModule is the CompiledModule implementation declared in this file.
+// Its accessor methods delegate to module, and Close asks compiledEngine to
+// delete the compiled form of module.
+//
+// NOTE(review): typeIDs is not read anywhere in this part of the file; confirm
+// where it is populated and consumed before relying on it.
+type compiledModule struct {
+	module *wasm.Module
+	// compiledEngine holds an engine on which `module` is compiled.
+	compiledEngine wasm.Engine
+	// closeWithModule prevents leaking compiled code when a module is compiled implicitly.
+	closeWithModule bool
+	typeIDs         []wasm.FunctionTypeID
+}
+
+// Name implements CompiledModule.Name
+//
+// The name is read from the module's name section; without a name section the
+// result is the empty string.
+func (c *compiledModule) Name() (moduleName string) {
+	section := c.module.NameSection
+	if section == nil {
+		return ""
+	}
+	return section.ModuleName
+}
+
+// Close implements CompiledModule.Close
+//
+// The context argument is ignored. The module is deleted from the engine it
+// was compiled on, and the returned error is always nil.
+func (c *compiledModule) Close(context.Context) error {
+	engine, mod := c.compiledEngine, c.module
+	engine.DeleteCompiledModule(mod)
+	// The engine may need to report an error in the future; for now this mirrors api.Module.Close.
+	return nil
+}
+
+// ImportedFunctions implements CompiledModule.ImportedFunctions
+//
+// The result comes straight from the underlying wasm.Module.
+func (c *compiledModule) ImportedFunctions() []api.FunctionDefinition {
+	mod := c.module
+	return mod.ImportedFunctions()
+}
+
+// ExportedFunctions implements CompiledModule.ExportedFunctions
+//
+// The result comes straight from the underlying wasm.Module.
+func (c *compiledModule) ExportedFunctions() map[string]api.FunctionDefinition {
+	mod := c.module
+	return mod.ExportedFunctions()
+}
+
+// ImportedMemories implements CompiledModule.ImportedMemories
+//
+// The result comes straight from the underlying wasm.Module.
+func (c *compiledModule) ImportedMemories() []api.MemoryDefinition {
+	mod := c.module
+	return mod.ImportedMemories()
+}
+
+// ExportedMemories implements CompiledModule.ExportedMemories
+//
+// The result comes straight from the underlying wasm.Module.
+func (c *compiledModule) ExportedMemories() map[string]api.MemoryDefinition {
+	mod := c.module
+	return mod.ExportedMemories()
+}
+
+// CustomSections implements CompiledModule.CustomSections
+//
+// Each custom section of the underlying module is wrapped in a customSection
+// that refers to the same name and data. The result is a non-nil slice with
+// one entry per section, in the module's order.
+func (c *compiledModule) CustomSections() []api.CustomSection {
+	sections := c.module.CustomSections
+	out := make([]api.CustomSection, 0, len(sections))
+	for _, sec := range sections {
+		out = append(out, &customSection{name: sec.Name, data: sec.Data})
+	}
+	return out
+}
+
+// customSection implements api.CustomSection. It is the element type that
+// compiledModule.CustomSections returns and holds one section's name and data.
+type customSection struct {
+	internalapi.WazeroOnlyType
+	name string
+	data []byte
+}
+
+// Name implements api.CustomSection.Name
+func (c *customSection) Name() string {
+	return c.name
+}
+
+// Data implements api.CustomSection.Data
+//
+// The stored slice is returned as-is, not copied.
+func (c *customSection) Data() []byte {
+	return c.data
+}
+
+// ModuleConfig configures resources needed by functions that have low-level interactions with the host operating
+// system. Using this, resources such as STDIN can be isolated, so that the same module can be safely instantiated
+// multiple times.
+//
+// Here's an example:
+//
+//	// Initialize base configuration:
+//	config := wazero.NewModuleConfig().WithStdout(buf).WithSysNanotime()
+//
+//	// Assign different configuration on each instantiation
+//	mod, _ := r.InstantiateModule(ctx, compiled, config.WithName("rotate").WithArgs("rotate", "angle=90", "dir=cw"))
+//
+// While wazero supports Windows as a platform, host functions using ModuleConfig follow a UNIX dialect.
+// See RATIONALE.md for design background and relationship to WebAssembly System Interfaces (WASI).
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+//   - ModuleConfig is immutable. Each WithXXX function returns a new instance
+//     including the corresponding change.
+type ModuleConfig interface {
+	// WithArgs assigns command-line arguments visible to an imported function that reads an arg vector (argv). Defaults to
+	// none. Runtime.InstantiateModule errs if any arg is empty.
+	//
+	// These values are commonly read by the functions like "args_get" in "wasi_snapshot_preview1" although they could be
+	// read by functions imported from other modules.
+	//
+	// Similar to os.Args and exec.Cmd Env, many implementations would expect a program name to be argv[0]. However, neither
+	// WebAssembly nor WebAssembly System Interfaces (WASI) define this. Regardless, you may choose to set the first
+	// argument to the same value set via WithName.
+	//
+	// Note: This does not default to os.Args as that violates sandboxing.
+	//
+	// See https://linux.die.net/man/3/argv and https://en.wikipedia.org/wiki/Null-terminated_string
+	WithArgs(...string) ModuleConfig
+
+	// WithEnv sets an environment variable visible to a Module that imports functions. Defaults to none.
+	// Runtime.InstantiateModule errs if the key is empty or contains a NULL(0) or equals("=") character.
+	//
+	// Validation is the same as os.Setenv on Linux and replaces any existing value. Unlike exec.Cmd Env, this does not
+	// default to the current process environment as that would violate sandboxing. This also does not preserve order.
+	//
+	// Environment variables are commonly read by the functions like "environ_get" in "wasi_snapshot_preview1" although
+	// they could be read by functions imported from other modules.
+	//
+	// While similar to process configuration, there are no assumptions that can be made about anything OS-specific. For
+	// example, neither WebAssembly nor WebAssembly System Interfaces (WASI) define concerns processes have, such as
+	// case-sensitivity on environment keys. For portability, define entries with case-insensitively unique keys.
+	//
+	// See https://linux.die.net/man/3/environ and https://en.wikipedia.org/wiki/Null-terminated_string
+	WithEnv(key, value string) ModuleConfig
+
+	// WithFS is a convenience that calls WithFSConfig with an FSConfig of the
+	// input for the root ("/") guest path.
+	WithFS(fs.FS) ModuleConfig
+
+	// WithFSConfig configures the filesystem available to each guest
+	// instantiated with this configuration. By default, no file access is
+	// allowed, so functions like `path_open` result in unsupported errors
+	// (e.g. syscall.ENOSYS).
+	WithFSConfig(FSConfig) ModuleConfig
+
+	// WithName configures the module name. Defaults to what was decoded from
+	// the name section. Duplicate names are not allowed in a single Runtime.
+	//
+	// Calling this with the empty string "" makes the module anonymous.
+	// That is useful when you want to instantiate the same CompiledModule multiple times like below:
+	//
+	//	for i := 0; i < N; i++ {
+	//		// Instantiate a new Wasm module from the already compiled `compiledWasm` anonymously without a name.
+	//		instance, err := r.InstantiateModule(ctx, compiledWasm, wazero.NewModuleConfig().WithName(""))
+	//		// ....
+	//	}
+	//
+	// See the `concurrent-instantiation` example for a complete usage.
+	//
+	// Non-empty named modules are available for other modules to import by name.
+	WithName(string) ModuleConfig
+
+	// WithStartFunctions configures the functions to call after the module is
+	// instantiated. Defaults to "_start".
+	//
+	// Clearing the default is supported, via `WithStartFunctions()`.
+	//
+	// # Notes
+	//
+	//   - If a start function doesn't exist, it is skipped. However, any that
+	//     do exist are called in order.
+	//   - Start functions are not intended to be called multiple times.
+	//     Functions that should be called multiple times should be invoked
+	//     manually via api.Module's `ExportedFunction` method.
+	//   - Start functions commonly exit the module during instantiation,
+	//     preventing use of any functions later. This is the case in "wasip1",
+	//     which defines the default value "_start".
+	//   - See /RATIONALE.md for motivation of this feature.
+	WithStartFunctions(...string) ModuleConfig
+
+	// WithStderr configures where standard error (file descriptor 2) is written. Defaults to io.Discard.
+	//
+	// This writer is most commonly used by the functions like "fd_write" in "wasi_snapshot_preview1" although it could
+	// be used by functions imported from other modules.
+	//
+	// # Notes
+	//
+	//   - The caller is responsible to close any io.Writer they supply: It is not closed on api.Module Close.
+	//   - This does not default to os.Stderr as that both violates sandboxing and prevents concurrent modules.
+	//
+	// See https://linux.die.net/man/3/stderr
+	WithStderr(io.Writer) ModuleConfig
+
+	// WithStdin configures where standard input (file descriptor 0) is read. Defaults to return io.EOF.
+	//
+	// This reader is most commonly used by the functions like "fd_read" in "wasi_snapshot_preview1" although it could
+	// be used by functions imported from other modules.
+	//
+	// # Notes
+	//
+	//   - The caller is responsible to close any io.Reader they supply: It is not closed on api.Module Close.
+	//   - This does not default to os.Stdin as that both violates sandboxing and prevents concurrent modules.
+	//
+	// See https://linux.die.net/man/3/stdin
+	WithStdin(io.Reader) ModuleConfig
+
+	// WithStdout configures where standard output (file descriptor 1) is written. Defaults to io.Discard.
+	//
+	// This writer is most commonly used by the functions like "fd_write" in "wasi_snapshot_preview1" although it could
+	// be used by functions imported from other modules.
+	//
+	// # Notes
+	//
+	//   - The caller is responsible to close any io.Writer they supply: It is not closed on api.Module Close.
+	//   - This does not default to os.Stdout as that both violates sandboxing and prevents concurrent modules.
+	//
+	// See https://linux.die.net/man/3/stdout
+	WithStdout(io.Writer) ModuleConfig
+
+	// WithWalltime configures the wall clock, sometimes referred to as the
+	// real time clock. sys.Walltime returns the current unix/epoch time,
+	// seconds since midnight UTC 1 January 1970, with a nanosecond fraction.
+	// This defaults to a fake result that increases by 1ms on each reading.
+	//
+	// Here's an example that uses a custom clock:
+	//	moduleConfig = moduleConfig.
+	//		WithWalltime(func(context.Context) (sec int64, nsec int32) {
+	//			return clock.walltime()
+	//		}, sys.ClockResolution(time.Microsecond.Nanoseconds()))
+	//
+	// # Notes:
+	//   - This does not default to time.Now as that violates sandboxing.
+	//   - This is used to implement host functions such as WASI
+	//     `clock_time_get` with the `realtime` clock ID.
+	//   - Use WithSysWalltime for a usable implementation.
+	WithWalltime(sys.Walltime, sys.ClockResolution) ModuleConfig
+
+	// WithSysWalltime uses time.Now for sys.Walltime with a resolution of 1us
+	// (1000ns).
+	//
+	// See WithWalltime
+	WithSysWalltime() ModuleConfig
+
+	// WithNanotime configures the monotonic clock, used to measure elapsed
+	// time in nanoseconds. Defaults to a fake result that increases by 1ms
+	// on each reading.
+	//
+	// Here's an example that uses a custom clock:
+	//	moduleConfig = moduleConfig.
+	//		WithNanotime(func(context.Context) int64 {
+	//			return clock.nanotime()
+	//		}, sys.ClockResolution(time.Microsecond.Nanoseconds()))
+	//
+	// # Notes:
+	//   - This does not default to time.Since as that violates sandboxing.
+	//   - This is used to implement host functions such as WASI
+	//     `clock_time_get` with the `monotonic` clock ID.
+	//   - Some compilers implement sleep by looping on sys.Nanotime (e.g. Go).
+	//   - If you set this, you should probably set WithNanosleep also.
+	//   - Use WithSysNanotime for a usable implementation.
+	WithNanotime(sys.Nanotime, sys.ClockResolution) ModuleConfig
+
+	// WithSysNanotime uses time.Now for sys.Nanotime with a resolution of 1ns.
+	//
+	// See WithNanotime
+	WithSysNanotime() ModuleConfig
+
+	// WithNanosleep configures how to pause the current goroutine for at
+	// least the configured nanoseconds. Defaults to return immediately.
+	//
+	// This example uses a custom sleep function:
+	//	moduleConfig = moduleConfig.
+	//		WithNanosleep(func(ns int64) {
+	//			rel := unix.NsecToTimespec(ns)
+	//			remain := unix.Timespec{}
+	//			for { // loop until no more time remaining
+	//				err := unix.ClockNanosleep(unix.CLOCK_MONOTONIC, 0, &rel, &remain)
+	//			--snip--
+	//
+	// # Notes:
+	//   - This does not default to time.Sleep as that violates sandboxing.
+	//   - This is used to implement host functions such as WASI `poll_oneoff`.
+	//   - Some compilers implement sleep by looping on sys.Nanotime (e.g. Go).
+	//   - If you set this, you should probably set WithNanotime also.
+	//   - Use WithSysNanosleep for a usable implementation.
+	WithNanosleep(sys.Nanosleep) ModuleConfig
+
+	// WithOsyield yields the processor, typically to implement spin-wait
+	// loops. Defaults to return immediately.
+	//
+	// # Notes:
+	//   - This primarily supports `sched_yield` in WASI
+	//   - This does not default to runtime.osyield as that violates sandboxing.
+	WithOsyield(sys.Osyield) ModuleConfig
+
+	// WithSysNanosleep uses time.Sleep for sys.Nanosleep.
+	//
+	// See WithNanosleep
+	WithSysNanosleep() ModuleConfig
+
+	// WithRandSource configures a source of random bytes. Defaults to return a
+	// deterministic source. You might override this with crypto/rand.Reader
+	//
+	// This reader is most commonly used by the functions like "random_get" in
+	// "wasi_snapshot_preview1", "seed" in AssemblyScript standard "env", and
+	// "getRandomData" when runtime.GOOS is "js".
+	//
+	// Note: The caller is responsible to close any io.Reader they supply: It
+	// is not closed on api.Module Close.
+	WithRandSource(io.Reader) ModuleConfig
+}
+
+// moduleConfig is the ModuleConfig implementation in this file. Each WithXXX
+// method stores its argument on a copy of the receiver, and toSysContext
+// passes the collected values to internalsys.NewContext.
+//
+// name and nameSet are written together by WithName, so nameSet records that a
+// name was supplied even when that name is the empty string.
+type moduleConfig struct {
+	name               string
+	nameSet            bool
+	startFunctions     []string
+	stdin              io.Reader
+	stdout             io.Writer
+	stderr             io.Writer
+	randSource         io.Reader
+	walltime           sys.Walltime
+	walltimeResolution sys.ClockResolution
+	nanotime           sys.Nanotime
+	nanotimeResolution sys.ClockResolution
+	nanosleep          sys.Nanosleep
+	osyield            sys.Osyield
+	args               [][]byte
+	// environ is pair-indexed to retain order similar to os.Environ: a key
+	// sits at an even index and its value at the following index.
+	environ [][]byte
+	// environKeys allow overwriting of existing values. Each entry maps a key
+	// to the index of that key inside environ.
+	environKeys map[string]int
+	// fsConfig is the file system configuration for ABI like WASI.
+	fsConfig FSConfig
+	// sockConfig is the network listener configuration for ABI like WASI.
+	// When non-nil, toSysContext builds TCP listeners from it.
+	sockConfig *internalsock.Config
+}
+
+// NewModuleConfig returns a ModuleConfig that can be used for configuring module instantiation.
+//
+// The returned configuration starts with "_start" as its only start function
+// and with no environment variables.
+func NewModuleConfig() ModuleConfig {
+	cfg := new(moduleConfig)
+	cfg.startFunctions = []string{"_start"}
+	cfg.environKeys = make(map[string]int)
+	return cfg
+}
+
+// clone returns a copy of this module config that can be modified without
+// affecting the receiver.
+//
+// The struct is copied by value first. The two containers that WithEnv writes
+// to in place, environKeys and environ, are then duplicated. Without the
+// environ copy the clone would keep the receiver's backing array, so replacing
+// a value, or appending into spare capacity, would change configurations that
+// were derived earlier and are documented as immutable.
+//
+// The inner []byte elements are still shared. That is safe because elements
+// are only ever replaced, never edited.
+func (c *moduleConfig) clone() *moduleConfig {
+	ret := *c // shallow copy; the map and the environ slice still alias c here
+
+	ret.environKeys = make(map[string]int, len(c.environKeys))
+	for key, value := range c.environKeys {
+		ret.environKeys[key] = value
+	}
+
+	// Appending to a nil slice allocates a fresh backing array sized to fit,
+	// and leaves environ nil when the receiver has no variables.
+	ret.environ = append([][]byte(nil), c.environ...)
+	return &ret
+}
+
+// WithArgs implements ModuleConfig.WithArgs
+//
+// The arguments are converted to byte slices and replace any set previously.
+func (c *moduleConfig) WithArgs(args ...string) ModuleConfig {
+	cfg := c.clone()
+	cfg.args = toByteSlices(args)
+	return cfg
+}
+
+// toByteSlices converts each input string to its own byte slice, keeping the
+// input order. Empty or nil input yields a nil result.
+func toByteSlices(strings []string) (result [][]byte) {
+	count := len(strings)
+	if count == 0 {
+		return nil
+	}
+	result = make([][]byte, 0, count)
+	for _, s := range strings {
+		result = append(result, []byte(s))
+	}
+	return result
+}
+
+// WithEnv implements ModuleConfig.WithEnv
+//
+// A key that is already present has its value replaced at its existing
+// position. A new key is appended as a key/value pair and its position is
+// recorded in environKeys.
+func (c *moduleConfig) WithEnv(key, value string) ModuleConfig {
+	cfg := c.clone()
+	if idx, exists := cfg.environKeys[key]; exists {
+		// environ is pair-indexed: the value sits directly after its key.
+		cfg.environ[idx+1] = []byte(value)
+		return cfg
+	}
+	cfg.environKeys[key] = len(cfg.environ)
+	cfg.environ = append(cfg.environ, []byte(key), []byte(value))
+	return cfg
+}
+
+// WithFS implements ModuleConfig.WithFS
+func (c *moduleConfig) WithFS(fs fs.FS) ModuleConfig {
+	var config FSConfig
+	if fs != nil {
+		config = NewFSConfig().WithFSMount(fs, "")
+	}
+	return c.WithFSConfig(config)
+}
+
+// WithFSConfig implements ModuleConfig.WithFSConfig
+//
+// The receiver is left untouched: config is stored on a clone, which is returned.
+func (c *moduleConfig) WithFSConfig(config FSConfig) ModuleConfig {
+	cfg := c.clone()
+	cfg.fsConfig = config
+	return cfg
+}
+
+// WithName implements ModuleConfig.WithName
+//
+// nameSet is raised alongside the name, so an explicitly supplied empty name
+// can be told apart from a name that was never configured.
+func (c *moduleConfig) WithName(name string) ModuleConfig {
+	cfg := c.clone()
+	cfg.name, cfg.nameSet = name, true
+	return cfg
+}
+
+// WithStartFunctions implements ModuleConfig.WithStartFunctions
+//
+// The given names replace the previous list. The slice is stored as passed,
+// without copying.
+func (c *moduleConfig) WithStartFunctions(startFunctions ...string) ModuleConfig {
+	cfg := c.clone()
+	cfg.startFunctions = startFunctions
+	return cfg
+}
+
+// WithStderr implements ModuleConfig.WithStderr
+//
+// The receiver is left untouched: the writer is stored on a clone, which is returned.
+func (c *moduleConfig) WithStderr(stderr io.Writer) ModuleConfig {
+	cfg := c.clone()
+	cfg.stderr = stderr
+	return cfg
+}
+
+// WithStdin implements ModuleConfig.WithStdin
+//
+// The receiver is left untouched: the reader is stored on a clone, which is returned.
+func (c *moduleConfig) WithStdin(stdin io.Reader) ModuleConfig {
+	cfg := c.clone()
+	cfg.stdin = stdin
+	return cfg
+}
+
+// WithStdout implements ModuleConfig.WithStdout
+//
+// The receiver is left untouched: the writer is stored on a clone, which is returned.
+func (c *moduleConfig) WithStdout(stdout io.Writer) ModuleConfig {
+	cfg := c.clone()
+	cfg.stdout = stdout
+	return cfg
+}
+
+// WithWalltime implements ModuleConfig.WithWalltime
+//
+// The clock function and its resolution are stored together on a clone.
+func (c *moduleConfig) WithWalltime(walltime sys.Walltime, resolution sys.ClockResolution) ModuleConfig {
+	cfg := c.clone()
+	cfg.walltime, cfg.walltimeResolution = walltime, resolution
+	return cfg
+}
+
+// We choose arbitrary resolutions here because there's no perfect alternative. For example, according to the
+// source in time.go, windows monotonic resolution can be 15ms. This chooses arbitrarily 1us for wall time and
+// 1ns for monotonic. See RATIONALE.md for more context.
+
+// WithSysWalltime implements ModuleConfig.WithSysWalltime
+//
+// It configures platform.Walltime with a resolution of one microsecond,
+// expressed in nanoseconds.
+func (c *moduleConfig) WithSysWalltime() ModuleConfig {
+	resolution := sys.ClockResolution(time.Microsecond.Nanoseconds())
+	return c.WithWalltime(platform.Walltime, resolution)
+}
+
+// WithNanotime implements ModuleConfig.WithNanotime
+//
+// The clock function and its resolution are stored together on a clone.
+func (c *moduleConfig) WithNanotime(nanotime sys.Nanotime, resolution sys.ClockResolution) ModuleConfig {
+	cfg := c.clone()
+	cfg.nanotime, cfg.nanotimeResolution = nanotime, resolution
+	return cfg
+}
+
+// WithSysNanotime implements ModuleConfig.WithSysNanotime
+//
+// It configures platform.Nanotime with a resolution value of 1.
+func (c *moduleConfig) WithSysNanotime() ModuleConfig {
+	resolution := sys.ClockResolution(1)
+	return c.WithNanotime(platform.Nanotime, resolution)
+}
+
+// WithNanosleep implements ModuleConfig.WithNanosleep
+//
+// The copy is made with clone, matching the other WithXXX methods. A plain
+// struct copy would leave the result sharing the receiver's environKeys map.
+func (c *moduleConfig) WithNanosleep(nanosleep sys.Nanosleep) ModuleConfig {
+	ret := c.clone()
+	ret.nanosleep = nanosleep
+	return ret
+}
+
+// WithOsyield implements ModuleConfig.WithOsyield
+//
+// The copy is made with clone, matching the other WithXXX methods. A plain
+// struct copy would leave the result sharing the receiver's environKeys map.
+func (c *moduleConfig) WithOsyield(osyield sys.Osyield) ModuleConfig {
+	ret := c.clone()
+	ret.osyield = osyield
+	return ret
+}
+
+// WithSysNanosleep implements ModuleConfig.WithSysNanosleep
+//
+// It is shorthand for WithNanosleep with platform.Nanosleep.
+func (c *moduleConfig) WithSysNanosleep() ModuleConfig {
+	sleep := platform.Nanosleep
+	return c.WithNanosleep(sleep)
+}
+
+// WithRandSource implements ModuleConfig.WithRandSource
+//
+// The receiver is left untouched: the reader is stored on a clone, which is returned.
+func (c *moduleConfig) WithRandSource(source io.Reader) ModuleConfig {
+	cfg := c.clone()
+	cfg.randSource = source
+	return cfg
+}
+
+// toSysContext creates a baseline wasm.Context configured by ModuleConfig.
+//
+// The pair-indexed environ is flattened into "key=value" entries. An empty key
+// or a key containing '=' is rejected with an error. File system preopens are
+// taken from fsConfig when it is a *fsConfig, and TCP listeners are built from
+// sockConfig when one is set. Everything is then passed to
+// internalsys.NewContext.
+func (c *moduleConfig) toSysContext() (sysCtx *internalsys.Context, err error) {
+	var environ [][]byte // deliberately not pre-allocated so it stays nil when there are no variables
+	// Same validation as syscall.Setenv for Linux
+	for i := 0; i < len(c.environ); i += 2 {
+		key, value := c.environ[i], c.environ[i+1]
+		if len(key) == 0 {
+			return nil, errors.New("environ invalid: empty key")
+		}
+		for _, b := range key {
+			if b == '=' { // NUL enforced in NewContext
+				return nil, errors.New("environ invalid: key contains '=' character")
+			}
+		}
+		// Sized exactly for key, separator and value.
+		entry := make([]byte, 0, len(key)+len(value)+1)
+		entry = append(entry, key...)
+		entry = append(entry, '=')
+		entry = append(entry, value...)
+		environ = append(environ, entry)
+	}
+
+	var (
+		mounts     []experimentalsys.FS
+		guestPaths []string
+	)
+	if preopened, ok := c.fsConfig.(*fsConfig); ok {
+		mounts, guestPaths = preopened.preopens()
+	}
+
+	var listeners []*net.TCPListener
+	if sock := c.sockConfig; sock != nil {
+		listeners, err = sock.BuildTCPListeners()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return internalsys.NewContext(
+		math.MaxUint32,
+		c.args,
+		environ,
+		c.stdin,
+		c.stdout,
+		c.stderr,
+		c.randSource,
+		c.walltime, c.walltimeResolution,
+		c.nanotime, c.nanotimeResolution,
+		c.nanosleep, c.osyield,
+		mounts, guestPaths,
+		listeners,
+	)
+}
@@ -0,0 +1,35 @@
+package experimental
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/internal/expctxkeys"
+)
+
+// Snapshot holds the execution state at the time of a Snapshotter.Snapshot call.
+type Snapshot interface {
+	// Restore sets the Wasm execution state to the capture. Because a host function
+	// calling this is resetting the pointer to the execution stack, the host function
+	// will not be able to return values in the normal way. ret is a slice of values the
+	// host function intends to return from the restored function.
+	Restore(ret []uint64)
+}
+
+// Snapshotter allows host functions to snapshot the WebAssembly execution environment.
+// A host function obtains one with GetSnapshotter.
+type Snapshotter interface {
+	// Snapshot captures the current execution state. The returned Snapshot
+	// can later be handed back via Snapshot.Restore.
+	Snapshot() Snapshot
+}
+
+// WithSnapshotter enables snapshots.
+// Passing the returned context to a exported function invocation enables snapshots,
+// and allows host functions to retrieve the Snapshotter using GetSnapshotter.
+func WithSnapshotter(ctx context.Context) context.Context {
+	return context.WithValue(ctx, expctxkeys.EnableSnapshotterKey{}, struct{}{})
+}
+
+// GetSnapshotter gets the Snapshotter from a host function.
+// It is only present if WithSnapshotter was called with the function invocation context.
+//
+// The type assertion is unchecked, so this panics when ctx carries no
+// Snapshotter under expctxkeys.SnapshotterKey.
+func GetSnapshotter(ctx context.Context) Snapshotter {
+	stored := ctx.Value(expctxkeys.SnapshotterKey{})
+	return stored.(Snapshotter)
+}
@@ -0,0 +1,63 @@
+package experimental
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/internal/expctxkeys"
+)
+
+// CloseNotifier is a notification hook, invoked when a module is closed.
+// Register one on a context with WithCloseNotifier.
+//
+// Note: This is experimental progress towards #1197, and likely to change. Do
+// not expose this in shared libraries as it can cause version locks.
+type CloseNotifier interface {
+	// CloseNotify is a notification that occurs *before* an api.Module is
+	// closed. `exitCode` is zero on success or in the case there was no exit
+	// code.
+	//
+	// Notes:
+	//   - This does not return an error because the module will be closed
+	//     unconditionally.
+	//   - Do not panic from this function, as doing so could cause resource
+	//     leaks.
+	//   - While this is only called once per module, if configured for
+	//     multiple modules, it will be called for each, e.g. on runtime close.
+	CloseNotify(ctx context.Context, exitCode uint32)
+}
+
+// ^-- Note: This might need to be a part of the listener or become a part of
+// host state implementation. For example, if this is used to implement state
+// cleanup for host modules, possibly something like below would be better, as
+// it could be implemented in a way that allows concurrent module use.
+//
+//	// key is like a context key, stateFactory is invoked per instantiate and
+//	// is associated with the key (exposed as `Module.State` similar to go
+//	// context). Using a key is better than the module name because we can
+//	// de-dupe it for host modules that can be instantiated into different
+//	// names. Also, you can make the key package private.
+//	HostModuleBuilder.WithState(key any, stateFactory func() Cleanup)
+//
+// Such a design could work to isolate state only needed for wasip1, for
+// example the dirent cache. However, if end users use this for different
+// things, we may need separate designs.
+//
+// In summary, the purpose of this iteration is to identify projects that
+// would use something like this, and then we can figure out which way it
+// should go.
+
+// CloseNotifyFunc is a convenience for defining a CloseNotifier inline: it
+// adapts a plain function to the CloseNotifier interface.
+type CloseNotifyFunc func(ctx context.Context, exitCode uint32)
+
+// CloseNotify implements CloseNotifier.CloseNotify by calling the function
+// itself with the same arguments.
+func (f CloseNotifyFunc) CloseNotify(ctx context.Context, exitCode uint32) {
+	f(ctx, exitCode)
+}
+
+// WithCloseNotifier registers the given CloseNotifier into the given
+// context.Context. A nil notifier leaves the context unchanged.
+func WithCloseNotifier(ctx context.Context, notifier CloseNotifier) context.Context {
+	if notifier == nil {
+		return ctx
+	}
+	return context.WithValue(ctx, expctxkeys.CloseNotifierKey{}, notifier)
+}
@@ -0,0 +1,19 @@
+package experimental
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/internal/expctxkeys"
+)
+
+// WithCompilationWorkers sets the desired number of compilation workers.
+//
+// The value is stored as given. GetCompilationWorkers raises anything below 1
+// to 1 when reading it back.
+func WithCompilationWorkers(ctx context.Context, workers int) context.Context {
+	key := expctxkeys.CompilationWorkers{}
+	return context.WithValue(ctx, key, workers)
+}
+
+// GetCompilationWorkers returns the desired number of compilation workers.
+// The minimum value returned is 1, which is also the result when ctx carries
+// no value or a value that is not an int.
+func GetCompilationWorkers(ctx context.Context) int {
+	if n, ok := ctx.Value(expctxkeys.CompilationWorkers{}).(int); ok && n > 1 {
+		return n
+	}
+	return 1
+}
@@ -0,0 +1,41 @@
+// Package experimental includes features we aren't yet sure about. These are enabled with context.Context keys.
+//
+// Note: All features here may be changed or deleted at any time, so use with caution!
+package experimental
+
+import (
+	"github.com/tetratelabs/wazero/api"
+)
+
+// InternalModule is an api.Module that exposes additional
+// information: the number of globals and indexed access to each of them.
+type InternalModule interface {
+	api.Module
+
+	// NumGlobal returns the count of all globals in the module.
+	NumGlobal() int
+
+	// Global provides a read-only view for a given global index.
+	//
+	// This method panics if i is out of bounds.
+	Global(i int) api.Global
+}
+
+// ProgramCounter is an opaque value representing a specific execution point in
+// a module. It is meant to be used with InternalFunction.SourceOffsetForPC and
+// StackIterator.
+type ProgramCounter uint64
+
+// InternalFunction exposes some information about a function instance.
+type InternalFunction interface {
+	// Definition provides introspection into the function's names and
+	// signature.
+	Definition() api.FunctionDefinition
+
+	// SourceOffsetForPC resolves a program counter into its corresponding
+	// offset in the Code section of the module this function belongs to.
+	// The source offset is meant to help map the function calls to their
+	// location in the original source files. Returns 0 if the offset cannot
+	// be calculated.
+	//
+	// pc is a ProgramCounter, such as one returned by
+	// StackIterator.ProgramCounter.
+	SourceOffsetForPC(pc ProgramCounter) uint64
+}
@@ -0,0 +1,18 @@
+package experimental
+
+import "github.com/tetratelabs/wazero/api"
+
+// CoreFeaturesThreads enables threads instructions ("threads").
+//
+// The flag is api.CoreFeatureSIMD shifted left by one bit.
+//
+// # Notes
+//
+//   - The instruction list is too long to enumerate in godoc.
+//     See https://github.com/WebAssembly/threads/blob/main/proposals/threads/Overview.md
+//   - Atomic operations are guest-only until api.Memory or otherwise expose them to host functions.
+//   - On systems without mmap available, the memory will pre-allocate to the maximum size. Many
+//     binaries will use a theoretical maximum like 4GB, so if using such a binary on a system
+//     without mmap, consider editing the binary to reduce the max size setting of memory.
+const CoreFeaturesThreads = api.CoreFeatureSIMD << 1
+
+// CoreFeaturesTailCall enables tail call instructions ("tail-call").
+//
+// The flag is api.CoreFeatureSIMD shifted left by two bits.
+const CoreFeaturesTailCall = api.CoreFeatureSIMD << 2
@@ -0,0 +1,19 @@
+package experimental
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/internal/expctxkeys"
+)
+
+// ImportResolver is an experimental func type that, if set,
+// will be used as the first step in resolving imports.
+// See issue 2294.
+// If the import name is not found, it should return nil.
+//
+// Attach a resolver to a context with WithImportResolver.
+type ImportResolver func(name string) api.Module
+
+// WithImportResolver returns a new context with the given ImportResolver
+// stored under expctxkeys.ImportResolverKey.
+func WithImportResolver(ctx context.Context, resolver ImportResolver) context.Context {
+	key := expctxkeys.ImportResolverKey{}
+	return context.WithValue(ctx, key, resolver)
+}
@@ -0,0 +1,324 @@
+package experimental
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/internal/expctxkeys"
+)
+
+// StackIterator allows iterating on each function of the call stack, starting
+// from the top. At least one call to Next() is required to start the iteration.
+//
+// Note: The iterator provides a view of the call stack at the time of
+// iteration. As a result, parameter values may be different than the ones their
+// function was called with.
+type StackIterator interface {
+	// Next moves the iterator to the next function in the stack. Returns
+	// false if it reached the bottom of the stack.
+	Next() bool
+	// Function describes the function called by the current frame. Call
+	// Next first: iteration does not start until Next has been called.
+	Function() InternalFunction
+	// ProgramCounter returns the program counter associated with the
+	// function call of the current frame. Call Next first, as for Function.
+	ProgramCounter() ProgramCounter
+}
+
+// WithFunctionListenerFactory derives a context from ctx that carries factory
+// under the expctxkeys.FunctionListenerFactoryKey key.
+func WithFunctionListenerFactory(ctx context.Context, factory FunctionListenerFactory) context.Context {
+	key := expctxkeys.FunctionListenerFactoryKey{}
+	return context.WithValue(ctx, key, factory)
+}
+
+// FunctionListenerFactory returns FunctionListeners to be notified when a
+// function is called.
+type FunctionListenerFactory interface {
+	// NewFunctionListener returns a FunctionListener for a defined function.
+	// If nil is returned, no listener will be notified.
+	NewFunctionListener(api.FunctionDefinition) FunctionListener
+	// ^^ A single instance can be returned to avoid instantiating a listener
+	// per function, especially as there may be thousands of functions. Shared
+	// listeners use their FunctionDefinition parameter to tell which function
+	// is being called.
+}
+
+// FunctionListener can be registered for any function via
+// FunctionListenerFactory to be notified when the function is called.
+type FunctionListener interface {
+	// Before is invoked before a function is called.
+	//
+	// There is always one corresponding call to After or Abort for each call to
+	// Before. This guarantee allows the listener to maintain an internal stack
+	// to perform correlations between the entry and exit of functions.
+	//
+	// # Params
+	//
+	//   - ctx: the context of the caller function which must be the same
+	//     instance or parent of the result.
+	//   - mod: the calling module.
+	//   - def: the function definition.
+	//   - params: api.ValueType encoded parameters.
+	//   - stackIterator: iterator on the call stack. At least one entry is
+	//     guaranteed (the called function, which was invoked with params).
+	//     The iterator will be reused between calls to Before.
+	//
+	// Note: api.Memory is meant for inspection, not modification.
+	// mod can be cast to InternalModule to read non-exported globals.
+	Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, params []uint64, stackIterator StackIterator)
+
+	// After is invoked after a function is called.
+	//
+	// # Params
+	//
+	//   - ctx: the context of the caller function.
+	//   - mod: the calling module.
+	//   - def: the function definition.
+	//   - results: api.ValueType encoded results.
+	//
+	// # Notes
+	//
+	//   - api.Memory is meant for inspection, not modification.
+	//   - This is not called when a host function panics, or a guest function traps.
+	//     See Abort for more details.
+	After(ctx context.Context, mod api.Module, def api.FunctionDefinition, results []uint64)
+
+	// Abort is invoked when a function does not return due to a trap or panic.
+	//
+	// # Params
+	//
+	//   - ctx: the context of the caller function.
+	//   - mod: the calling module.
+	//   - def: the function definition.
+	//   - err: the error value representing the reason why the function aborted.
+	//
+	// # Notes
+	//
+	//   - api.Memory is meant for inspection, not modification.
+	Abort(ctx context.Context, mod api.Module, def api.FunctionDefinition, err error)
+}
+
+// FunctionListenerFunc adapts a plain function to the FunctionListener
+// interface, making it possible to use regular functions and methods as
+// listeners of function invocation.
+//
+// The FunctionListener interface declares three methods (Before, After and
+// Abort), but this type invokes its value only when Before is called. It is
+// best suited for cases where the host does not need to perform correlation
+// between the start and end of the function call.
+type FunctionListenerFunc func(context.Context, api.Module, api.FunctionDefinition, []uint64, StackIterator)
+
+// Before satisfies the FunctionListener interface by forwarding all of its
+// arguments to fn.
+func (fn FunctionListenerFunc) Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, params []uint64, stackIterator StackIterator) {
+	fn(ctx, mod, def, params, stackIterator)
+}
+
+// After exists only to satisfy the FunctionListener interface; it is a no-op.
+func (FunctionListenerFunc) After(_ context.Context, _ api.Module, _ api.FunctionDefinition, _ []uint64) {
+}
+
+// Abort exists only to satisfy the FunctionListener interface; it is a no-op.
+func (FunctionListenerFunc) Abort(_ context.Context, _ api.Module, _ api.FunctionDefinition, _ error) {
+}
+
+// FunctionListenerFactoryFunc adapts a plain function to the
+// FunctionListenerFactory interface, making it possible to use regular
+// functions and methods as factories of function listeners.
+type FunctionListenerFactoryFunc func(api.FunctionDefinition) FunctionListener
+
+// NewFunctionListener satisfies the FunctionListenerFactory interface by
+// returning whatever fn returns for def.
+func (fn FunctionListenerFactoryFunc) NewFunctionListener(def api.FunctionDefinition) FunctionListener {
+	listener := fn(def)
+	return listener
+}
+
+// MultiFunctionListenerFactory builds a FunctionListenerFactory that combines
+// the listeners created by each of the factories passed as arguments.
+//
+// The mechanism used to propagate listeners, installing a listener factory in
+// the context.Context used when instantiating modules, only allows a single
+// factory to be installed. This function is how several listeners can be
+// hooked to one module despite that limit.
+//
+// The stack iterator passed to the Before method is reset so that each listener
+// can iterate the call stack independently without impacting the ability of
+// other listeners to do so.
+func MultiFunctionListenerFactory(factories ...FunctionListenerFactory) FunctionListenerFactory {
+	// Copy the arguments so the returned factory owns its own slice.
+	combined := append(make(multiFunctionListenerFactory, 0, len(factories)), factories...)
+	return combined
+}
+
+// multiFunctionListenerFactory is the FunctionListenerFactory returned by
+// MultiFunctionListenerFactory.
+type multiFunctionListenerFactory []FunctionListenerFactory
+
+// NewFunctionListener asks every factory, in order, for a listener for def
+// and keeps the non-nil ones. It returns nil when none was produced, the
+// listener itself when exactly one was produced, and a multiFunctionListener
+// wrapping all of them otherwise.
+func (multi multiFunctionListenerFactory) NewFunctionListener(def api.FunctionDefinition) FunctionListener {
+	var active []FunctionListener
+	for i := range multi {
+		candidate := multi[i].NewFunctionListener(def)
+		if candidate == nil {
+			continue
+		}
+		active = append(active, candidate)
+	}
+	if len(active) == 0 {
+		return nil
+	}
+	if len(active) == 1 {
+		// No need for the fan-out wrapper with a single listener.
+		return active[0]
+	}
+	return &multiFunctionListener{lstns: active}
+}
+
+// multiFunctionListener forwards each notification to every listener in
+// lstns, in slice order.
+type multiFunctionListener struct {
+	lstns []FunctionListener
+	// stack wraps the iterator received by Before so that it can be rewound
+	// before being handed to each listener.
+	stack stackIterator
+}
+
+// Before installs si as the base of the shared stack iterator, then invokes
+// Before on each listener after rewinding the shared iterator.
+func (multi *multiFunctionListener) Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, params []uint64, si StackIterator) {
+	shared := &multi.stack
+	shared.base = si
+	targets := multi.lstns
+	for i := range targets {
+		// Rewind so every listener starts iterating from the first frame.
+		shared.index = -1
+		targets[i].Before(ctx, mod, def, params, shared)
+	}
+}
+
+// After invokes After on each listener with the same arguments.
+func (multi *multiFunctionListener) After(ctx context.Context, mod api.Module, def api.FunctionDefinition, results []uint64) {
+	targets := multi.lstns
+	for i := range targets {
+		targets[i].After(ctx, mod, def, results)
+	}
+}
+
+// Abort invokes Abort on each listener with the same arguments.
+func (multi *multiFunctionListener) Abort(ctx context.Context, mod api.Module, def api.FunctionDefinition, err error) {
+	targets := multi.lstns
+	for i := range targets {
+		targets[i].Abort(ctx, mod, def, err)
+	}
+}
+
+// stackIterator is a StackIterator that can be iterated several times: on the
+// first call to Next it drains base into pcs and fns, and afterwards it only
+// walks those cached slices. Callers rewind it by setting index to -1.
+type stackIterator struct {
+	// base is the iterator still to be drained; nil once it has been cached.
+	base StackIterator
+	// index is the position of the current frame in pcs and fns.
+	index int
+	// pcs and fns hold the program counter and function of each cached frame.
+	pcs []uint64
+	fns []InternalFunction
+}
+
+// Next advances to the next cached frame, caching every frame of base first
+// if that has not been done yet. It reports whether a frame is available.
+func (si *stackIterator) Next() bool {
+	if src := si.base; src != nil {
+		// Reuse the backing arrays of the previous snapshot.
+		si.pcs, si.fns = si.pcs[:0], si.fns[:0]
+		for src.Next() {
+			si.pcs = append(si.pcs, uint64(src.ProgramCounter()))
+			si.fns = append(si.fns, src.Function())
+		}
+		si.base = nil
+	}
+	si.index++
+	return len(si.pcs) > si.index
+}
+
+// ProgramCounter returns the cached program counter of the current frame.
+func (si *stackIterator) ProgramCounter() ProgramCounter {
+	pc := si.pcs[si.index]
+	return ProgramCounter(pc)
+}
+
+// Function returns the cached function of the current frame.
+func (si *stackIterator) Function() InternalFunction {
+	fn := si.fns[si.index]
+	return fn
+}
+
+// StackFrame represents a frame on the call stack.
+//
+// It is the input of NewStackIterator and BenchmarkFunctionListener.
+type StackFrame struct {
+	// Function is the function of the frame; its Definition() is what the
+	// iterator built by NewStackIterator exposes.
+	Function api.Function
+	// Params is passed as the params of FunctionListener.Before by
+	// BenchmarkFunctionListener.
+	Params []uint64
+	// Results is passed as the results of FunctionListener.After by
+	// BenchmarkFunctionListener.
+	Results []uint64
+	// PC is returned as the ProgramCounter of the frame.
+	PC uint64
+	// SourceOffset is returned by SourceOffsetForPC for the frame's function.
+	SourceOffset uint64
+}
+
+// internalFunction is the InternalFunction returned by stackFrameIterator:
+// a function definition paired with a fixed source offset.
+type internalFunction struct {
+	definition   api.FunctionDefinition
+	sourceOffset uint64
+}
+
+// Definition returns the stored function definition.
+func (fn internalFunction) Definition() api.FunctionDefinition {
+	return fn.definition
+}
+
+// SourceOffsetForPC returns the stored source offset; the program counter
+// argument is ignored.
+func (fn internalFunction) SourceOffsetForPC(_ ProgramCounter) uint64 {
+	return fn.sourceOffset
+}
+
+// stackFrameIterator is an implementation of the experimental.StackIterator
+// interface backed by a slice of StackFrame values.
+type stackFrameIterator struct {
+	// index is the position of the current frame; NewStackIterator starts
+	// it at -1 so that the first call to Next selects frame zero.
+	index int
+	stack []StackFrame
+	// fndef caches the function definitions used by Function.
+	fndef []api.FunctionDefinition
+}
+
+// Next advances to the next frame and reports whether one is available.
+func (si *stackFrameIterator) Next() bool {
+	next := si.index + 1
+	si.index = next
+	return next < len(si.stack)
+}
+
+// Function returns the cached definition at the current index together with
+// the source offset of the frame at the current index.
+func (si *stackFrameIterator) Function() InternalFunction {
+	def := si.fndef[si.index]
+	offset := si.stack[si.index].SourceOffset
+	return internalFunction{definition: def, sourceOffset: offset}
+}
+
+// ProgramCounter returns the PC of the frame at the current index.
+func (si *stackFrameIterator) ProgramCounter() ProgramCounter {
+	frame := &si.stack[si.index]
+	return ProgramCounter(frame.PC)
+}
+
+// NewStackIterator constructs a stack iterator from a list of stack frames.
+// The top most frame is the last one.
+//
+// The returned iterator yields the frames starting from the top, i.e. in the
+// reverse of the argument order. The function definition, program counter and
+// source offset reported for a given position all come from the same frame.
+func NewStackIterator(stack ...StackFrame) StackIterator {
+	n := len(stack)
+	si := &stackFrameIterator{
+		index: -1,
+		stack: make([]StackFrame, n),
+		fndef: make([]api.FunctionDefinition, n),
+	}
+	// The size of function definition is only one pointer which should allow
+	// the compiler to optimize the conversion to api.FunctionDefinition; but
+	// the presence of internal.WazeroOnlyType, despite being defined as an
+	// empty struct, forces a heap allocation that we amortize by caching the
+	// result.
+	for i := range si.stack {
+		// Reverse the input and fill both slices from the same frame so that
+		// fndef[i] always describes stack[i].
+		frame := stack[n-(i+1)]
+		si.stack[i] = frame
+		si.fndef[i] = frame.Function.Definition()
+	}
+	return si
+}
+
+// BenchmarkFunctionListener implements a benchmark for function listeners.
+//
+// The benchmark calls Before and After methods repeatedly using the provided
+// module an stack frames to invoke the methods.
+//
+// The stack frame is a representation of the call stack that the Before method
+// will be invoked with. The top of the stack is stored at index zero. The stack
+// must contain at least one frame or the benchmark will fail.
+func BenchmarkFunctionListener(n int, module api.Module, stack []StackFrame, listener FunctionListener) {
+	if len(stack) == 0 {
+		panic("cannot benchmark function listener with an empty stack")
+	}
+
+	ctx := context.Background()
+	def := stack[0].Function.Definition()
+	params := stack[0].Params
+	results := stack[0].Results
+	stackIterator := &stackIterator{base: NewStackIterator(stack...)}
+
+	for i := 0; i < n; i++ {
+		stackIterator.index = -1
+		listener.Before(ctx, module, def, params, stackIterator)
+		listener.After(ctx, module, def, results)
+	}
+}
+
+// TODO: the calls to Abort are not yet tested in internal/testing/enginetest,
+// but they are validated indirectly in tests which exercise host logging,
+// like Test_procExit in imports/wasi_snapshot_preview1. Eventually we should
+// add dedicated tests to validate the behavior of the interpreter and compiler
+// engines independently.
@@ -0,0 +1,52 @@
+package experimental
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/internal/expctxkeys"
+)
+
+// MemoryAllocator is a memory allocation hook,
+// invoked to create a LinearMemory.
+type MemoryAllocator interface {
+	// Allocate should create a new LinearMemory with the given specification:
+	// cap is the suggested initial capacity for the backing []byte,
+	// and max the maximum length that will ever be requested.
+	//
+	// Notes:
+	//   - To back a shared memory, the address of the backing []byte cannot
+	//     change. This is checked at runtime. Implementations should document
+	//     whether the returned LinearMemory meets this requirement.
+	Allocate(cap, max uint64) LinearMemory
+}
+
+// MemoryAllocatorFunc is a convenience for defining a MemoryAllocator inline
+// from a plain function.
+type MemoryAllocatorFunc func(cap, max uint64) LinearMemory
+
+// Allocate implements MemoryAllocator.Allocate by calling fn.
+func (fn MemoryAllocatorFunc) Allocate(cap, max uint64) LinearMemory {
+	memory := fn(cap, max)
+	return memory
+}
+
+// LinearMemory is an expandable []byte that backs a Wasm linear memory.
+type LinearMemory interface {
+	// Reallocate resizes the linear memory to size bytes in length and
+	// returns the backing []byte.
+	//
+	// Notes:
+	//   - To back a shared memory, Reallocate can't change the address of the
+	//     backing []byte (only its length/capacity may change).
+	//   - Reallocate may return nil if it fails to grow the LinearMemory. This
+	//     condition may or may not be handled gracefully by the Wasm module.
+	Reallocate(size uint64) []byte
+	// Free the backing memory buffer.
+	Free()
+}
+
+// WithMemoryAllocator registers the given MemoryAllocator into the given
+// context.Context. The context must be passed when initializing a module.
+//
+// A nil allocator registers nothing: ctx is returned unchanged.
+func WithMemoryAllocator(ctx context.Context, allocator MemoryAllocator) context.Context {
+	if allocator == nil {
+		return ctx
+	}
+	return context.WithValue(ctx, expctxkeys.MemoryAllocatorKey{}, allocator)
+}
@@ -0,0 +1,92 @@
+package sys
+
+import (
+	"fmt"
+	"io/fs"
+
+	"github.com/tetratelabs/wazero/sys"
+)
+
+// FileType is fs.FileMode masked on fs.ModeType. For example, zero is a
+// regular file, fs.ModeDir is a directory and fs.ModeIrregular is unknown.
+//
+// It is declared as an alias, so FileType and fs.FileMode are the same type.
+//
+// Note: This is defined by Linux, not POSIX.
+type FileType = fs.FileMode
+
+// Dirent is an entry read from a directory via File.Readdir.
+//
+// # Notes
+//
+//   - This extends `dirent` defined in POSIX with some fields defined by
+//     Linux. See https://man7.org/linux/man-pages/man3/readdir.3.html and
+//     https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html
+//   - This has a subset of fields defined in sys.Stat_t. Notably, there is no
+//     field corresponding to Stat_t.Dev because that value will be constant
+//     for all files in a directory. To get the Dev value, call File.Stat on
+//     the directory File.Readdir was called on.
+type Dirent struct {
+	// Ino is the file serial number, or zero if not available. See Ino for
+	// more details including impact returning a zero value.
+	Ino sys.Inode
+
+	// Name is the base name of the directory entry. Empty is invalid.
+	Name string
+
+	// Type is fs.FileMode masked on fs.ModeType. For example, zero is a
+	// regular file, fs.ModeDir is a directory and fs.ModeIrregular is unknown.
+	//
+	// Note: This is defined by Linux, not POSIX.
+	Type fs.FileMode
+}
+
+// String formats the entry's name, type and inode for display.
+func (d *Dirent) String() string {
+	name, typ, ino := d.Name, d.Type, d.Ino
+	return fmt.Sprintf("name=%s, type=%v, ino=%d", name, typ, ino)
+}
+
+// IsDir returns true if the Type is fs.ModeDir.
+func (d *Dirent) IsDir() bool {
+	isDir := fs.ModeDir == d.Type
+	return isDir
+}
+
+// DirFile is embeddable to reduce the amount of functions to implement a file.
+//
+// Its methods give the fixed answers below: IsDir reports true, IsAppend
+// reports false, and SetAppend, Read, Pread, Write, Pwrite and Truncate all
+// return EISDIR.
+type DirFile struct{}
+
+// IsAppend implements File.IsAppend. It always returns false.
+func (DirFile) IsAppend() bool {
+	return false
+}
+
+// SetAppend implements File.SetAppend. It always returns EISDIR.
+func (DirFile) SetAppend(bool) Errno {
+	return EISDIR
+}
+
+// IsDir implements File.IsDir. It always returns true with a zero Errno.
+func (DirFile) IsDir() (bool, Errno) {
+	return true, 0
+}
+
+// Read implements File.Read. It always returns zero bytes and EISDIR.
+func (DirFile) Read([]byte) (int, Errno) {
+	return 0, EISDIR
+}
+
+// Pread implements File.Pread. It always returns zero bytes and EISDIR.
+func (DirFile) Pread([]byte, int64) (int, Errno) {
+	return 0, EISDIR
+}
+
+// Write implements File.Write. It always returns zero bytes and EISDIR.
+func (DirFile) Write([]byte) (int, Errno) {
+	return 0, EISDIR
+}
+
+// Pwrite implements File.Pwrite. It always returns zero bytes and EISDIR.
+func (DirFile) Pwrite([]byte, int64) (int, Errno) {
+	return 0, EISDIR
+}
+
+// Truncate implements File.Truncate. It always returns EISDIR.
+func (DirFile) Truncate(int64) Errno {
+	return EISDIR
+}
@@ -0,0 +1,98 @@
+package sys
+
+import "strconv"
+
+// Errno is a subset of POSIX errno used by wazero interfaces. Zero is not an
+// error. Other values should not be interpreted numerically, rather by constants
+// prefixed with 'E'.
+//
+// See https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+type Errno uint16
+
+// ^-- Note: This will eventually move to the public /sys package. It is
+// experimental until we audit the socket related APIs to ensure we have all
+// the Errno it returns, and we export fs.FS. This is not in /internal/sys as
+// that would introduce a package cycle.
+
+// This is a subset of errors to reduce implementation burden. `wasip1` defines
+// almost all POSIX error numbers, but not all are used in practice. wazero
+// will add ones needed in POSIX order, as needed by functions that explicitly
+// document returning them.
+//
+// Values are assigned with `iota + 1`, which leaves zero free to mean "not an
+// error".
+//
+// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-errno-enumu16
+const (
+	EACCES Errno = iota + 1
+	EAGAIN
+	EBADF
+	EEXIST
+	EFAULT
+	EINTR
+	EINVAL
+	EIO
+	EISDIR
+	ELOOP
+	ENAMETOOLONG
+	ENOENT
+	ENOSYS
+	ENOTDIR
+	ERANGE
+	ENOTEMPTY
+	ENOTSOCK
+	ENOTSUP
+	EPERM
+	EROFS
+
+	// NOTE ENOTCAPABLE is defined in wasip1, but not in POSIX. wasi-libc
+	// converts it to EBADF, ESPIPE or EINVAL depending on the call site.
+	// It isn't known if compilers who don't use ENOTCAPABLE would crash on it.
+)
+
+// Error implements error
+func (e Errno) Error() string {
+	switch e {
+	case 0: // not an error
+		return "success"
+	case EACCES:
+		return "permission denied"
+	case EAGAIN:
+		return "resource unavailable, try again"
+	case EBADF:
+		return "bad file descriptor"
+	case EEXIST:
+		return "file exists"
+	case EFAULT:
+		return "bad address"
+	case EINTR:
+		return "interrupted function"
+	case EINVAL:
+		return "invalid argument"
+	case EIO:
+		return "input/output error"
+	case EISDIR:
+		return "is a directory"
+	case ELOOP:
+		return "too many levels of symbolic links"
+	case ENAMETOOLONG:
+		return "filename too long"
+	case ENOENT:
+		return "no such file or directory"
+	case ENOSYS:
+		return "functionality not supported"
+	case ENOTDIR:
+		return "not a directory or a symbolic link to a directory"
+	case ERANGE:
+		return "result too large"
+	case ENOTEMPTY:
+		return "directory not empty"
+	case ENOTSOCK:
+		return "not a socket"
+	case ENOTSUP:
+		return "not supported (may be the same value as [EOPNOTSUPP])"
+	case EPERM:
+		return "operation not permitted"
+	case EROFS:
+		return "read-only file system"
+	default:
+		return "Errno(" + strconv.Itoa(int(e)) + ")"
+	}
+}
@@ -0,0 +1,45 @@
+package sys
+
+import (
+	"io"
+	"io/fs"
+	"os"
+)
+
+// UnwrapOSError returns an Errno or zero if the input is nil.
+//
+// The error is first reduced with underlyingError. io.EOF and a nil
+// underlying error map to zero, the well-known fs.Err* sentinels map to their
+// Errno counterparts, and everything else is delegated to errorToErrno.
+func UnwrapOSError(err error) Errno {
+	if err == nil {
+		return 0
+	}
+	switch cause := underlyingError(err); cause {
+	case nil, io.EOF:
+		return 0 // EOF is not a Errno
+	case fs.ErrInvalid:
+		return EINVAL
+	case fs.ErrPermission:
+		return EPERM
+	case fs.ErrExist:
+		return EEXIST
+	case fs.ErrNotExist:
+		return ENOENT
+	case fs.ErrClosed:
+		return EBADF
+	default:
+		return errorToErrno(cause)
+	}
+}
+
+// underlyingError returns the underlying error if a well-known OS error type.
+//
+// This impl is basically the same as os.underlyingError in os/error.go
+func underlyingError(err error) error {
+	switch err := err.(type) {
+	case *os.PathError:
+		return err.Err
+	case *os.LinkError:
+		return err.Err
+	case *os.SyscallError:
+		return err.Err
+	}
+	return err
+}
@@ -0,0 +1,316 @@
+package sys
+
+import "github.com/tetratelabs/wazero/sys"
+
+// File is a writeable fs.File bridge backed by syscall functions needed for ABI
+// including WASI.
+//
+// Implementations should embed UnimplementedFile for forward compatibility. Any
+// unsupported method or parameter should return ENOSYS.
+//
+// # Errors
+//
+// All methods that can return an error return an Errno, which is zero
+// on success.
+//
+// Restricting to Errno matches current WebAssembly host functions,
+// which are constrained to well-known error codes. For example, WASI maps syscall
+// errors to u32 numeric values.
+//
+// # Notes
+//
+//   - You must call Close to avoid file resource conflicts. For example,
+//     Windows cannot delete the underlying directory while a handle to it
+//     remains open.
+//   - A writable filesystem abstraction is not yet implemented as of Go 1.20.
+//     See https://github.com/golang/go/issues/45757
+type File interface {
+	// Dev returns the device ID (Stat_t.Dev) of this file, zero if unknown or
+	// an error retrieving it.
+	//
+	// # Errors
+	//
+	// Possible errors are those from Stat, except ENOSYS should not
+	// be returned. Zero should be returned if there is no implementation.
+	//
+	// # Notes
+	//
+	//   - Implementations should cache this result.
+	//   - This combined with Ino can implement os.SameFile.
+	Dev() (uint64, Errno)
+
+	// Ino returns the serial number (Stat_t.Ino) of this file, zero if unknown
+	// or an error retrieving it.
+	//
+	// # Errors
+	//
+	// Possible errors are those from Stat, except ENOSYS should not
+	// be returned. Zero should be returned if there is no implementation.
+	//
+	// # Notes
+	//
+	//   - Implementations should cache this result.
+	//   - This combined with Dev can implement os.SameFile.
+	Ino() (sys.Inode, Errno)
+
+	// IsDir returns true if this file is a directory, or an error if there
+	// was an error retrieving this information.
+	//
+	// # Errors
+	//
+	// Possible errors are those from Stat, except ENOSYS should not
+	// be returned. false should be returned if there is no implementation.
+	//
+	// # Notes
+	//
+	//   - Implementations should cache this result.
+	IsDir() (bool, Errno)
+
+	// IsAppend returns true if the file was opened with O_APPEND, or
+	// SetAppend was successfully enabled on this file.
+	//
+	// # Notes
+	//
+	//   - This might not match the underlying state of the file descriptor if
+	//     the file was not opened via OpenFile.
+	IsAppend() bool
+
+	// SetAppend toggles the append mode (O_APPEND) of this file.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EBADF: the file or directory was closed.
+	//
+	// # Notes
+	//
+	//   - There is no `O_APPEND` for `fcntl` in POSIX, so implementations may
+	//     have to re-open the underlying file to apply this. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html
+	SetAppend(enable bool) Errno
+
+	// Stat is similar to syscall.Fstat.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EBADF: the file or directory was closed.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Fstat and `fstatat` with `AT_FDCWD` in POSIX.
+	//     See https://pubs.opengroup.org/onlinepubs/9699919799/functions/stat.html
+	//   - A fs.FileInfo backed implementation sets atim, mtim and ctim to the
+	//     same value.
+	//   - Windows allows you to stat a closed directory.
+	Stat() (sys.Stat_t, Errno)
+
+	// Read attempts to read all bytes in the file into `buf`, and returns the
+	// count read even on error.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EBADF: the file or directory was closed or not readable.
+	//   - EISDIR: the file was a directory.
+	//
+	// # Notes
+	//
+	//   - This is like io.Reader and `read` in POSIX, preferring semantics of
+	//     io.Reader. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/read.html
+	//   - Unlike io.Reader, there is no io.EOF returned on end-of-file. To
+	//     read the file completely, the caller must repeat until `n` is zero.
+	Read(buf []byte) (n int, errno Errno)
+
+	// Pread attempts to read all bytes in the file into `buf`, starting at the
+	// offset `off`, and returns the count read even on error.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EBADF: the file or directory was closed or not readable.
+	//   - EINVAL: the offset was negative.
+	//   - EISDIR: the file was a directory.
+	//
+	// # Notes
+	//
+	//   - This is like io.ReaderAt and `pread` in POSIX, preferring semantics
+	//     of io.ReaderAt. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/pread.html
+	//   - Unlike io.ReaderAt, there is no io.EOF returned on end-of-file. To
+	//     read the file completely, the caller must repeat until `n` is zero.
+	Pread(buf []byte, off int64) (n int, errno Errno)
+
+	// Seek attempts to set the next offset for Read or Write and returns the
+	// resulting absolute offset or an error.
+	//
+	// # Parameters
+	//
+	// The `offset` parameter is interpreted in terms of `whence`:
+	//   - io.SeekStart: relative to the start of the file, e.g. offset=0 sets
+	//     the next Read or Write to the beginning of the file.
+	//   - io.SeekCurrent: relative to the current offset, e.g. offset=16 sets
+	//     the next Read or Write 16 bytes past the prior.
+	//   - io.SeekEnd: relative to the end of the file, e.g. offset=-1 sets the
+	//     next Read or Write to the last byte in the file.
+	//
+	// # Behavior when a directory
+	//
+	// The only supported use case for a directory is seeking to `offset` zero
+	// (`whence` = io.SeekStart). This should have the same behavior as
+	// os.File, which resets any internal state used by Readdir.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EBADF: the file or directory was closed or not readable.
+	//   - EINVAL: the offset was negative.
+	//
+	// # Notes
+	//
+	//   - This is like io.Seeker and `fseek` in POSIX, preferring semantics
+	//     of io.Seeker. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/fseek.html
+	Seek(offset int64, whence int) (newOffset int64, errno Errno)
+
+	// Readdir reads the contents of the directory associated with file and
+	// returns a slice of up to n Dirent values in an arbitrary order. This is
+	// a stateful function, so subsequent calls return any next values.
+	//
+	// If n > 0, Readdir returns at most n entries or an error.
+	// If n <= 0, Readdir returns all remaining entries or an error.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EBADF: the file was closed or not a directory.
+	//   - ENOENT: the directory could not be read (e.g. deleted).
+	//
+	// # Notes
+	//
+	//   - This is like `Readdir` on os.File, but unlike `readdir` in POSIX.
+	//     See https://pubs.opengroup.org/onlinepubs/9699919799/functions/readdir.html
+	//   - Unlike os.File, there is no io.EOF returned on end-of-directory. To
+	//     read the directory completely, the caller must repeat until the
+	//     count read (`len(dirents)`) is less than `n`.
+	//   - See /RATIONALE.md for design notes.
+	Readdir(n int) (dirents []Dirent, errno Errno)
+
+	// Write attempts to write all bytes in `buf` to the file, and returns the
+	// count written even on error.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EBADF: the file was closed, not writeable, or a directory.
+	//
+	// # Notes
+	//
+	//   - This is like io.Writer and `write` in POSIX, preferring semantics of
+	//     io.Writer. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/write.html
+	Write(buf []byte) (n int, errno Errno)
+
+	// Pwrite attempts to write all bytes in `buf` to the file at the given
+	// offset `off`, and returns the count written even on error.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EBADF: the file or directory was closed or not writeable.
+	//   - EINVAL: the offset was negative.
+	//   - EISDIR: the file was a directory.
+	//
+	// # Notes
+	//
+	//   - This is like io.WriterAt and `pwrite` in POSIX, preferring semantics
+	//     of io.WriterAt. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/pwrite.html
+	Pwrite(buf []byte, off int64) (n int, errno Errno)
+
+	// Truncate truncates a file to a specified length.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EBADF: the file or directory was closed.
+	//   - EINVAL: the `size` is negative.
+	//   - EISDIR: the file was a directory.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Ftruncate and `ftruncate` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/ftruncate.html
+	//   - Windows does not error when calling Truncate on a closed file.
+	Truncate(size int64) Errno
+
+	// Sync synchronizes changes to the file.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - EBADF: the file or directory was closed.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Fsync and `fsync` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/fsync.html
+	//   - This returns with no error instead of ENOSYS when
+	//     unimplemented. This prevents fake filesystems from erring.
+	//   - Windows does not error when calling Sync on a closed file.
+	Sync() Errno
+
+	// Datasync synchronizes the data of a file.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - EBADF: the file or directory was closed.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Fdatasync and `fdatasync` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/fdatasync.html
+	//   - This returns with no error instead of ENOSYS when
+	//     unimplemented. This prevents fake filesystems from erring.
+	//   - As this is commonly missing, some implementations dispatch to Sync.
+	Datasync() Errno
+
+	// Utimens sets file access and modification times of this file, at
+	// nanosecond precision.
+	//
+	// # Parameters
+	//
+	// The `atim` and `mtim` parameters refer to access and modification time
+	// stamps as defined in sys.Stat_t. To retain one or the other, substitute
+	// it with the pseudo-timestamp UTIME_OMIT.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EBADF: the file or directory was closed.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.UtimesNano and `futimens` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/futimens.html
+	//   - Windows requires files to be open with O_RDWR, which means you
+	//     cannot use this to update timestamps on a directory (EPERM).
+	Utimens(atim, mtim int64) Errno
+
+	// Close closes the underlying file.
+	//
+	// A zero Errno is returned if unimplemented or success.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Close and `close` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html
+	Close() Errno
+}
@@ -0,0 +1,292 @@
+package sys
+
+import (
+	"io/fs"
+
+	"github.com/tetratelabs/wazero/sys"
+)
+
+// FS is a writeable fs.FS bridge backed by syscall functions needed for ABI
+// including WASI.
+//
+// Implementations should embed UnimplementedFS for forward compatibility. Any
+// unsupported method or parameter should return ENOSYS.
+//
+// # Errors
+//
+// All methods that can return an error return an Errno, which is zero
+// on success.
+//
+// Restricting to Errno matches current WebAssembly host functions,
+// which are constrained to well-known error codes. For example, WASI maps syscall
+// errors to u32 numeric values.
+//
+// # Notes
+//
+// A writable filesystem abstraction is not yet implemented as of Go 1.20. See
+// https://github.com/golang/go/issues/45757
+type FS interface {
+	// OpenFile opens a file. It should be closed via Close on File.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EINVAL: `path` or `flag` is invalid.
+	//   - EISDIR: the path was a directory, but flag included O_RDWR or
+	//     O_WRONLY
+	//   - ENOENT: `path` doesn't exist and `flag` doesn't contain O_CREAT.
+	//
+	// # Constraints on the returned file
+	//
+	// Implementations that can read flags should enforce them regardless of
+	// the type returned. For example, while os.File implements io.Writer,
+	// attempts to write to a directory or a file opened with O_RDONLY fail
+	// with EBADF.
+	//
+	// Some implementations choose whether to enforce read-only opens, namely
+	// fs.FS. While fs.FS is supported (Adapt), wazero cannot runtime enforce
+	// open flags. Instead, we encourage good behavior and test our built-in
+	// implementations.
+	//
+	// # Notes
+	//
+	//   - This is like os.OpenFile, except the path is relative to this file
+	//     system, and Errno is returned instead of os.PathError.
+	//   - Implications of permissions when O_CREAT are described in Chmod notes.
+	//   - This is like `open` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html
+	OpenFile(path string, flag Oflag, perm fs.FileMode) (File, Errno)
+
+	// Lstat gets file status without following symbolic links.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - ENOENT: `path` doesn't exist.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Lstat, except the `path` is relative to this
+	//     file system.
+	//   - This is like `lstat` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/lstat.html
+	//   - An fs.FileInfo backed implementation sets atim, mtim and ctim to the
+	//     same value.
+	//   - When the path is a symbolic link, the stat returned is for the link,
+	//     not the file it refers to.
+	Lstat(path string) (sys.Stat_t, Errno)
+
+	// Stat gets file status.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - ENOENT: `path` doesn't exist.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Stat, except the `path` is relative to this
+	//     file system.
+	//   - This is like `stat` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/stat.html
+	//   - An fs.FileInfo backed implementation sets atim, mtim and ctim to the
+	//     same value.
+	//   - When the path is a symbolic link, the stat returned is for the file
+	//     it refers to.
+	Stat(path string) (sys.Stat_t, Errno)
+
+	// Mkdir makes a directory.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EINVAL: `path` is invalid.
+	//   - EEXIST: `path` exists and is a directory.
+	//   - ENOTDIR: `path` exists and is a file.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Mkdir, except the `path` is relative to this
+	//     file system.
+	//   - This is like `mkdir` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/mkdir.html
+	//   - Implications of permissions are described in Chmod notes.
+	Mkdir(path string, perm fs.FileMode) Errno
+
+	// Chmod changes the mode of the file.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EINVAL: `path` is invalid.
+	//   - ENOENT: `path` does not exist.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Chmod, except the `path` is relative to this
+	//     file system.
+	//   - This is like `chmod` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/chmod.html
+	//   - Windows ignores the execute bit, and any permissions come back as
+	//     group and world. For example, chmod of 0400 reads back as 0444, and
+	//     0700 0666. Also, permissions on directories aren't supported at all.
+	Chmod(path string, perm fs.FileMode) Errno
+
+	// Rename renames a file or directory.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EINVAL: `from` or `to` is invalid.
+	//   - ENOENT: `from` or `to` don't exist.
+	//   - ENOTDIR: `from` is a directory and `to` exists as a file.
+	//   - EISDIR: `from` is a file and `to` exists as a directory.
+	//   - ENOTEMPTY: both `from` and `to` are existing directories, but
+	//     `to` is not empty.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Rename, except the paths are relative to this
+	//     file system.
+	//   - This is like `rename` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/rename.html
+	//   - Windows doesn't let you overwrite an existing directory.
+	Rename(from, to string) Errno
+
+	// Rmdir removes a directory.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EINVAL: `path` is invalid.
+	//   - ENOENT: `path` doesn't exist.
+	//   - ENOTDIR: `path` exists, but isn't a directory.
+	//   - ENOTEMPTY: `path` exists, but isn't empty.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Rmdir, except the `path` is relative to this
+	//     file system.
+	//   - This is like `rmdir` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/rmdir.html
+	//   - As of Go 1.19, Windows maps ENOTDIR to ENOENT.
+	Rmdir(path string) Errno
+
+	// Unlink removes a directory entry.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EINVAL: `path` is invalid.
+	//   - ENOENT: `path` doesn't exist.
+	//   - EISDIR: `path` exists, but is a directory.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Unlink, except the `path` is relative to this
+	//     file system.
+	//   - This is like `unlink` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/unlink.html
+	//   - On Windows, syscall.Unlink doesn't delete a symlink to a directory,
+	//     unlike other platforms. Implementations might want to combine
+	//     syscall.RemoveDirectory with syscall.Unlink in order to delete such
+	//     links on Windows. See
+	//     https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-removedirectorya
+	Unlink(path string) Errno
+
+	// Link creates a "hard" link from oldPath to newPath, in contrast to a
+	// soft link (via Symlink).
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EPERM: `oldPath` is invalid.
+	//   - ENOENT: `oldPath` doesn't exist.
+	//   - EISDIR: `newPath` exists, but is a directory.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Link, except the `oldPath` is relative to this
+	//     file system.
+	//   - This is like `link` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/link.html
+	Link(oldPath, newPath string) Errno
+
+	// Symlink creates a "soft" link from oldPath to linkName, in contrast to
+	// a hard link (via Link).
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EPERM: `oldPath` or `linkName` is invalid.
+	//   - EEXIST: `linkName` exists.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Symlink, except the `linkName` is relative to
+	//     this file system.
+	//   - This is like `symlink` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/symlink.html
+	//   - Only `linkName` is relative to this file system and `oldPath` is
+	//     kept as-is. That is because the link is only resolved relative to
+	//     the directory when dereferencing it (e.g. Readlink).
+	//     See https://github.com/bytecodealliance/cap-std/blob/v1.0.4/cap-std/src/fs/dir.rs#L404-L409
+	//     for how others implement this.
+	//   - Symlinks on Windows require `SeCreateSymbolicLinkPrivilege`.
+	//     Otherwise, EPERM results.
+	//     See https://learn.microsoft.com/en-us/windows/security/threat-protection/security-policy-settings/create-symbolic-links
+	Symlink(oldPath, linkName string) Errno
+
+	// Readlink reads the contents of a symbolic link.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EINVAL: `path` is invalid.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.Readlink, except the path is relative to this
+	//     filesystem.
+	//   - This is like `readlink` in POSIX. See
+	//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/readlink.html
+	//   - On Windows, the path separator is different from other platforms,
+	//     but to provide consistent results to Wasm, this normalizes to a "/"
+	//     separator.
+	Readlink(path string) (string, Errno)
+
+	// Utimens sets the file access and modification times on a path relative
+	// to this file system, at nanosecond precision.
+	//
+	// # Parameters
+	//
+	// If the path is a symbolic link, the target of expanding that link is
+	// updated.
+	//
+	// The `atim` and `mtim` parameters refer to access and modification time
+	// stamps as defined in sys.Stat_t. To retain one or the other, substitute
+	// it with the pseudo-timestamp UTIME_OMIT.
+	//
+	// # Errors
+	//
+	// A zero Errno is success. The below are expected otherwise:
+	//   - ENOSYS: the implementation does not support this function.
+	//   - EINVAL: `path` is invalid.
+	//   - EEXIST: `path` exists and is a directory.
+	//   - ENOTDIR: `path` exists and is a file.
+	//
+	// # Notes
+	//
+	//   - This is like syscall.UtimesNano and `utimensat` with `AT_FDCWD` in
+	//     POSIX. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/futimens.html
+	Utimens(path string, atim, mtim int64) Errno
+}
@@ -0,0 +1,70 @@
+package sys
+
+// Oflag are flags used for FS.OpenFile. Values, including zero, should not be
+// interpreted numerically. Instead, use the constants prefixed with 'O_',
+// with the special casing noted below.
+//
+// # Notes
+//
+//   - O_RDONLY, O_RDWR and O_WRONLY are mutually exclusive, while the other
+//     flags can coexist bitwise.
+//   - This is like `flag` in os.OpenFile and `oflag` in POSIX. See
+//     https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html
+type Oflag uint32
+
+// This is a subset of oflags to reduce implementation burden. `wasip1` splits
+// these across `oflags` and `fdflags`. We can't rely on the Go `os` package,
+// as it is missing some values. Any flags added will be defined in POSIX
+// order, as needed by functions that explicitly document accepting them.
+//
+// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-oflags-flagsu16
+// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-fdflags-flagsu16
+const (
+	// O_RDONLY is like os.O_RDONLY
+	//
+	// The three access modes are plain sequential values (0, 1, 2), not
+	// bit flags.
+	O_RDONLY Oflag = iota
+
+	// O_RDWR is like os.O_RDWR
+	O_RDWR
+
+	// O_WRONLY is like os.O_WRONLY
+	O_WRONLY
+
+	// Define bitflags as they are in POSIX `open`: alphabetically
+
+	// O_APPEND is like os.O_APPEND
+	//
+	// iota is 3 at this point, so the bit flags start at 1<<3 and do not
+	// overlap the access mode values above.
+	O_APPEND Oflag = 1 << iota
+
+	// O_CREAT is like os.O_CREATE
+	O_CREAT
+
+	// O_DIRECTORY is defined on some platforms as syscall.O_DIRECTORY.
+	//
+	// Note: This ensures that the opened file is a directory. Those emulating
+	// on platforms that don't support O_DIRECTORY can double-check the
+	// result with File.IsDir (or stat) and err if not a directory.
+	O_DIRECTORY
+
+	// O_DSYNC is defined on some platforms as syscall.O_DSYNC.
+	O_DSYNC
+
+	// O_EXCL is defined on some platforms as syscall.O_EXCL.
+	O_EXCL
+
+	// O_NOFOLLOW is defined on some platforms as syscall.O_NOFOLLOW.
+	//
+	// Note: This allows programs to ensure that if the opened file is a
+	// symbolic link, the link itself is opened instead of its target.
+	O_NOFOLLOW
+
+	// O_NONBLOCK is defined on some platforms as syscall.O_NONBLOCK.
+	O_NONBLOCK
+
+	// O_RSYNC is defined on some platforms as syscall.O_RSYNC.
+	O_RSYNC
+
+	// O_SYNC is defined on some platforms as syscall.O_SYNC.
+	O_SYNC
+
+	// O_TRUNC is defined on some platforms as syscall.O_TRUNC.
+	O_TRUNC
+)
@@ -0,0 +1,106 @@
+//go:build !(plan9 || aix)
+
+package sys
+
+import "syscall"
+
+func syscallToErrno(err error) (Errno, bool) {
+	errno, ok := err.(syscall.Errno)
+	if !ok {
+		return 0, false
+	}
+	switch errno {
+	case 0:
+		return 0, true
+	case syscall.EACCES:
+		return EACCES, true
+	case syscall.EAGAIN:
+		return EAGAIN, true
+	case syscall.EBADF:
+		return EBADF, true
+	case syscall.EEXIST:
+		return EEXIST, true
+	case syscall.EFAULT:
+		return EFAULT, true
+	case syscall.EINTR:
+		return EINTR, true
+	case syscall.EINVAL:
+		return EINVAL, true
+	case syscall.EIO:
+		return EIO, true
+	case syscall.EISDIR:
+		return EISDIR, true
+	case syscall.ELOOP:
+		return ELOOP, true
+	case syscall.ENAMETOOLONG:
+		return ENAMETOOLONG, true
+	case syscall.ENOENT:
+		return ENOENT, true
+	case syscall.ENOSYS:
+		return ENOSYS, true
+	case syscall.ENOTDIR:
+		return ENOTDIR, true
+	case syscall.ERANGE:
+		return ERANGE, true
+	case syscall.ENOTEMPTY:
+		return ENOTEMPTY, true
+	case syscall.ENOTSOCK:
+		return ENOTSOCK, true
+	case syscall.ENOTSUP:
+		return ENOTSUP, true
+	case syscall.EPERM:
+		return EPERM, true
+	case syscall.EROFS:
+		return EROFS, true
+	default:
+		return EIO, true
+	}
+}
+
+// Unwrap is a convenience for runtime.GOOS which define syscall.Errno.
+func (e Errno) Unwrap() error {
+	switch e {
+	case 0:
+		return nil
+	case EACCES:
+		return syscall.EACCES
+	case EAGAIN:
+		return syscall.EAGAIN
+	case EBADF:
+		return syscall.EBADF
+	case EEXIST:
+		return syscall.EEXIST
+	case EFAULT:
+		return syscall.EFAULT
+	case EINTR:
+		return syscall.EINTR
+	case EINVAL:
+		return syscall.EINVAL
+	case EIO:
+		return syscall.EIO
+	case EISDIR:
+		return syscall.EISDIR
+	case ELOOP:
+		return syscall.ELOOP
+	case ENAMETOOLONG:
+		return syscall.ENAMETOOLONG
+	case ENOENT:
+		return syscall.ENOENT
+	case ENOSYS:
+		return syscall.ENOSYS
+	case ENOTDIR:
+		return syscall.ENOTDIR
+	case ENOTEMPTY:
+		return syscall.ENOTEMPTY
+	case ENOTSOCK:
+		return syscall.ENOTSOCK
+	case ENOTSUP:
+		return syscall.ENOTSUP
+	case EPERM:
+		return syscall.EPERM
+	case EROFS:
+		return syscall.EROFS
+	default:
+		return syscall.EIO
+	}
+}
@@ -0,0 +1,13 @@
+//go:build !windows
+
+package sys
+
+// errorToErrno coerces err into an Errno.
+//
+// An err that already is an Errno is returned unchanged. Otherwise, the
+// result of syscallToErrno is used when it reports a match. Anything else,
+// including a nil err, results in EIO.
+func errorToErrno(err error) Errno {
+	result, matched := err.(Errno)
+	if matched {
+		return result
+	}
+	if result, matched = syscallToErrno(err); matched {
+		return result
+	}
+	return EIO
+}
@@ -0,0 +1,7 @@
+//go:build plan9 || aix
+
+package sys
+
+// syscallToErrno is the variant compiled under this file's build constraint
+// (plan9 || aix). It performs no translation: err is ignored and the result
+// is always a zero Errno with false, meaning "not matched".
+func syscallToErrno(err error) (Errno, bool) {
+	return 0, false
+}
@@ -0,0 +1,37 @@
+package sys
+
+import "golang.org/x/sys/windows"
+
+func errorToErrno(err error) Errno {
+	switch err := err.(type) {
+	case Errno:
+		return err
+	case windows.Errno:
+		// Note: In windows, _ERROR_PATH_NOT_FOUND(0x3) maps to syscall.ENOTDIR
+		switch err {
+		case windows.ERROR_ALREADY_EXISTS, windows.ERROR_FILE_EXISTS:
+			return EEXIST
+		case windows.ERROR_DIRECTORY:
+			// ERROR_DIRECTORY is returned by syscall.Rmdir.
+			return ENOTDIR
+		case windows.ERROR_DIR_NOT_EMPTY:
+			return ENOTEMPTY
+		case windows.ERROR_INVALID_HANDLE, windows.WSAENOTSOCK, windows.ERROR_ACCESS_DENIED:
+			// WSAENOTSOCK is returned by winsock_select when a given handle is not a socket.
+			// POSIX read and write functions expect EBADF, not EACCES when not
+			// open for reading or writing.
+			return EBADF
+		case windows.ERROR_PRIVILEGE_NOT_HELD:
+			return EPERM
+		case windows.ERROR_NEGATIVE_SEEK, windows.ERROR_NOT_A_REPARSE_POINT, windows.ERROR_INVALID_NAME:
+			// ERROR_NEGATIVE_SEEK is returned by os.Truncate.
+			// ERROR_NOT_A_REPARSE_POINT is returned by os.Readlink.
+			// ERROR_INVALID_NAME is returned by open when a file path has a trailing slash.
+			return EINVAL
+		}
+		errno, _ := syscallToErrno(err)
+		return errno
+	default:
+		return EIO
+	}
+}
@@ -0,0 +1,10 @@
+package sys
+
+import "math"
+
+// UTIME_OMIT is a special constant for use in updating times via FS.Utimens
+// or File.Utimens. When passed as atim or mtim, the existing value of that
+// timestamp is retained instead of being overwritten.
+//
+// The sentinel is math.MinInt64.
+//
+// Note: This may be implemented via a stat when the underlying filesystem
+// does not support this value.
+const UTIME_OMIT int64 = math.MinInt64
@@ -0,0 +1,160 @@
+package sys
+
+import (
+	"io/fs"
+
+	"github.com/tetratelabs/wazero/sys"
+)
+
+// UnimplementedFS is an FS that returns ENOSYS for all functions.
+//
+// This should be embedded to have forward compatible implementations: every
+// method defined here is a stub that ignores its arguments.
+type UnimplementedFS struct{}
+
+// OpenFile implements FS.OpenFile by returning a nil File and ENOSYS.
+func (UnimplementedFS) OpenFile(path string, flag Oflag, perm fs.FileMode) (File, Errno) {
+	return nil, ENOSYS
+}
+
+// Lstat implements FS.Lstat by returning an empty sys.Stat_t and ENOSYS.
+func (UnimplementedFS) Lstat(path string) (sys.Stat_t, Errno) {
+	return sys.Stat_t{}, ENOSYS
+}
+
+// Stat implements FS.Stat by returning an empty sys.Stat_t and ENOSYS.
+func (UnimplementedFS) Stat(path string) (sys.Stat_t, Errno) {
+	return sys.Stat_t{}, ENOSYS
+}
+
+// Readlink implements FS.Readlink by returning an empty string and ENOSYS.
+func (UnimplementedFS) Readlink(path string) (string, Errno) {
+	return "", ENOSYS
+}
+
+// Mkdir implements FS.Mkdir by returning ENOSYS.
+func (UnimplementedFS) Mkdir(path string, perm fs.FileMode) Errno {
+	return ENOSYS
+}
+
+// Chmod implements FS.Chmod by returning ENOSYS.
+func (UnimplementedFS) Chmod(path string, perm fs.FileMode) Errno {
+	return ENOSYS
+}
+
+// Rename implements FS.Rename by returning ENOSYS.
+func (UnimplementedFS) Rename(from, to string) Errno {
+	return ENOSYS
+}
+
+// Rmdir implements FS.Rmdir by returning ENOSYS.
+func (UnimplementedFS) Rmdir(path string) Errno {
+	return ENOSYS
+}
+
+// Link implements FS.Link by returning ENOSYS.
+func (UnimplementedFS) Link(_, _ string) Errno {
+	return ENOSYS
+}
+
+// Symlink implements FS.Symlink by returning ENOSYS.
+func (UnimplementedFS) Symlink(_, _ string) Errno {
+	return ENOSYS
+}
+
+// Unlink implements FS.Unlink by returning ENOSYS.
+func (UnimplementedFS) Unlink(path string) Errno {
+	return ENOSYS
+}
+
+// Utimens implements FS.Utimens by returning ENOSYS.
+func (UnimplementedFS) Utimens(path string, atim, mtim int64) Errno {
+	return ENOSYS
+}
+
+// UnimplementedFile is a File that returns ENOSYS for all functions,
+// except where a no-op is otherwise documented.
+//
+// The exceptions defined here are Dev, Ino, IsDir and IsAppend, which report
+// zero values without an error, and Sync, Datasync and Close, which return a
+// zero Errno.
+//
+// This should be embedded to have forward compatible implementations.
+type UnimplementedFile struct{}
+
+// Dev implements File.Dev by returning zero and no error.
+func (UnimplementedFile) Dev() (uint64, Errno) {
+	return 0, 0
+}
+
+// Ino implements File.Ino by returning zero and no error.
+func (UnimplementedFile) Ino() (sys.Inode, Errno) {
+	return 0, 0
+}
+
+// IsDir implements File.IsDir by returning false and no error.
+func (UnimplementedFile) IsDir() (bool, Errno) {
+	return false, 0
+}
+
+// IsAppend implements File.IsAppend by returning false.
+func (UnimplementedFile) IsAppend() bool {
+	return false
+}
+
+// SetAppend implements File.SetAppend by returning ENOSYS.
+func (UnimplementedFile) SetAppend(bool) Errno {
+	return ENOSYS
+}
+
+// Stat implements File.Stat by returning an empty sys.Stat_t and ENOSYS.
+func (UnimplementedFile) Stat() (sys.Stat_t, Errno) {
+	return sys.Stat_t{}, ENOSYS
+}
+
+// Read implements File.Read by returning zero bytes read and ENOSYS.
+func (UnimplementedFile) Read([]byte) (int, Errno) {
+	return 0, ENOSYS
+}
+
+// Pread implements File.Pread by returning zero bytes read and ENOSYS.
+func (UnimplementedFile) Pread([]byte, int64) (int, Errno) {
+	return 0, ENOSYS
+}
+
+// Seek implements File.Seek by returning a zero offset and ENOSYS.
+func (UnimplementedFile) Seek(int64, int) (int64, Errno) {
+	return 0, ENOSYS
+}
+
+// Readdir implements File.Readdir by returning nil entries and ENOSYS.
+func (UnimplementedFile) Readdir(int) (dirents []Dirent, errno Errno) {
+	return nil, ENOSYS
+}
+
+// Write implements File.Write by returning zero bytes written and ENOSYS.
+func (UnimplementedFile) Write([]byte) (int, Errno) {
+	return 0, ENOSYS
+}
+
+// Pwrite implements File.Pwrite by returning zero bytes written and ENOSYS.
+func (UnimplementedFile) Pwrite([]byte, int64) (int, Errno) {
+	return 0, ENOSYS
+}
+
+// Truncate implements File.Truncate by returning ENOSYS.
+func (UnimplementedFile) Truncate(int64) Errno {
+	return ENOSYS
+}
+
+// Sync implements File.Sync as a no-op that reports success.
+func (UnimplementedFile) Sync() Errno {
+	return 0 // not ENOSYS
+}
+
+// Datasync implements File.Datasync as a no-op that reports success.
+func (UnimplementedFile) Datasync() Errno {
+	return 0 // not ENOSYS
+}
+
+// Utimens implements File.Utimens by returning ENOSYS.
+func (UnimplementedFile) Utimens(int64, int64) Errno {
+	return ENOSYS
+}
+
+// Close implements File.Close as a no-op that reports success: the named
+// result is returned with its zero value.
+func (UnimplementedFile) Close() (errno Errno) { return }
@@ -0,0 +1,213 @@
+package wazero
+
+import (
+	"io/fs"
+
+	experimentalsys "github.com/tetratelabs/wazero/experimental/sys"
+	"github.com/tetratelabs/wazero/internal/sys"
+	"github.com/tetratelabs/wazero/internal/sysfs"
+)
+
+// FSConfig configures filesystem paths the embedding host allows the wasm
+// guest to access. Unconfigured paths are not allowed, so functions like
+// `path_open` result in unsupported errors (e.g. syscall.ENOSYS).
+//
+// # Guest Path
+//
+// `guestPath` is the name of the path the guest should use a filesystem for, or
+// empty for any files.
+//
+// All `guestPath` paths are normalized, specifically removing any leading or
+// trailing slashes. This means "/", "./" or "." all coerce to empty "".
+//
+// Multiple `guestPath` values can be configured, but the last longest match
+// wins. For example, if "tmp", then "" were added, a request to open
+// "tmp/foo.txt" uses the filesystem associated with "tmp" even though a wider
+// path, "" (all files), was added later.
+//
+// A `guestPath` of "." coerces to the empty string "" because the current
+// directory is handled by the guest. In other words, the guest resolves its
+// current directory prior to requesting files.
+//
+// More notes on `guestPath`
+//   - Working directories are typically tracked in wasm, though it is
+//     possible some relative paths are requested. For example, TinyGo may
+//     attempt to resolve a path "../.." in unit tests.
+//   - Zig uses the first path name it sees as the initial working directory of
+//     the process.
+//
+// # Scope
+//
+// Configuration here is module instance scoped. This means you can use the
+// same configuration for multiple calls to Runtime.InstantiateModule. Each
+// module will have a different file descriptor table. Any errors accessing
+// resources allowed here are deferred to instantiation time of each module.
+//
+// Any host resources present at the time of configuration, but deleted before
+// Runtime.InstantiateModule will trap/panic when the guest wasm initializes or
+// calls functions like `fd_read`.
+//
+// # Windows
+//
+// While wazero supports Windows as a platform, all known compilers use POSIX
+// conventions at runtime. For example, even when running on Windows, paths
+// used by wasm are separated by forward slash (/), not backslash (\).
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+//   - FSConfig is immutable. Each WithXXX function returns a new instance
+//     including the corresponding change.
+//   - RATIONALE.md includes design background and relationship to WebAssembly
+//     System Interfaces (WASI).
+type FSConfig interface {
+	// WithDirMount assigns a directory at `dir` to any paths beginning at
+	// `guestPath`.
+	//
+	// For example, `dir` as / (or c:\ in Windows), makes the entire host
+	// volume writeable to the path on the guest. The `guestPath` is always a
+	// POSIX style path, slash (/) delimited, even if run on Windows.
+	//
+	// If the same `guestPath` was assigned before, this overrides its value,
+	// retaining the original precedence. See the documentation of FSConfig for
+	// more details on `guestPath`.
+	//
+	// # Isolation
+	//
+	// The guest will have full access to this directory including escaping it
+	// via relative path lookups like "../../". Full access includes operations
+	// such as creating or deleting files, limited to any host level access
+	// controls.
+	//
+	// # os.DirFS
+	//
+	// This configuration optimizes for WASI compatibility which is sometimes
+	// at odds with the behavior of os.DirFS. Hence, this will not behave
+	// exactly the same as os.DirFS. See /RATIONALE.md for more.
+	WithDirMount(dir, guestPath string) FSConfig
+
+	// WithReadOnlyDirMount assigns a directory at `dir` to any paths
+	// beginning at `guestPath`.
+	//
+	// This is the same as WithDirMount except only read operations are
+	// permitted. However, escaping the directory via relative path lookups
+	// like "../../" is still allowed.
+	WithReadOnlyDirMount(dir, guestPath string) FSConfig
+
+	// WithFSMount assigns a fs.FS file system for any paths beginning at
+	// `guestPath`.
+	//
+	// If the same `guestPath` was assigned before, this overrides its value,
+	// retaining the original precedence. See the documentation of FSConfig for
+	// more details on `guestPath`.
+	//
+	// # Isolation
+	//
+	// fs.FS does not restrict the ability to overwrite returned files via
+	// io.Writer. Moreover, os.DirFS documentation includes important notes
+	// about isolation, which also applies to fs.Sub. As of Go 1.19, the
+	// built-in file-systems are not jailed (chroot). See
+	// https://github.com/golang/go/issues/42322
+	//
+	// # os.DirFS
+	//
+	// Due to limited control and functionality available in os.DirFS, we
+	// advise using WithDirMount instead. There will be behavior differences
+	// between os.DirFS and WithDirMount, as the latter biases towards what's
+	// expected from WASI implementations.
+	//
+	// # Custom fs.FileInfo
+	//
+	// The underlying implementation supports data not usually in fs.FileInfo
+	// when `info.Sys` returns *sys.Stat_t. For example, a custom fs.FS can use
+	// this approach to generate or mask sys.Inode data. Such a filesystem
+	// needs to decorate any functions that can return fs.FileInfo:
+	//
+	//   - `Stat` as defined on `fs.File` (always)
+	//   - `Readdir` as defined on `os.File` (if defined)
+	//
+	// See sys.NewStat_t for examples.
+	WithFSMount(fs fs.FS, guestPath string) FSConfig
+}
+
+// fsConfig is the FSConfig implementation returned by NewFSConfig.
+//
+// The fs and guestPaths slices are index-correlated: guestPathToFS maps a
+// normalized guest path to the shared index of its entry in both slices.
+type fsConfig struct {
+	// fs are the currently configured filesystems.
+	fs []experimentalsys.FS
+	// guestPaths are the user-supplied names of the filesystems, retained for
+	// error messages and fmt.Stringer.
+	guestPaths []string
+	// guestPathToFS are the normalized paths to the currently configured
+	// filesystems, used for de-duplicating.
+	guestPathToFS map[string]int
+}
+
+// NewFSConfig returns an empty FSConfig, with no filesystems mounted, that
+// can be used for configuring module instantiation.
+func NewFSConfig() FSConfig {
+	return &fsConfig{guestPathToFS: make(map[string]int)}
+}
+
+// clone returns a copy of this filesystem config whose slices and map are
+// freshly allocated, so changing the copy never affects the receiver.
+func (c *fsConfig) clone() *fsConfig {
+	dup := &fsConfig{
+		fs:            make([]experimentalsys.FS, len(c.fs)),
+		guestPaths:    make([]string, len(c.guestPaths)),
+		guestPathToFS: make(map[string]int, len(c.guestPathToFS)),
+	}
+	copy(dup.fs, c.fs)
+	copy(dup.guestPaths, c.guestPaths)
+	for guestPath, index := range c.guestPathToFS {
+		dup.guestPathToFS[guestPath] = index
+	}
+	return dup
+}
+
+// WithDirMount implements FSConfig.WithDirMount by mounting sysfs.DirFS(dir)
+// at guestPath.
+func (c *fsConfig) WithDirMount(dir, guestPath string) FSConfig {
+	hostDir := sysfs.DirFS(dir)
+	return c.WithSysFSMount(hostDir, guestPath)
+}
+
+// WithReadOnlyDirMount implements FSConfig.WithReadOnlyDirMount by mounting
+// sysfs.DirFS(dir), wrapped in a sysfs.ReadFS, at guestPath.
+func (c *fsConfig) WithReadOnlyDirMount(dir, guestPath string) FSConfig {
+	readOnly := &sysfs.ReadFS{FS: sysfs.DirFS(dir)}
+	return c.WithSysFSMount(readOnly, guestPath)
+}
+
+// WithFSMount implements FSConfig.WithFSMount
+func (c *fsConfig) WithFSMount(fs fs.FS, guestPath string) FSConfig {
+	var adapted experimentalsys.FS
+	if fs != nil {
+		adapted = &sysfs.AdaptFS{FS: fs}
+	}
+	return c.WithSysFSMount(adapted, guestPath)
+}
+
+// WithSysFSMount implements sysfs.FSConfig
+//
+// The receiver is never modified: changes are applied to a clone, which is
+// returned. The outcomes are:
+//   - fs is an experimentalsys.UnimplementedFS value: the receiver is
+//     returned as-is.
+//   - the normalized guestPath is already mounted: its entry is replaced in
+//     place, keeping its index. This holds even when fs is nil.
+//   - the path is new and fs is non-nil: a new entry is appended.
+//   - the path is new and fs is nil: an unchanged clone is returned.
+func (c *fsConfig) WithSysFSMount(fs experimentalsys.FS, guestPath string) FSConfig {
+	if _, isStub := fs.(experimentalsys.UnimplementedFS); isStub {
+		return c // don't add fake paths.
+	}
+
+	key := sys.StripPrefixesAndTrailingSlash(guestPath)
+	next := c.clone()
+
+	index, mounted := next.guestPathToFS[key]
+	switch {
+	case mounted:
+		next.fs[index] = fs
+		next.guestPaths[index] = guestPath
+	case fs != nil:
+		next.guestPathToFS[key] = len(next.fs)
+		next.fs = append(next.fs, fs)
+		next.guestPaths = append(next.guestPaths, guestPath)
+	}
+	return next
+}
+
+// preopens returns copies of the configured filesystems and their
+// index-correlated guest paths. Both results are nil when no filesystem is
+// configured, and individual filesystem entries may be nil.
+func (c *fsConfig) preopens() ([]experimentalsys.FS, []string) {
+	if len(c.fs) == 0 {
+		return nil, nil
+	}
+	mounts := append(make([]experimentalsys.FS, 0, len(c.fs)), c.fs...)
+	names := append(make([]string, 0, len(c.guestPaths)), c.guestPaths...)
+	return mounts, names
+}
@@ -0,0 +1,97 @@
+package wasi_snapshot_preview1
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/experimental/sys"
+	"github.com/tetratelabs/wazero/internal/wasip1"
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// argsGet is the WASI function named ArgsGetName that reads command-line
+// argument data.
+//
+// # Parameters
+//
+//   - argv: offset to begin writing argument offsets in uint32 little-endian
+//     encoding to api.Memory
+//   - argsSizesGet result argc * 4 bytes are written to this offset
+//   - argvBuf: offset to write the null terminated arguments to api.Memory
+//   - argsSizesGet result argv_len bytes are written to this offset
+//
+// Result (Errno)
+//
+// The return value is 0 except the following error conditions:
+//   - sys.EFAULT: there is not enough memory to write results
+//
+// For example, if argsSizesGet wrote argc=2 and argvLen=5 for arguments:
+// "a" and "bc" parameters argv=7 and argvBuf=1, this function writes the below
+// to api.Memory:
+//
+//	                   argvLen          uint32le    uint32le
+//	            +----------------+     +--------+  +--------+
+//	            |                |     |        |  |        |
+//	 []byte{?, 'a', 0, 'b', 'c', 0, ?, 1, 0, 0, 0, 3, 0, 0, 0, ?}
+//	argvBuf --^                      ^           ^
+//	                          argv --|           |
+//	        offset that begins "a" --+           |
+//	                   offset that begins "bc" --+
+//
+// See argsSizesGet
+// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#args_get
+// See https://en.wikipedia.org/wiki/Null-terminated_string
+var argsGet = newHostFunc(wasip1.ArgsGetName, argsGetFn, []api.ValueType{i32, i32}, "argv", "argv_buf")
+
+func argsGetFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	sysCtx := mod.(*wasm.ModuleInstance).Sys
+	argv, argvBuf := uint32(params[0]), uint32(params[1])
+	return writeOffsetsAndNullTerminatedValues(mod.Memory(), sysCtx.Args(), argv, argvBuf, sysCtx.ArgsSize())
+}
+
+// argsSizesGet is the WASI function named ArgsSizesGetName that reads
+// command-line argument sizes.
+//
+// # Parameters
+//
+//   - resultArgc: offset to write the argument count to api.Memory
+//   - resultArgvLen: offset to write the null-terminated argument length to
+//     api.Memory
+//
+// Result (Errno)
+//
+// The return value is 0 except the following error conditions:
+//   - sys.EFAULT: there is not enough memory to write results
+//
+// For example, if args are "a", "bc" and parameters resultArgc=1 and
+// resultArgvLen=6, this function writes the below to api.Memory:
+//
+//	                uint32le       uint32le
+//	               +--------+     +--------+
+//	               |        |     |        |
+//	     []byte{?, 2, 0, 0, 0, ?, 5, 0, 0, 0, ?}
+//	  resultArgc --^              ^
+//	      2 args --+              |
+//	              resultArgvLen --|
+//	len([]byte{'a',0,'b','c',0}) --+
+//
+// See argsGet
+// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#args_sizes_get
+// See https://en.wikipedia.org/wiki/Null-terminated_string
+var argsSizesGet = newHostFunc(wasip1.ArgsSizesGetName, argsSizesGetFn, []api.ValueType{i32, i32}, "result.argc", "result.argv_len")
+
+// argsSizesGetFn is the host function behind argsSizesGet. It writes the
+// argument count to the offset in params[0], then the total argument size to
+// the offset in params[1], returning sys.EFAULT as soon as a write fails.
+func argsSizesGetFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	sysCtx := mod.(*wasm.ModuleInstance).Sys
+	mem := mod.Memory()
+	argcOffset, argvLenOffset := uint32(params[0]), uint32(params[1])
+
+	// The two offsets are not necessarily adjacent, so each value is written
+	// separately. The second write is skipped when the first one fails.
+	wroteBoth := mem.WriteUint32Le(argcOffset, uint32(len(sysCtx.Args()))) &&
+		mem.WriteUint32Le(argvLenOffset, sysCtx.ArgsSize())
+	if !wroteBoth {
+		return sys.EFAULT
+	}
+	return 0
+}
@@ -0,0 +1,116 @@
+package wasi_snapshot_preview1
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/experimental/sys"
+	"github.com/tetratelabs/wazero/internal/wasip1"
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// clockResGet is the WASI function named ClockResGetName that returns the
+// resolution of time values returned by clockTimeGet.
+//
+// # Parameters
+//
+//   - id: clock ID to use
+//   - resultResolution: offset to write the resolution to api.Memory
+//   - the resolution is an uint64 little-endian encoding
+//
+// Result (Errno)
+//
+// The return value is 0 except the following error conditions:
+//   - sys.ENOTSUP: the clock ID is not supported.
+//   - sys.EINVAL: the clock ID is invalid.
+//   - sys.EFAULT: there is not enough memory to write results
+//
+// For example, if the resolution is 100ns, this function writes the below to
+// api.Memory:
+//
+//	                                   uint64le
+//	                   +-------------------------------------+
+//	                   |                                     |
+//	         []byte{?, 0x64, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, ?}
+//	resultResolution --^
+//
+// Note: This is similar to `clock_getres` in POSIX.
+// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-clock_res_getid-clockid---errno-timestamp
+// See https://linux.die.net/man/3/clock_getres
+var clockResGet = newHostFunc(wasip1.ClockResGetName, clockResGetFn, []api.ValueType{i32, i32}, "id", "result.resolution")
+
+// clockResGetFn implements clockResGet. params[0] is the clock ID and
+// params[1] is the offset that receives the resolution in nanoseconds.
+func clockResGetFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	sysCtx := mod.(*wasm.ModuleInstance).Sys
+	clockID := uint32(params[0])
+	resultOffset := uint32(params[1])
+
+	// Only the realtime and monotonic clocks are handled; any other ID is
+	// rejected before touching memory.
+	var nanos uint64
+	switch clockID {
+	case wasip1.ClockIDRealtime:
+		nanos = uint64(sysCtx.WalltimeResolution())
+	case wasip1.ClockIDMonotonic:
+		nanos = uint64(sysCtx.NanotimeResolution())
+	default:
+		return sys.EINVAL
+	}
+
+	if ok := mod.Memory().WriteUint64Le(resultOffset, nanos); !ok {
+		return sys.EFAULT
+	}
+	return 0
+}
+
+// clockTimeGet is the WASI function named ClockTimeGetName that returns
+// the time value of a name (time.Now).
+//
+// # Parameters
+//
+//   - id: clock ID to use
+//   - precision: maximum lag (exclusive) that the returned time value may have,
+//     compared to its actual value
+//   - resultTimestamp: offset to write the timestamp to api.Memory
+//   - the timestamp is epoch nanos encoded as a little-endian uint64
+//
+// Result (Errno)
+//
+// The return value is 0 except the following error conditions:
+//   - sys.ENOTSUP: the clock ID is not supported.
+//   - sys.EINVAL: the clock ID is invalid.
+//   - sys.EFAULT: there is not enough memory to write results
+//
+// For example, if time.Now returned exactly midnight UTC 2022-01-01
+// (1640995200000000000), and parameters resultTimestamp=1, this function
+// writes the below to api.Memory:
+//
+//	                                    uint64le
+//	                  +------------------------------------------+
+//	                  |                                          |
+//	        []byte{?, 0x0, 0x0, 0x1f, 0xa6, 0x70, 0xfc, 0xc5, 0x16, ?}
+//	resultTimestamp --^
+//
+// Note: This is similar to `clock_gettime` in POSIX.
+// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-clock_time_getid-clockid-precision-timestamp---errno-timestamp
+// See https://linux.die.net/man/3/clock_gettime
+var clockTimeGet = newHostFunc(wasip1.ClockTimeGetName, clockTimeGetFn, []api.ValueType{i32, i64, i32}, "id", "precision", "result.timestamp")
+
+// clockTimeGetFn implements clockTimeGet. params[0] is the clock ID,
+// params[1] is the requested precision (not read) and params[2] is the
+// offset that receives the timestamp.
+func clockTimeGetFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	sysCtx := mod.(*wasm.ModuleInstance).Sys
+	clockID := uint32(params[0])
+	// TODO: precision (params[1]) is currently ignored.
+	resultOffset := uint32(params[2])
+
+	var now int64
+	switch clockID {
+	case wasip1.ClockIDRealtime:
+		now = sysCtx.WalltimeNanos()
+	case wasip1.ClockIDMonotonic:
+		now = sysCtx.Nanotime()
+	default:
+		return sys.EINVAL
+	}
+
+	if ok := mod.Memory().WriteUint64Le(resultOffset, uint64(now)); !ok {
+		return sys.EFAULT
+	}
+	return 0
+}
@@ -0,0 +1,100 @@
+package wasi_snapshot_preview1
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/experimental/sys"
+	"github.com/tetratelabs/wazero/internal/wasip1"
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// environGet is the WASI function named EnvironGetName that reads
+// environment variables.
+//
+// # Parameters
+//
+//   - environ: offset to begin writing environment offsets in uint32
+//     little-endian encoding to api.Memory
+//   - environSizesGet result environc * 4 bytes are written to this offset
+//   - environBuf: offset to write the null-terminated variables to api.Memory
+//   - the format is like os.Environ: null-terminated "key=val" entries
+//   - environSizesGet result environLen bytes are written to this offset
+//
+// Result (Errno)
+//
+// The return value is 0 except the following error conditions:
+//   - sys.EFAULT: there is not enough memory to write results
+//
+// For example, if environSizesGet wrote environc=2 and environLen=9 for
+// environment variables: "a=b", "b=cd" and parameters environ=11 and
+// environBuf=1, this function writes the below to api.Memory:
+//
+//	                              environLen                 uint32le    uint32le
+//	             +------------------------------------+     +--------+  +--------+
+//	             |                                    |     |        |  |        |
+//	  []byte{?, 'a', '=', 'b', 0, 'b', '=', 'c', 'd', 0, ?, 1, 0, 0, 0, 5, 0, 0, 0, ?}
+//	environBuf --^                                          ^           ^
+//	                             environ offset for "a=b" --+           |
+//	                                        environ offset for "b=cd" --+
+//
+// See environSizesGet
+// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#environ_get
+// See https://en.wikipedia.org/wiki/Null-terminated_string
+var environGet = newHostFunc(wasip1.EnvironGetName, environGetFn, []api.ValueType{i32, i32}, "environ", "environ_buf")
+
+// environGetFn implements environGet. params[0] is the environ offset and
+// params[1] is the environBuf offset; the resulting errno is whatever
+// writeOffsetsAndNullTerminatedValues returns.
+func environGetFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	sysCtx := mod.(*wasm.ModuleInstance).Sys
+	environOffset := uint32(params[0])
+	environBufOffset := uint32(params[1])
+
+	return writeOffsetsAndNullTerminatedValues(
+		mod.Memory(),
+		sysCtx.Environ(),
+		environOffset,
+		environBufOffset,
+		sysCtx.EnvironSize(),
+	)
+}
+
+// environSizesGet is the WASI function named EnvironSizesGetName that
+// reads environment variable sizes.
+//
+// # Parameters
+//
+//   - resultEnvironc: offset to write the count of environment variables to
+//     api.Memory
+//   - resultEnvironvLen: offset to write the null-terminated environment
+//     variable length to api.Memory
+//
+// Result (Errno)
+//
+// The return value is 0 except the following error conditions:
+//   - sys.EFAULT: there is not enough memory to write results
+//
+// For example, if environ are "a=b","b=cd" and parameters resultEnvironc=1 and
+// resultEnvironvLen=6, this function writes the below to api.Memory:
+//
+//	                   uint32le       uint32le
+//	                  +--------+     +--------+
+//	                  |        |     |        |
+//	        []byte{?, 2, 0, 0, 0, ?, 9, 0, 0, 0, ?}
+//	 resultEnvironc --^              ^
+//		2 variables --+              |
+//	             resultEnvironvLen --|
+//	    len([]byte{'a','=','b',0,    |
+//	           'b','=','c','d',0}) --+
+//
+// See environGet
+// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#environ_sizes_get
+// and https://en.wikipedia.org/wiki/Null-terminated_string
+var environSizesGet = newHostFunc(wasip1.EnvironSizesGetName, environSizesGetFn, []api.ValueType{i32, i32}, "result.environc", "result.environv_len")
+
+// environSizesGetFn implements environSizesGet. params[0] is the offset
+// receiving the variable count and params[1] the offset receiving the total
+// size of the variables. It returns sys.EFAULT as soon as either write fails.
+func environSizesGetFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	sysCtx := mod.(*wasm.ModuleInstance).Sys
+	mem := mod.Memory()
+	countOffset := uint32(params[0])
+	sizeOffset := uint32(params[1])
+
+	// The two offsets are independent of each other, so each value is
+	// written separately. The second write is only attempted when the first
+	// one succeeded.
+	if !mem.WriteUint32Le(countOffset, uint32(len(sysCtx.Environ()))) ||
+		!mem.WriteUint32Le(sizeOffset, sysCtx.EnvironSize()) {
+		return sys.EFAULT
+	}
+	return 0
+}
@@ -0,0 +1,237 @@
+package wasi_snapshot_preview1
+
+import (
+	"context"
+	"time"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/experimental/sys"
+	"github.com/tetratelabs/wazero/internal/fsapi"
+	internalsys "github.com/tetratelabs/wazero/internal/sys"
+	"github.com/tetratelabs/wazero/internal/wasip1"
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// pollOneoff is the WASI function named PollOneoffName that concurrently
+// polls for the occurrence of a set of events.
+//
+// # Parameters
+//
+//   - in: pointer to the subscriptions (48 bytes each)
+//   - out: pointer to the resulting events (32 bytes each)
+//   - nsubscriptions: count of subscriptions, zero returns sys.EINVAL.
+//   - resultNevents: offset to write the count of events to.
+//
+// Result (Errno)
+//
+// The return value is 0 except the following error conditions:
+//   - sys.EINVAL: the parameters are invalid
+//   - sys.ENOTSUP: a parameter is valid, but not yet supported.
+//   - sys.EBADF: a file descriptor is negative, or stdin is needed but not
+//     open.
+//   - sys.EFAULT: there is not enough memory to read the subscriptions or
+//     write results.
+//
+// # Notes
+//
+//   - Each event written to `out` nests its own Errno. For example, an
+//     unknown file descriptor is reported there while the result stays 0.
+//   - This is similar to `poll` in POSIX.
+//
+// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#poll_oneoff
+// See https://linux.die.net/man/3/poll
+var pollOneoff = newHostFunc(
+	wasip1.PollOneoffName,
+	pollOneoffFn,
+	[]api.ValueType{i32, i32, i32, i32},
+	"in",
+	"out",
+	"nsubscriptions",
+	"result.nevents",
+)
+
+// event is the in-memory form of one poll_oneoff result, built from a
+// subscription by pollOneoffFn and serialized by writeEvent.
+type event struct {
+	// eventType is the subscription's type tag, e.g. wasip1.EventTypeClock.
+	eventType byte
+	// userData is the subscription's 8 byte userdata, echoed back verbatim.
+	userData  []byte
+	// errno is the per-event result; wasip1.ErrnoSuccess unless overwritten.
+	errno     wasip1.Errno
+}
+
+// pollOneoffFn implements pollOneoff.
+//
+// params holds, in order: `in` (offset of the subscriptions, 48 bytes each),
+// `out` (offset of the events, 32 bytes each), `nsubscriptions` and
+// `resultNevents` (offset where the number of written events is stored).
+//
+// Subscriptions are handled as follows:
+//   - clock: acknowledged immediately; the smallest timeout is remembered.
+//   - fd_write: acknowledged immediately with ErrnoNotsup, or ErrnoBadf when
+//     the file descriptor is unknown.
+//   - fd_read: acknowledged immediately when the file descriptor is unknown
+//     (ErrnoBadf) or is a non-blocking file other than stdin. Otherwise it is
+//     deferred until stdin has been polled with the timeout.
+//
+// It returns sys.EINVAL for zero subscriptions, an unknown event type or
+// invalid clock flags, sys.ENOTSUP for absolute clock timeouts, sys.EBADF for
+// a negative file descriptor or when stdin is required but not open,
+// sys.EFAULT when memory is out of range, or any errno returned from polling
+// stdin.
+func pollOneoffFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	in := uint32(params[0])
+	out := uint32(params[1])
+	nsubscriptions := uint32(params[2])
+	resultNevents := uint32(params[3])
+
+	if nsubscriptions == 0 {
+		return sys.EINVAL
+	}
+
+	// The buffer sizes below are computed in uint32. Reject counts whose byte
+	// size would wrap around: a wrapped (too small) size could pass the range
+	// checks and the loop would then index past the end of the buffers. Such
+	// a request cannot fit in a 32-bit address space anyway.
+	const maxSubscriptions = (1<<32 - 1) / 48
+	if nsubscriptions > maxSubscriptions {
+		return sys.EFAULT
+	}
+
+	mem := mod.Memory()
+
+	// Ensure capacity prior to the read loop to reduce error handling.
+	inBuf, ok := mem.Read(in, nsubscriptions*48)
+	if !ok {
+		return sys.EFAULT
+	}
+	outBuf, ok := mem.Read(out, nsubscriptions*32)
+	if !ok {
+		return sys.EFAULT
+	}
+	// zero-out all buffer before writing
+	clear(outBuf)
+
+	// Eagerly write the number of events which will equal subscriptions unless
+	// there's a fault in parsing (not processing).
+	if !mem.WriteUint32Le(resultNevents, nsubscriptions) {
+		return sys.EFAULT
+	}
+
+	// Loop through all subscriptions and write their output.
+
+	// Extract FS context, used in the body of the for loop for FS access.
+	fsc := mod.(*wasm.ModuleInstance).Sys.FS()
+	// Slice of events that are processed out of the loop (blocking stdin subscribers).
+	var blockingStdinSubs []*event
+	// The timeout is initialized at max Duration, the loop will find the minimum.
+	var timeout time.Duration = 1<<63 - 1
+	// Whether any clock subscription was seen. Without one, the timeout above
+	// is only a placeholder and must not be slept on.
+	hasClockSub := false
+	// Count of all the subscriptions that have been already written back to outBuf.
+	// nevents*32 returns at all times the offset where the next event should be written:
+	// this way we ensure that there are no gaps between records.
+	nevents := uint32(0)
+
+	// Layout is subscription_u: Union
+	// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#subscription_u
+	for i := uint32(0); i < nsubscriptions; i++ {
+		inOffset := i * 48
+		outOffset := nevents * 32
+
+		eventType := inBuf[inOffset+8] // +8 past userdata
+		// +8 past userdata +8 contents_offset
+		argBuf := inBuf[inOffset+8+8:]
+		userData := inBuf[inOffset : inOffset+8]
+
+		evt := &event{
+			eventType: eventType,
+			userData:  userData,
+			errno:     wasip1.ErrnoSuccess,
+		}
+
+		switch eventType {
+		case wasip1.EventTypeClock: // handle later
+			newTimeout, err := processClockEvent(argBuf)
+			if err != 0 {
+				return err
+			}
+			hasClockSub = true
+			// Min timeout.
+			if newTimeout < timeout {
+				timeout = newTimeout
+			}
+			// Ack the clock event to the outBuf.
+			writeEvent(outBuf[outOffset:], evt)
+			nevents++
+		case wasip1.EventTypeFdRead:
+			fd := int32(le.Uint32(argBuf))
+			if fd < 0 {
+				return sys.EBADF
+			}
+			if file, ok := fsc.LookupFile(fd); !ok {
+				evt.errno = wasip1.ErrnoBadf
+				writeEvent(outBuf[outOffset:], evt)
+				nevents++
+			} else if fd != internalsys.FdStdin && file.File.IsNonblock() {
+				writeEvent(outBuf[outOffset:], evt)
+				nevents++
+			} else {
+				// if the fd is Stdin, and it is in blocking mode,
+				// do not ack yet, append to a slice for delayed evaluation.
+				blockingStdinSubs = append(blockingStdinSubs, evt)
+			}
+		case wasip1.EventTypeFdWrite:
+			fd := int32(le.Uint32(argBuf))
+			if fd < 0 {
+				return sys.EBADF
+			}
+			if _, ok := fsc.LookupFile(fd); ok {
+				evt.errno = wasip1.ErrnoNotsup
+			} else {
+				evt.errno = wasip1.ErrnoBadf
+			}
+			nevents++
+			writeEvent(outBuf[outOffset:], evt)
+		default:
+			return sys.EINVAL
+		}
+	}
+
+	sysCtx := mod.(*wasm.ModuleInstance).Sys
+	if nevents == nsubscriptions {
+		// We already wrote back all the results. We already wrote this number
+		// earlier to offset `resultNevents`.
+		// We only need to observe the timeout of the clock subscriptions, if
+		// there are any, and return. Without a clock subscription the timeout
+		// is still the max Duration placeholder, which must not be slept on.
+		if hasClockSub && timeout > 0 {
+			sysCtx.Nanosleep(int64(timeout))
+		}
+		return 0
+	}
+
+	// If there are blocking stdin subscribers, check for data with given timeout.
+	stdin, ok := fsc.LookupFile(internalsys.FdStdin)
+	if !ok {
+		return sys.EBADF
+	}
+
+	// Poll takes the timeout as int32 milliseconds. Clamp instead of letting
+	// the conversion wrap around, which could turn a long timeout into a
+	// short or negative one.
+	timeoutMillis := timeout.Milliseconds()
+	if timeoutMillis > 1<<31-1 {
+		timeoutMillis = 1<<31 - 1
+	}
+
+	// Wait for the timeout to expire, or for some data to become available on Stdin.
+	if stdinReady, errno := stdin.File.Poll(fsapi.POLLIN, int32(timeoutMillis)); errno != 0 {
+		return errno
+	} else if stdinReady {
+		// stdin has data ready for reading, write back all the events
+		for i := range blockingStdinSubs {
+			evt := blockingStdinSubs[i]
+			evt.errno = 0
+			writeEvent(outBuf[nevents*32:], evt)
+			nevents++
+		}
+	}
+
+	// Fewer events than subscriptions were written: correct the count that
+	// was eagerly stored above.
+	if nevents != nsubscriptions {
+		if !mem.WriteUint32Le(resultNevents, nevents) {
+			return sys.EFAULT
+		}
+	}
+
+	return 0
+}
+
+// processClockEvent supports only relative name events, as that's what's used
+// to implement sleep in various compilers including Rust, Zig and TinyGo.
+func processClockEvent(inBuf []byte) (time.Duration, sys.Errno) {
+	_ /* ID */ = le.Uint32(inBuf[0:8])          // See below
+	timeout := le.Uint64(inBuf[8:16])           // nanos if relative
+	_ /* precision */ = le.Uint64(inBuf[16:24]) // Unused
+	flags := le.Uint16(inBuf[24:32])
+
+	var err sys.Errno
+	// subclockflags has only one flag defined:  subscription_clock_abstime
+	switch flags {
+	case 0: // relative time
+	case 1: // subscription_clock_abstime
+		err = sys.ENOTSUP
+	default: // subclockflags has only one flag defined.
+		err = sys.EINVAL
+	}
+
+	if err != 0 {
+		return 0, err
+	} else {
+		// https://linux.die.net/man/3/clock_settime says relative timers are
+		// unaffected. Since this function only supports relative timeout, we can
+		// skip name ID validation and use a single sleep function.
+
+		return time.Duration(timeout), 0
+	}
+}
+
+// writeEvent writes the event corresponding to the processed subscription.
+// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-event-struct
+func writeEvent(outBuf []byte, evt *event) {
+	copy(outBuf, evt.userData)  // userdata
+	outBuf[8] = byte(evt.errno) // uint16, but safe as < 255
+	outBuf[9] = 0
+	le.PutUint32(outBuf[10:], uint32(evt.eventType))
+	// TODO: When FD events are supported, write outOffset+16
+}
@@ -0,0 +1,44 @@
+package wasi_snapshot_preview1
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/internal/wasip1"
+	"github.com/tetratelabs/wazero/internal/wasm"
+	"github.com/tetratelabs/wazero/sys"
+)
+
+// procExit is the WASI function named ProcExitName that terminates the
+// execution of the module with an exit code. The only successful exit code is
+// zero.
+//
+// Unlike the other functions in this package, it is declared directly as a
+// wasm.HostFunc because procExitFn has no Errno result.
+//
+// # Parameters
+//
+//   - rval: exit code.
+//
+// See https://github.com/WebAssembly/WASI/blob/main/phases/snapshot/docs.md#proc_exit
+var procExit = &wasm.HostFunc{
+	Name:       wasip1.ProcExitName,
+	ExportName: wasip1.ProcExitName,
+	ParamNames: []string{"rval"},
+	ParamTypes: []api.ValueType{i32},
+	Code:       wasm.Code{GoFunc: api.GoModuleFunc(procExitFn)},
+}
+
+// procExitFn implements procExit: it closes mod with the exit code in
+// params[0] and then panics with a matching exit error. It never returns
+// normally.
+func procExitFn(ctx context.Context, mod api.Module, params []uint64) {
+	code := uint32(params[0])
+
+	// Close first so that other callers observe the exit code. The close
+	// error is intentionally dropped as the panic below ends execution.
+	_ = mod.CloseWithExitCode(ctx, code)
+
+	// Panicking stops any code from running after this call. For example,
+	// LLVM inserts unreachable instructions after calls to exit.
+	// See: https://github.com/emscripten-core/emscripten/issues/12322
+	exitErr := sys.NewExitError(code)
+	panic(exitErr)
+}
+
+// procRaise is stubbed and will never be supported, as it was removed.
+//
+// See https://github.com/WebAssembly/WASI/pull/136
+var procRaise = stubFunction(wasip1.ProcRaiseName, []api.ValueType{i32}, "sig")
@@ -0,0 +1,55 @@
+package wasi_snapshot_preview1
+
+import (
+	"context"
+	"io"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/experimental/sys"
+	"github.com/tetratelabs/wazero/internal/wasip1"
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// randomGet is the WASI function named RandomGetName which writes random
+// data to a buffer.
+//
+// # Parameters
+//
+//   - buf: api.Memory offset to write random values
+//   - bufLen: size of random data in bytes
+//
+// Result (Errno)
+//
+// The return value is ErrnoSuccess except the following error conditions:
+//   - sys.EFAULT: `buf` or `bufLen` point to an offset out of memory
+//   - sys.EIO: a file system error
+//
+// For example, if underlying random source was seeded like
+// `rand.NewSource(42)`, we expect api.Memory to contain:
+//
+//	                   bufLen (5)
+//	          +--------------------------+
+//	          |                        	 |
+//	[]byte{?, 0x53, 0x8c, 0x7f, 0x96, 0xb1, ?}
+//	    buf --^
+//
+// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-random_getbuf-pointeru8-bufLen-size---errno
+var randomGet = newHostFunc(wasip1.RandomGetName, randomGetFn, []api.ValueType{i32, i32}, "buf", "buf_len")
+
+// randomGetFn implements randomGet. params[0] is the destination offset and
+// params[1] the number of random bytes to write there.
+func randomGetFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	source := mod.(*wasm.ModuleInstance).Sys.RandSource()
+	offset := uint32(params[0])
+	length := uint32(params[1])
+
+	// Random data is read straight into the memory range, so the range is
+	// validated first.
+	dst, inRange := mod.Memory().Read(offset, length)
+	if !inRange {
+		return sys.EFAULT
+	}
+
+	// The byte count is dropped: it only differs from length on error.
+	_, err := io.ReadAtLeast(source, dst, int(length))
+	if err != nil {
+		return sys.EIO
+	}
+	return 0
+}
@@ -0,0 +1,22 @@
+package wasi_snapshot_preview1
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/experimental/sys"
+	"github.com/tetratelabs/wazero/internal/wasip1"
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// schedYield is the WASI function named SchedYieldName which temporarily
+// yields execution of the calling thread.
+//
+// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-sched_yield---errno
+var schedYield = newHostFunc(wasip1.SchedYieldName, schedYieldFn, nil)
+
+// schedYieldFn implements schedYield by calling Osyield on the module's
+// system context. It always returns 0.
+func schedYieldFn(_ context.Context, mod api.Module, _ []uint64) sys.Errno {
+	mod.(*wasm.ModuleInstance).Sys.Osyield()
+	return 0
+}
@@ -0,0 +1,188 @@
+package wasi_snapshot_preview1
+
+import (
+	"context"
+
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/experimental/sys"
+	socketapi "github.com/tetratelabs/wazero/internal/sock"
+	"github.com/tetratelabs/wazero/internal/sysfs"
+	"github.com/tetratelabs/wazero/internal/wasip1"
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// sockAccept is the WASI function named SockAcceptName which accepts a new
+// incoming connection.
+//
+// # Parameters
+//
+//   - fd: file descriptor to accept a connection on
+//   - flags: when FD_NONBLOCK is set, the connection is accepted as
+//     non-blocking
+//   - resultFd: offset to write the file descriptor of the connection to
+//
+// See: https://github.com/WebAssembly/WASI/blob/0ba0c5e2e37625ca5a6d3e4255a998dfaa3efc52/phases/snapshot/docs.md#sock_accept
+// and https://github.com/WebAssembly/WASI/pull/458
+var sockAccept = newHostFunc(
+	wasip1.SockAcceptName, sockAcceptFn,
+	[]wasm.ValueType{i32, i32, i32},
+	"fd",
+	"flags",
+	"result.fd",
+)
+
+func sockAcceptFn(_ context.Context, mod api.Module, params []uint64) (errno sys.Errno) {
+	mem := mod.Memory()
+	fsc := mod.(*wasm.ModuleInstance).Sys.FS()
+
+	fd := int32(params[0])
+	flags := uint32(params[1])
+	resultFd := uint32(params[2])
+	nonblock := flags&uint32(wasip1.FD_NONBLOCK) != 0
+
+	var connFD int32
+	if connFD, errno = fsc.SockAccept(fd, nonblock); errno == 0 {
+		mem.WriteUint32Le(resultFd, uint32(connFD))
+	}
+	return
+}
+
+// sockRecv is the WASI function named SockRecvName which receives a
+// message from a socket.
+//
+// # Parameters
+//
+//   - fd: file descriptor of the connection to receive from
+//   - ri_data, ri_data_len: offset and count of the iovecs to receive into
+//   - ri_flags: only RI_RECV_PEEK and RI_RECV_WAITALL are accepted, anything
+//     else results in sys.ENOTSUP
+//   - result.ro_datalen: offset to write the count of received bytes to
+//   - result.ro_flags: offset to write the output flags to, always zero
+//
+// See: https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-sock_recvfd-fd-ri_data-iovec_array-ri_flags-riflags---errno-size-roflags
+var sockRecv = newHostFunc(
+	wasip1.SockRecvName, sockRecvFn,
+	[]wasm.ValueType{i32, i32, i32, i32, i32, i32},
+	"fd",
+	"ri_data",
+	"ri_data_len",
+	"ri_flags",
+	"result.ro_datalen",
+	"result.ro_flags",
+)
+
+// sockRecvFn implements sockRecv. params holds, in order: the file
+// descriptor, the offset and count of the iovecs, the flags and the offsets
+// receiving the byte count and the output flags.
+//
+// It returns sys.EBADF when the file descriptor is not an open TCP
+// connection, sys.ENOTSUP for flags other than RI_RECV_PEEK and
+// RI_RECV_WAITALL, sys.EINVAL when the first iovec cannot be read while
+// peeking, sys.EFAULT when the results cannot be written, or the errno of the
+// underlying receive.
+func sockRecvFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	mem := mod.Memory()
+	fsc := mod.(*wasm.ModuleInstance).Sys.FS()
+
+	fd := int32(params[0])
+	riData := uint32(params[1])
+	riDataCount := uint32(params[2])
+	riFlags := uint8(params[3])
+	resultRoDatalen := uint32(params[4])
+	resultRoFlags := uint32(params[5])
+
+	var conn socketapi.TCPConn
+	if e, ok := fsc.LookupFile(fd); !ok {
+		return sys.EBADF // Not open
+	} else if conn, ok = e.File.(socketapi.TCPConn); !ok {
+		return sys.EBADF // Not a conn
+	}
+
+	if riFlags & ^(wasip1.RI_RECV_PEEK|wasip1.RI_RECV_WAITALL) != 0 {
+		return sys.ENOTSUP
+	}
+
+	// writeResults stores the received byte count and zeroed output flags.
+	// An out-of-range result pointer is reported instead of returning
+	// success without telling the caller how much data was received.
+	writeResults := func(datalen uint32) sys.Errno {
+		if !mem.WriteUint32Le(resultRoDatalen, datalen) {
+			return sys.EFAULT
+		}
+		if !mem.WriteUint16Le(resultRoFlags, 0) {
+			return sys.EFAULT
+		}
+		return 0
+	}
+
+	if riFlags&wasip1.RI_RECV_PEEK != 0 {
+		// Each record in riData is of the form:
+		// type iovec struct { buf *uint8; bufLen uint32 }
+		// This means that the first `uint32` is a `buf *uint8`.
+		// Only the first iovec is used when peeking.
+		firstIovecBufAddr, ok := mem.ReadUint32Le(riData)
+		if !ok {
+			return sys.EINVAL
+		}
+		// Read bufLen
+		firstIovecBufLen, ok := mem.ReadUint32Le(riData + 4)
+		if !ok {
+			return sys.EINVAL
+		}
+		firstIovecBuf, ok := mem.Read(firstIovecBufAddr, firstIovecBufLen)
+		if !ok {
+			return sys.EINVAL
+		}
+		n, errno := conn.Recvfrom(firstIovecBuf, sysfs.MSG_PEEK)
+		if errno != 0 {
+			return errno
+		}
+		return writeResults(uint32(n))
+	}
+
+	// If riFlags&wasip1.RI_RECV_WAITALL != 0 then we should
+	// do a blocking operation until all data has been retrieved;
+	// otherwise we are able to return earlier.
+	// For simplicity, we currently wait all regardless the flag.
+	bufSize, errno := readv(mem, riData, riDataCount, conn.Read)
+	if errno != 0 {
+		return errno
+	}
+	return writeResults(bufSize)
+}
+
+// sockSend is the WASI function named SockSendName which sends a message
+// on a socket.
+//
+// # Parameters
+//
+//   - fd: file descriptor of the connection to send on
+//   - si_data, si_data_len: offset and count of the iovecs to send
+//   - si_flags: must be zero, anything else results in sys.ENOTSUP
+//   - result.so_datalen: offset to write the count of sent bytes to
+//
+// See: https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-sock_sendfd-fd-si_data-ciovec_array-si_flags-siflags---errno-size
+var sockSend = newHostFunc(
+	wasip1.SockSendName, sockSendFn,
+	[]wasm.ValueType{i32, i32, i32, i32, i32},
+	"fd",
+	"si_data",
+	"si_data_len",
+	"si_flags",
+	"result.so_datalen",
+)
+
+// sockSendFn implements sockSend. params holds, in order: the file
+// descriptor, the offset and count of the iovecs, the flags and the offset
+// receiving the count of sent bytes.
+//
+// It returns sys.ENOTSUP for non-zero flags, sys.EBADF when the file
+// descriptor is not an open TCP connection, sys.EFAULT when the byte count
+// cannot be written, or the errno of the underlying write.
+func sockSendFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	mem := mod.Memory()
+	fsc := mod.(*wasm.ModuleInstance).Sys.FS()
+
+	fd := int32(params[0])
+	siData := uint32(params[1])
+	siDataCount := uint32(params[2])
+	siFlags := uint32(params[3])
+	resultSoDatalen := uint32(params[4])
+
+	if siFlags != 0 {
+		return sys.ENOTSUP
+	}
+
+	var conn socketapi.TCPConn
+	if e, ok := fsc.LookupFile(fd); !ok {
+		return sys.EBADF // Not open
+	} else if conn, ok = e.File.(socketapi.TCPConn); !ok {
+		return sys.EBADF // Not a conn
+	}
+
+	bufSize, errno := writev(mem, siData, siDataCount, conn.Write)
+	if errno != 0 {
+		return errno
+	}
+	// Report an out-of-range result pointer instead of returning success
+	// without telling the caller how much data was sent.
+	if !mem.WriteUint32Le(resultSoDatalen, bufSize) {
+		return sys.EFAULT
+	}
+	return 0
+}
+
+// sockShutdown is the WASI function named SockShutdownName which shuts
+// down socket send and receive channels.
+//
+// See: https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-sock_shutdownfd-fd-how-sdflags---errno
+var sockShutdown = newHostFunc(wasip1.SockShutdownName, sockShutdownFn, []wasm.ValueType{i32, i32}, "fd", "how")
+
+// sockShutdownFn implements sockShutdown. params[0] is the file descriptor
+// and params[1] selects which directions to shut down.
+//
+// It returns sys.EBADF when the file descriptor is not an open TCP
+// connection, sys.EINVAL for an unknown `how`, or the errno of the shutdown.
+func sockShutdownFn(_ context.Context, mod api.Module, params []uint64) sys.Errno {
+	fsc := mod.(*wasm.ModuleInstance).Sys.FS()
+
+	fd := int32(params[0])
+	how := uint8(params[1])
+
+	entry, open := fsc.LookupFile(fd)
+	if !open {
+		return sys.EBADF // Not open
+	}
+	conn, isConn := entry.File.(socketapi.TCPConn)
+	if !isConn {
+		return sys.EBADF // Not a conn
+	}
+
+	// Translate the WASI flags into their socketapi counterparts.
+	var sysHow int
+	switch how {
+	case wasip1.SD_RD:
+		sysHow = socketapi.SHUT_RD
+	case wasip1.SD_WR:
+		sysHow = socketapi.SHUT_WR
+	case wasip1.SD_RD | wasip1.SD_WR:
+		sysHow = socketapi.SHUT_RD | socketapi.SHUT_WR
+	default:
+		return sys.EINVAL
+	}
+
+	// TODO: Map this instead of relying on syscall symbols.
+	return conn.Shutdown(sysHow)
+}
@@ -0,0 +1,314 @@
+// Package wasi_snapshot_preview1 contains Go-defined functions to access
+// system calls, such as opening a file, similar to Go's x/sys package. These
+// are accessible from WebAssembly-defined functions via importing ModuleName.
+// All WASI functions return a single Errno result: ErrnoSuccess on success.
+//
+// e.g. Call Instantiate before instantiating any wasm binary that imports
+// "wasi_snapshot_preview1"; otherwise, it will error due to missing imports.
+//
+//	ctx := context.Background()
+//	r := wazero.NewRuntime(ctx)
+//	defer r.Close(ctx) // This closes everything this Runtime created.
+//
+//	wasi_snapshot_preview1.MustInstantiate(ctx, r)
+//	mod, _ := r.Instantiate(ctx, wasm)
+//
+// See https://github.com/WebAssembly/WASI
+package wasi_snapshot_preview1
+
+import (
+	"context"
+	"encoding/binary"
+
+	"github.com/tetratelabs/wazero"
+	"github.com/tetratelabs/wazero/api"
+	"github.com/tetratelabs/wazero/experimental/sys"
+	"github.com/tetratelabs/wazero/internal/wasip1"
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// ModuleName is the module name WASI functions are exported into.
+//
+// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md
+const ModuleName = wasip1.InternalModuleName
+
+// i32 and i64 are shorthand for the wasm value types used when declaring the
+// parameter types of the host functions in this package.
+const i32, i64 = wasm.ValueTypeI32, wasm.ValueTypeI64
+
+// le is the little-endian byte order used to decode subscriptions and encode
+// events directly in memory buffers.
+var le = binary.LittleEndian
+
+// MustInstantiate calls Instantiate and panics if it returns an error.
+//
+// This is a simpler function for those who know the module ModuleName is not
+// already instantiated, and don't need to unload it.
+func MustInstantiate(ctx context.Context, r wazero.Runtime) {
+	_, err := Instantiate(ctx, r)
+	if err != nil {
+		panic(err)
+	}
+}
+
+// Instantiate instantiates the ModuleName module into the runtime, using a
+// Builder created for r.
+//
+// # Notes
+//
+//   - Failure cases are documented on wazero.Runtime InstantiateModule.
+//   - Closing the wazero.Runtime has the same effect as closing the result.
+func Instantiate(ctx context.Context, r wazero.Runtime) (api.Closer, error) {
+	wasiBuilder := NewBuilder(r)
+	return wasiBuilder.Instantiate(ctx)
+}
+
+// Builder configures the ModuleName module for later use via Compile or Instantiate.
+// Create one with NewBuilder.
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type Builder interface {
+	// Compile compiles the ModuleName module. Call this before Instantiate.
+	//
+	// Note: This has the same effect as the same function on wazero.HostModuleBuilder.
+	Compile(context.Context) (wazero.CompiledModule, error)
+
+	// Instantiate instantiates the ModuleName module and returns a function to close it.
+	//
+	// Note: This has the same effect as the same function on wazero.HostModuleBuilder.
+	Instantiate(context.Context) (api.Closer, error)
+}
+
+// NewBuilder returns a new Builder that targets the runtime r.
+func NewBuilder(r wazero.Runtime) Builder {
+	return &builder{r: r}
+}
+
+// builder is the Builder returned by NewBuilder. r is the runtime the
+// ModuleName host module is created in.
+type builder struct{ r wazero.Runtime }
+
+// hostModuleBuilder returns a new wazero.HostModuleBuilder for ModuleName
+// with the WASI functions already exported into it.
+func (b *builder) hostModuleBuilder() wazero.HostModuleBuilder {
+	hostBuilder := b.r.NewHostModuleBuilder(ModuleName)
+	exportFunctions(hostBuilder)
+	return hostBuilder
+}
+
+// Compile implements Builder.Compile by compiling a freshly built host
+// module.
+func (b *builder) Compile(ctx context.Context) (wazero.CompiledModule, error) {
+	hostBuilder := b.hostModuleBuilder()
+	return hostBuilder.Compile(ctx)
+}
+
+// Instantiate implements Builder.Instantiate by instantiating a freshly
+// built host module.
+func (b *builder) Instantiate(ctx context.Context) (api.Closer, error) {
+	hostBuilder := b.hostModuleBuilder()
+	return hostBuilder.Instantiate(ctx)
+}
+
+// FunctionExporter exports functions into a wazero.HostModuleBuilder.
+// Create one with NewFunctionExporter.
+//
+// # Notes
+//
+//   - This is an interface for decoupling, not third-party implementations.
+//     All implementations are in wazero.
+type FunctionExporter interface {
+	// ExportFunctions adds the WASI functions to the given builder.
+	ExportFunctions(wazero.HostModuleBuilder)
+}
+
+// NewFunctionExporter returns a new FunctionExporter. This is used for the
+// following two use cases:
+//   - Overriding a builtin function with an alternate implementation.
+//   - Exporting functions to the module "wasi_unstable" for legacy code.
+//
+// # Example of overriding default behavior
+//
+//	// Export the default WASI functions.
+//	wasiBuilder := r.NewHostModuleBuilder(wasi_snapshot_preview1.ModuleName)
+//	wasi_snapshot_preview1.NewFunctionExporter().ExportFunctions(wasiBuilder)
+//
+//	// Subsequent calls to NewFunctionBuilder override built-in exports.
+//	wasiBuilder.NewFunctionBuilder().
+//		WithFunc(func(ctx context.Context, mod api.Module, exitCode uint32) {
+//		// your custom logic
+//		}).Export("proc_exit")
+//
+// # Example of using the old module name for WASI
+//
+//	// Instantiate the current WASI functions under the wasi_unstable
+//	// instead of wasi_snapshot_preview1.
+//	wasiBuilder := r.NewHostModuleBuilder("wasi_unstable")
+//	wasi_snapshot_preview1.NewFunctionExporter().ExportFunctions(wasiBuilder)
+//	_, err := wasiBuilder.Instantiate(ctx)
+func NewFunctionExporter() FunctionExporter {
+	return new(functionExporter)
+}
+
+// functionExporter is the FunctionExporter returned by NewFunctionExporter.
+// It holds no state.
+type functionExporter struct{}
+
+// ExportFunctions implements FunctionExporter.ExportFunctions by delegating
+// to exportFunctions, which adds the WASI functions to builder.
+func (functionExporter) ExportFunctions(builder wazero.HostModuleBuilder) {
+	exportFunctions(builder)
+}
+
+// ## Translation notes
+// ### String
+// WebAssembly 1.0 has no string type, so any string input parameter expands to two uint32 parameters: offset
+// and length.
+//
+// ### iovec_array
+// `iovec_array` is encoded as two uint32le values (i32): offset and count.
+//
+// ### Result
+// Each result besides Errno is always an uint32 parameter. WebAssembly 1.0 can have up to one result,
+// which is already used by Errno. This forces other results to be parameters. A result parameter is a memory
+// offset to write the result to. As memory offsets are uint32, each parameter representing a result is uint32.
+//
+// ### Errno
+// The WASI specification is sometimes ambiguous, resulting in some runtimes interpreting the same function in
+// different ways. Errno mappings are not defined in WASI yet, so these mappings are best efforts by maintainers.
+// When in doubt about portability, first look at /RATIONALE.md and, if needed, open an issue on
+// https://github.com/WebAssembly/WASI/issues
+//
+// ## Memory
+// In WebAssembly 1.0 (20191205), there may be up to one Memory per store, which means api.Memory is always the
+// wasm.Store Memories index zero: `store.Memories[0].Buffer`
+//
+// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md
+// See https://github.com/WebAssembly/WASI/issues/215
+// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#memory-instances%E2%91%A0.
+
+// exportFunctions adds all go functions that implement wasi.
+// These should be exported in the module named ModuleName.
+//
+// builder must also implement wasm.HostFuncExporter: the type assertion below
+// is unchecked, so any other implementation causes a panic.
+func exportFunctions(builder wazero.HostModuleBuilder) {
+	// Unchecked assertion: panics if builder is not a wasm.HostFuncExporter.
+	exporter := builder.(wasm.HostFuncExporter)
+
+	// Note: these are ordered per spec for consistency even if the resulting
+	// map can't guarantee that.
+	// See https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#functions
+	exporter.ExportHostFunc(argsGet)
+	exporter.ExportHostFunc(argsSizesGet)
+	exporter.ExportHostFunc(environGet)
+	exporter.ExportHostFunc(environSizesGet)
+	exporter.ExportHostFunc(clockResGet)
+	exporter.ExportHostFunc(clockTimeGet)
+	exporter.ExportHostFunc(fdAdvise)
+	exporter.ExportHostFunc(fdAllocate)
+	exporter.ExportHostFunc(fdClose)
+	exporter.ExportHostFunc(fdDatasync)
+	exporter.ExportHostFunc(fdFdstatGet)
+	exporter.ExportHostFunc(fdFdstatSetFlags)
+	exporter.ExportHostFunc(fdFdstatSetRights)
+	exporter.ExportHostFunc(fdFilestatGet)
+	exporter.ExportHostFunc(fdFilestatSetSize)
+	exporter.ExportHostFunc(fdFilestatSetTimes)
+	exporter.ExportHostFunc(fdPread)
+	exporter.ExportHostFunc(fdPrestatGet)
+	exporter.ExportHostFunc(fdPrestatDirName)
+	exporter.ExportHostFunc(fdPwrite)
+	exporter.ExportHostFunc(fdRead)
+	exporter.ExportHostFunc(fdReaddir)
+	exporter.ExportHostFunc(fdRenumber)
+	exporter.ExportHostFunc(fdSeek)
+	exporter.ExportHostFunc(fdSync)
+	exporter.ExportHostFunc(fdTell)
+	exporter.ExportHostFunc(fdWrite)
+	exporter.ExportHostFunc(pathCreateDirectory)
+	exporter.ExportHostFunc(pathFilestatGet)
+	exporter.ExportHostFunc(pathFilestatSetTimes)
+	exporter.ExportHostFunc(pathLink)
+	exporter.ExportHostFunc(pathOpen)
+	exporter.ExportHostFunc(pathReadlink)
+	exporter.ExportHostFunc(pathRemoveDirectory)
+	exporter.ExportHostFunc(pathRename)
+	exporter.ExportHostFunc(pathSymlink)
+	exporter.ExportHostFunc(pathUnlinkFile)
+	exporter.ExportHostFunc(pollOneoff)
+	exporter.ExportHostFunc(procExit)
+	exporter.ExportHostFunc(procRaise)
+	exporter.ExportHostFunc(schedYield)
+	exporter.ExportHostFunc(randomGet)
+	exporter.ExportHostFunc(sockAccept)
+	exporter.ExportHostFunc(sockRecv)
+	exporter.ExportHostFunc(sockSend)
+	exporter.ExportHostFunc(sockShutdown)
+}
+
+// writeOffsetsAndNullTerminatedValues lays out values (args or environ) in
+// guest memory as two regions:
+//
+//   - at offsets: one little-endian uint32 per value, holding the address at
+//     which that value's bytes begin;
+//   - at bytes: the values themselves, back to back, each followed by a NUL.
+//
+// bytesLen is the caller-computed size of the second region and must already
+// account for the NUL terminators. sys.EFAULT is returned when mem.Read
+// rejects either region; zero is returned on success.
+func writeOffsetsAndNullTerminatedValues(mem api.Memory, values [][]byte, offsets, bytes, bytesLen uint32) sys.Errno {
+	// The two regions are not necessarily adjacent, so each is fetched on its
+	// own. Presumably mem.Read returns a view into guest memory, as nothing is
+	// written back explicitly below.
+	offsetsBuf, ok := mem.Read(offsets, uint32(len(values)*4)) // 4 bytes per uint32le
+	if !ok {
+		return sys.EFAULT
+	}
+	bytesBuf, ok := mem.Read(bytes, bytesLen)
+	if !ok {
+		return sys.EFAULT
+	}
+
+	var pos uint32 // number of bytes already placed in bytesBuf
+	for i, value := range values {
+		// Record the guest address of this value. The little-endian encoding
+		// is spelled out by hand because Go offers no way to force inlining
+		// of a helper.
+		addr := bytes + pos
+		slot := uint32(i) * 4
+		offsetsBuf[slot] = byte(addr)
+		offsetsBuf[slot+1] = byte(addr >> 8)
+		offsetsBuf[slot+2] = byte(addr >> 16)
+		offsetsBuf[slot+3] = byte(addr >> 24)
+
+		// Place the value itself, then its NUL terminator.
+		end := pos + uint32(len(value))
+		copy(bytesBuf[pos:], value)
+		bytesBuf[end] = 0
+		pos = end + 1
+	}
+
+	return 0
+}
+
+// newHostFunc builds the wasm.HostFunc for a WASI function implemented by
+// goFunc. name is used both as the export name and as the function name. The
+// result is always declared as a single i32 named "errno".
+func newHostFunc(
+	name string,
+	goFunc wasiFunc,
+	paramTypes []api.ValueType,
+	paramNames ...string,
+) *wasm.HostFunc {
+	hostFunc := new(wasm.HostFunc)
+	hostFunc.ExportName = name
+	hostFunc.Name = name
+	hostFunc.ParamTypes = paramTypes
+	hostFunc.ParamNames = paramNames
+	hostFunc.ResultTypes = []api.ValueType{i32}
+	hostFunc.ResultNames = []string{"errno"}
+	hostFunc.Code = wasm.Code{GoFunc: goFunc}
+	return hostFunc
+}
+
+// wasiFunc is the shape shared by the WASI function implementations: they
+// receive their parameters in params and report a single Errno result, which
+// Call stores at index zero of the stack.
+type wasiFunc func(ctx context.Context, mod api.Module, params []uint64) sys.Errno
+
+// Call implements the same method as documented on api.GoModuleFunction.
+func (f wasiFunc) Call(ctx context.Context, mod api.Module, stack []uint64) {
+	errno := f(ctx, mod, stack)
+	if errno == 0 {
+		// Special case, as ErrnoSuccess is zero: no translation is needed.
+		stack[0] = 0
+		return
+	}
+	// Translate the error and write it back onto the stack.
+	stack[0] = uint64(wasip1.ToErrno(errno))
+}
+
+// stubFunction stubs for GrainLang per #271.
+//
+// The returned host function has the given name and parameters, ignores its
+// arguments, and always stores wasip1.ErrnoNosys as its "errno" result.
+func stubFunction(name string, paramTypes []wasm.ValueType, paramNames ...string) *wasm.HostFunc {
+	nosys := api.GoModuleFunc(func(_ context.Context, _ api.Module, stack []uint64) {
+		stack[0] = uint64(wasip1.ErrnoNosys)
+	})
+
+	stub := new(wasm.HostFunc)
+	stub.ExportName = name
+	stub.Name = name
+	stub.ParamTypes = paramTypes
+	stub.ParamNames = paramNames
+	stub.ResultTypes = []api.ValueType{i32}
+	stub.ResultNames = []string{"errno"}
+	stub.Code = wasm.Code{GoFunc: nosys}
+	return stub
+}
@@ -0,0 +1,149 @@
+package descriptor
+
+import (
+	"math/bits"
+	"slices"
+)
+
+// Table is a data structure mapping 32 bit descriptor to items.
+//
+// # Negative keys are invalid.
+//
+// Negative keys (e.g. -1) are invalid inputs and will return a corresponding
+// not-found value. This matches POSIX behavior of file descriptors.
+// See https://pubs.opengroup.org/onlinepubs/9699919799/functions/dirfd.html#tag_16_90
+//
+// # Data structure design
+//
+// The data structure optimizes for memory density and lookup performance,
+// trading off compute at insertion time. This is a useful compromise for the
+// use cases we employ it with: items are usually accessed a lot more often
+// than they are inserted, each operation requires a table lookup, so we are
+// better off spending extra compute to insert items in the table in order to
+// get cheaper lookups. Memory efficiency is also crucial to support scaling
+// with programs that maintain thousands of items: having a high or non-linear
+// memory-to-item ratio could otherwise be used as an attack vector by
+// malicious applications attempting to damage performance of the host.
+type Table[Key ~int32, Item any] struct {
+	// masks is the occupancy bitmap: bit (key % 64) of masks[key / 64] is set
+	// when an item is stored at key.
+	masks []uint64
+	// items holds the stored values, indexed directly by key. grow extends it
+	// by 64 entries for every word added to masks.
+	items []Item
+}
+
+// Len returns the number of items stored in the table.
+//
+// The count is recomputed on every call by summing the set bits of the
+// occupancy masks. Caching it would make this O(1), but at the price of extra
+// state to keep consistent, which has not been needed so far.
+func (t *Table[Key, Item]) Len() (n int) {
+	for i := range t.masks {
+		n += bits.OnesCount64(t.masks[i])
+	}
+	return n
+}
+
+// grow extends the table by n mask words, i.e. by n * 64 item slots.
+func (t *Table[Key, Item]) grow(n int) {
+	slots := n * 64
+	masksLen := len(t.masks) + n
+	itemsLen := len(t.items) + slots
+
+	// Reserve the extra capacity first, then extend each slice's length to
+	// cover it.
+	t.masks = slices.Grow(t.masks, n)[:masksLen]
+	t.items = slices.Grow(t.items, slots)[:itemsLen]
+}
+
+// Insert inserts the given item to the table, returning the key that it is
+// mapped to or false if the table was full.
+//
+// The table is full once the lowest free slot no longer maps to a
+// non-negative Key. In that case the item is not stored and the returned key
+// is negative, which every other method treats as invalid.
+//
+// The method does not perform deduplication, it is possible for the same item
+// to be inserted multiple times, each insertion will return a different key.
+func (t *Table[Key, Item]) Insert(item Item) (key Key, ok bool) {
+	offset := 0
+	for {
+		// Note: this loop could be made a lot more efficient using vectorized
+		// operations: 256 bits vector registers would yield a theoretical 4x
+		// speed up (e.g. using AVX2).
+		for index, mask := range t.masks[offset:] {
+			if ^mask == 0 { // all 64 slots of this word are taken
+				continue
+			}
+			shift := bits.TrailingZeros64(^mask)
+			index += offset
+			key = Key(index)*64 + Key(shift)
+			if key < 0 {
+				// The key wrapped around. This must be detected before
+				// touching t.items, because indexing a slice with a negative
+				// value panics instead of reporting a full table.
+				return key, false
+			}
+			t.items[key] = item
+			t.masks[index] = mask | uint64(1<<shift)
+			return key, true
+		}
+
+		// No free slot found: add one word and rescan starting from it, as
+		// everything before it is known to be full.
+		offset = len(t.masks)
+		t.grow(1)
+	}
+}
+
+// Lookup returns the item associated with the given key, and whether the key
+// is present. The stored item may itself be nil. For a negative, out of range
+// or unused key, the zero Item and false are returned.
+func (t *Table[Key, Item]) Lookup(key Key) (item Item, found bool) {
+	if key < 0 || int(key) >= len(t.items) {
+		return item, false
+	}
+	word, bit := uint(key)/64, uint(key)%64
+	if t.masks[word]&(1<<bit) == 0 { // slot exists but is unoccupied
+		return item, false
+	}
+	return t.items[key], true
+}
+
+// InsertAt stores `item` at the descriptor `key`, growing the table when the
+// key lies beyond its current size. Any item already stored at that key is
+// overwritten. It returns false, leaving the table untouched, when the key
+// is negative.
+func (t *Table[Key, Item]) InsertAt(item Item, key Key) bool {
+	if key < 0 {
+		return false
+	}
+	word, bit := uint(key)/64, uint(key)%64
+	// Add as many mask words as are needed for masks[word] to exist.
+	if missing := int(word) - len(t.masks) + 1; missing > 0 {
+		t.grow(missing)
+	}
+	t.items[key] = item
+	t.masks[word] |= 1 << bit
+	return true
+}
+
+// Delete removes the item stored at the given key, if any. Negative keys,
+// keys beyond the table and keys that are not in use are ignored.
+func (t *Table[Key, Item]) Delete(key Key) {
+	if key < 0 { // invalid key
+		return
+	}
+	word := uint(key) / 64
+	if int(word) >= len(t.masks) {
+		return
+	}
+	bit := uint64(1) << (uint(key) % 64)
+	if t.masks[word]&bit == 0 {
+		return
+	}
+	// Overwrite the slot with the zero value so the table does not keep the
+	// old item, then mark the slot as free.
+	var zero Item
+	t.items[key] = zero
+	t.masks[word] &^= bit
+}
+
+// Range calls f for each item and its associated key in the table, in
+// ascending key order. The function f may return false to interrupt the
+// iteration.
+func (t *Table[Key, Item]) Range(f func(Key, Item) bool) {
+	for word, mask := range t.masks {
+		// Visit the set bits of this word from lowest to highest; each pass
+		// clears the bit that was just handled.
+		for pending := mask; pending != 0; pending &= pending - 1 {
+			key := Key(word)*64 + Key(bits.TrailingZeros64(pending))
+			if !f(key, t.items[key]) {
+				return
+			}
+		}
+	}
+}
+
+// Reset clears the content of the table: every item slot is zeroed and every
+// occupancy bit is cleared. The slices keep their current length.
+func (t *Table[Key, Item]) Reset() {
+	clear(t.items)
+	clear(t.masks)
+}
@@ -0,0 +1,22 @@
+package interpreter
+
+import (
+	"bytes"
+)
+
+// format renders ops as text: a ".entrypoint" header line followed by one
+// line per operation. Label operations are written flush left; every other
+// operation is indented with a tab.
+func format(ops []unionOperation) string {
+	var out bytes.Buffer
+
+	_, _ = out.WriteString(".entrypoint\n")
+	for i := range ops {
+		line := ops[i].String()
+		if ops[i].Kind != operationKindLabel {
+			line = "\t" + line
+		}
+		_, _ = out.WriteString(line + "\n")
+	}
+	return out.String()
+}
@@ -0,0 +1,767 @@
+package interpreter
+
+import (
+	"fmt"
+
+	"github.com/tetratelabs/wazero/internal/wasm"
+)
+
+// signature describes, in terms of value types, how a Wasm opcode
+// manipulates the value stack.
+type signature struct {
+	// in holds the types of the opcode's inputs.
+	in []unsignedType
+	// out holds the types of the opcode's outputs.
+	out []unsignedType
+}
+
+// Pre-allocated signatures returned by wasmOpcodeSignature. Each variable is
+// named signature_<inputs>_<outputs>: the type names are concatenated in the
+// order they appear in the `in` and `out` slices, and "None" means the
+// corresponding slice is left nil.
+var (
+	signature_None_None    = &signature{}
+	signature_Unknown_None = &signature{
+		in: []unsignedType{unsignedTypeUnknown},
+	}
+	signature_None_I32 = &signature{
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_None_I64 = &signature{
+		out: []unsignedType{unsignedTypeI64},
+	}
+	signature_None_V128 = &signature{
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_None_F32 = &signature{
+		out: []unsignedType{unsignedTypeF32},
+	}
+	signature_None_F64 = &signature{
+		out: []unsignedType{unsignedTypeF64},
+	}
+	signature_I32_None = &signature{
+		in: []unsignedType{unsignedTypeI32},
+	}
+	signature_I64_None = &signature{
+		in: []unsignedType{unsignedTypeI64},
+	}
+	signature_F32_None = &signature{
+		in: []unsignedType{unsignedTypeF32},
+	}
+	signature_F64_None = &signature{
+		in: []unsignedType{unsignedTypeF64},
+	}
+	signature_V128_None = &signature{
+		in: []unsignedType{unsignedTypeV128},
+	}
+	signature_I32_I32 = &signature{
+		in:  []unsignedType{unsignedTypeI32},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_I32_I64 = &signature{
+		in:  []unsignedType{unsignedTypeI32},
+		out: []unsignedType{unsignedTypeI64},
+	}
+	signature_I64_I64 = &signature{
+		in:  []unsignedType{unsignedTypeI64},
+		out: []unsignedType{unsignedTypeI64},
+	}
+	signature_I32_F32 = &signature{
+		in:  []unsignedType{unsignedTypeI32},
+		out: []unsignedType{unsignedTypeF32},
+	}
+	signature_I32_F64 = &signature{
+		in:  []unsignedType{unsignedTypeI32},
+		out: []unsignedType{unsignedTypeF64},
+	}
+	signature_I64_I32 = &signature{
+		in:  []unsignedType{unsignedTypeI64},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_I64_F32 = &signature{
+		in:  []unsignedType{unsignedTypeI64},
+		out: []unsignedType{unsignedTypeF32},
+	}
+	signature_I64_F64 = &signature{
+		in:  []unsignedType{unsignedTypeI64},
+		out: []unsignedType{unsignedTypeF64},
+	}
+	signature_F32_I32 = &signature{
+		in:  []unsignedType{unsignedTypeF32},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_F32_I64 = &signature{
+		in:  []unsignedType{unsignedTypeF32},
+		out: []unsignedType{unsignedTypeI64},
+	}
+	signature_F32_F64 = &signature{
+		in:  []unsignedType{unsignedTypeF32},
+		out: []unsignedType{unsignedTypeF64},
+	}
+	signature_F32_F32 = &signature{
+		in:  []unsignedType{unsignedTypeF32},
+		out: []unsignedType{unsignedTypeF32},
+	}
+	signature_F64_I32 = &signature{
+		in:  []unsignedType{unsignedTypeF64},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_F64_F32 = &signature{
+		in:  []unsignedType{unsignedTypeF64},
+		out: []unsignedType{unsignedTypeF32},
+	}
+	signature_F64_I64 = &signature{
+		in:  []unsignedType{unsignedTypeF64},
+		out: []unsignedType{unsignedTypeI64},
+	}
+	signature_F64_F64 = &signature{
+		in:  []unsignedType{unsignedTypeF64},
+		out: []unsignedType{unsignedTypeF64},
+	}
+	signature_I32I32_None = &signature{
+		in: []unsignedType{unsignedTypeI32, unsignedTypeI32},
+	}
+
+	signature_I32I32_I32 = &signature{
+		in:  []unsignedType{unsignedTypeI32, unsignedTypeI32},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_I32I64_None = &signature{
+		in: []unsignedType{unsignedTypeI32, unsignedTypeI64},
+	}
+	signature_I32F32_None = &signature{
+		in: []unsignedType{unsignedTypeI32, unsignedTypeF32},
+	}
+	signature_I32F64_None = &signature{
+		in: []unsignedType{unsignedTypeI32, unsignedTypeF64},
+	}
+	signature_I64I32_I32 = &signature{
+		in:  []unsignedType{unsignedTypeI64, unsignedTypeI32},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_I64I64_I32 = &signature{
+		in:  []unsignedType{unsignedTypeI64, unsignedTypeI64},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_I64I64_I64 = &signature{
+		in:  []unsignedType{unsignedTypeI64, unsignedTypeI64},
+		out: []unsignedType{unsignedTypeI64},
+	}
+	signature_F32F32_I32 = &signature{
+		in:  []unsignedType{unsignedTypeF32, unsignedTypeF32},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_F32F32_F32 = &signature{
+		in:  []unsignedType{unsignedTypeF32, unsignedTypeF32},
+		out: []unsignedType{unsignedTypeF32},
+	}
+	signature_F64F64_I32 = &signature{
+		in:  []unsignedType{unsignedTypeF64, unsignedTypeF64},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_F64F64_F64 = &signature{
+		in:  []unsignedType{unsignedTypeF64, unsignedTypeF64},
+		out: []unsignedType{unsignedTypeF64},
+	}
+	signature_I32I32I32_None = &signature{
+		in: []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI32},
+	}
+	signature_I32I64I32_None = &signature{
+		in: []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI32},
+	}
+	signature_UnknownUnknownI32_Unknown = &signature{
+		in:  []unsignedType{unsignedTypeUnknown, unsignedTypeUnknown, unsignedTypeI32},
+		out: []unsignedType{unsignedTypeUnknown},
+	}
+	signature_V128V128_V128 = &signature{
+		in:  []unsignedType{unsignedTypeV128, unsignedTypeV128},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	// NOTE: despite the "_V32" suffix, the output of this signature is a
+	// single V128. It is the signature returned for
+	// wasm.OpcodeVecV128Bitselect.
+	signature_V128V128V128_V32 = &signature{
+		in:  []unsignedType{unsignedTypeV128, unsignedTypeV128, unsignedTypeV128},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_I32_V128 = &signature{
+		in:  []unsignedType{unsignedTypeI32},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_I32V128_None = &signature{
+		in: []unsignedType{unsignedTypeI32, unsignedTypeV128},
+	}
+	signature_I32V128_V128 = &signature{
+		in:  []unsignedType{unsignedTypeI32, unsignedTypeV128},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_V128I32_V128 = &signature{
+		in:  []unsignedType{unsignedTypeV128, unsignedTypeI32},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_V128I64_V128 = &signature{
+		in:  []unsignedType{unsignedTypeV128, unsignedTypeI64},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_V128F32_V128 = &signature{
+		in:  []unsignedType{unsignedTypeV128, unsignedTypeF32},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_V128F64_V128 = &signature{
+		in:  []unsignedType{unsignedTypeV128, unsignedTypeF64},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_V128_I32 = &signature{
+		in:  []unsignedType{unsignedTypeV128},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_V128_I64 = &signature{
+		in:  []unsignedType{unsignedTypeV128},
+		out: []unsignedType{unsignedTypeI64},
+	}
+	signature_V128_F32 = &signature{
+		in:  []unsignedType{unsignedTypeV128},
+		out: []unsignedType{unsignedTypeF32},
+	}
+	signature_V128_F64 = &signature{
+		in:  []unsignedType{unsignedTypeV128},
+		out: []unsignedType{unsignedTypeF64},
+	}
+	signature_V128_V128 = &signature{
+		in:  []unsignedType{unsignedTypeV128},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_I64_V128 = &signature{
+		in:  []unsignedType{unsignedTypeI64},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_F32_V128 = &signature{
+		in:  []unsignedType{unsignedTypeF32},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_F64_V128 = &signature{
+		in:  []unsignedType{unsignedTypeF64},
+		out: []unsignedType{unsignedTypeV128},
+	}
+	signature_I32I64_I64 = &signature{
+		in:  []unsignedType{unsignedTypeI32, unsignedTypeI64},
+		out: []unsignedType{unsignedTypeI64},
+	}
+	signature_I32I32I64_I32 = &signature{
+		in:  []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI64},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_I32I64I64_I32 = &signature{
+		in:  []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI64},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_I32I32I32_I32 = &signature{
+		in:  []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI32},
+		out: []unsignedType{unsignedTypeI32},
+	}
+	signature_I32I64I64_I64 = &signature{
+		in:  []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI64},
+		out: []unsignedType{unsignedTypeI64},
+	}
+)
+
+// wasmOpcodeSignature returns the signature of given Wasm opcode.
+// Note that some of opcodes' signature vary depending on
+// the function instance (for example, local types).
+// "index" parameter is not used by most of opcodes.
+// The returned signature is used for stack validation when lowering Wasm's opcodes to interpreterir.
+func (c *compiler) wasmOpcodeSignature(op wasm.Opcode, index uint32) (*signature, error) {
+	switch op {
+	case wasm.OpcodeUnreachable, wasm.OpcodeNop, wasm.OpcodeBlock, wasm.OpcodeLoop:
+		return signature_None_None, nil
+	case wasm.OpcodeIf:
+		return signature_I32_None, nil
+	case wasm.OpcodeElse, wasm.OpcodeEnd, wasm.OpcodeBr:
+		return signature_None_None, nil
+	case wasm.OpcodeBrIf, wasm.OpcodeBrTable:
+		return signature_I32_None, nil
+	case wasm.OpcodeReturn:
+		return signature_None_None, nil
+	case wasm.OpcodeCall, wasm.OpcodeTailCallReturnCall:
+		return c.funcTypeToSigs.get(c.funcs[index], false /* direct */), nil
+	case wasm.OpcodeCallIndirect, wasm.OpcodeTailCallReturnCallIndirect:
+		return c.funcTypeToSigs.get(index, true /* call_indirect */), nil
+	case wasm.OpcodeDrop:
+		return signature_Unknown_None, nil
+	case wasm.OpcodeSelect, wasm.OpcodeTypedSelect:
+		return signature_UnknownUnknownI32_Unknown, nil
+	case wasm.OpcodeLocalGet:
+		inputLen := uint32(len(c.sig.Params))
+		if l := uint32(len(c.localTypes)) + inputLen; index >= l {
+			return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l)
+		}
+		var t wasm.ValueType
+		if index < inputLen {
+			t = c.sig.Params[index]
+		} else {
+			t = c.localTypes[index-inputLen]
+		}
+		return wasmValueTypeToUnsignedOutSignature(t), nil
+	case wasm.OpcodeLocalSet:
+		inputLen := uint32(len(c.sig.Params))
+		if l := uint32(len(c.localTypes)) + inputLen; index >= l {
+			return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l)
+		}
+		var t wasm.ValueType
+		if index < inputLen {
+			t = c.sig.Params[index]
+		} else {
+			t = c.localTypes[index-inputLen]
+		}
+		return wasmValueTypeToUnsignedInSignature(t), nil
+	case wasm.OpcodeLocalTee:
+		inputLen := uint32(len(c.sig.Params))
+		if l := uint32(len(c.localTypes)) + inputLen; index >= l {
+			return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l)
+		}
+		var t wasm.ValueType
+		if index < inputLen {
+			t = c.sig.Params[index]
+		} else {
+			t = c.localTypes[index-inputLen]
+		}
+		return wasmValueTypeToUnsignedInOutSignature(t), nil
+	case wasm.OpcodeGlobalGet:
+		if len(c.globals) <= int(index) {
+			return nil, fmt.Errorf("invalid global index for global.get %d >= %d", index, len(c.globals))
+		}
+		return wasmValueTypeToUnsignedOutSignature(c.globals[index].ValType), nil
+	case wasm.OpcodeGlobalSet:
+		if len(c.globals) <= int(index) {
+			return nil, fmt.Errorf("invalid global index for global.get %d >= %d", index, len(c.globals))
+		}
+		return wasmValueTypeToUnsignedInSignature(c.globals[index].ValType), nil
+	case wasm.OpcodeI32Load:
+		return signature_I32_I32, nil
+	case wasm.OpcodeI64Load:
+		return signature_I32_I64, nil
+	case wasm.OpcodeF32Load:
+		return signature_I32_F32, nil
+	case wasm.OpcodeF64Load:
+		return signature_I32_F64, nil
+	case wasm.OpcodeI32Load8S, wasm.OpcodeI32Load8U, wasm.OpcodeI32Load16S, wasm.OpcodeI32Load16U:
+		return signature_I32_I32, nil
+	case wasm.OpcodeI64Load8S, wasm.OpcodeI64Load8U, wasm.OpcodeI64Load16S, wasm.OpcodeI64Load16U,
+		wasm.OpcodeI64Load32S, wasm.OpcodeI64Load32U:
+		return signature_I32_I64, nil
+	case wasm.OpcodeI32Store:
+		return signature_I32I32_None, nil
+	case wasm.OpcodeI64Store:
+		return signature_I32I64_None, nil
+	case wasm.OpcodeF32Store:
+		return signature_I32F32_None, nil
+	case wasm.OpcodeF64Store:
+		return signature_I32F64_None, nil
+	case wasm.OpcodeI32Store8:
+		return signature_I32I32_None, nil
+	case wasm.OpcodeI32Store16:
+		return signature_I32I32_None, nil
+	case wasm.OpcodeI64Store8:
+		return signature_I32I64_None, nil
+	case wasm.OpcodeI64Store16:
+		return signature_I32I64_None, nil
+	case wasm.OpcodeI64Store32:
+		return signature_I32I64_None, nil
+	case wasm.OpcodeMemorySize:
+		return signature_None_I32, nil
+	case wasm.OpcodeMemoryGrow:
+		return signature_I32_I32, nil
+	case wasm.OpcodeI32Const:
+		return signature_None_I32, nil
+	case wasm.OpcodeI64Const:
+		return signature_None_I64, nil
+	case wasm.OpcodeF32Const:
+		return signature_None_F32, nil
+	case wasm.OpcodeF64Const:
+		return signature_None_F64, nil
+	case wasm.OpcodeI32Eqz:
+		return signature_I32_I32, nil
+	case wasm.OpcodeI32Eq, wasm.OpcodeI32Ne, wasm.OpcodeI32LtS,
+		wasm.OpcodeI32LtU, wasm.OpcodeI32GtS, wasm.OpcodeI32GtU,
+		wasm.OpcodeI32LeS, wasm.OpcodeI32LeU, wasm.OpcodeI32GeS,
+		wasm.OpcodeI32GeU:
+		return signature_I32I32_I32, nil
+	case wasm.OpcodeI64Eqz:
+		return signature_I64_I32, nil
+	case wasm.OpcodeI64Eq, wasm.OpcodeI64Ne, wasm.OpcodeI64LtS,
+		wasm.OpcodeI64LtU, wasm.OpcodeI64GtS, wasm.OpcodeI64GtU,
+		wasm.OpcodeI64LeS, wasm.OpcodeI64LeU, wasm.OpcodeI64GeS,
+		wasm.OpcodeI64GeU:
+		return signature_I64I64_I32, nil
+	case wasm.OpcodeF32Eq, wasm.OpcodeF32Ne, wasm.OpcodeF32Lt,
+		wasm.OpcodeF32Gt, wasm.OpcodeF32Le, wasm.OpcodeF32Ge:
+		return signature_F32F32_I32, nil
+	case wasm.OpcodeF64Eq, wasm.OpcodeF64Ne, wasm.OpcodeF64Lt,
+		wasm.OpcodeF64Gt, wasm.OpcodeF64Le, wasm.OpcodeF64Ge:
+		return signature_F64F64_I32, nil
+	case wasm.OpcodeI32Clz, wasm.OpcodeI32Ctz, wasm.OpcodeI32Popcnt:
+		return signature_I32_I32, nil
+	case wasm.OpcodeI32Add, wasm.OpcodeI32Sub, wasm.OpcodeI32Mul,
+		wasm.OpcodeI32DivS, wasm.OpcodeI32DivU, wasm.OpcodeI32RemS,
+		wasm.OpcodeI32RemU, wasm.OpcodeI32And, wasm.OpcodeI32Or,
+		wasm.OpcodeI32Xor, wasm.OpcodeI32Shl, wasm.OpcodeI32ShrS,
+		wasm.OpcodeI32ShrU, wasm.OpcodeI32Rotl, wasm.OpcodeI32Rotr:
+		return signature_I32I32_I32, nil
+	case wasm.OpcodeI64Clz, wasm.OpcodeI64Ctz, wasm.OpcodeI64Popcnt:
+		return signature_I64_I64, nil
+	case wasm.OpcodeI64Add, wasm.OpcodeI64Sub, wasm.OpcodeI64Mul,
+		wasm.OpcodeI64DivS, wasm.OpcodeI64DivU, wasm.OpcodeI64RemS,
+		wasm.OpcodeI64RemU, wasm.OpcodeI64And, wasm.OpcodeI64Or,
+		wasm.OpcodeI64Xor, wasm.OpcodeI64Shl, wasm.OpcodeI64ShrS,
+		wasm.OpcodeI64ShrU, wasm.OpcodeI64Rotl, wasm.OpcodeI64Rotr:
+		return signature_I64I64_I64, nil
+	case wasm.OpcodeF32Abs, wasm.OpcodeF32Neg, wasm.OpcodeF32Ceil,
+		wasm.OpcodeF32Floor, wasm.OpcodeF32Trunc, wasm.OpcodeF32Nearest,
+		wasm.OpcodeF32Sqrt:
+		return signature_F32_F32, nil
+	case wasm.OpcodeF32Add, wasm.OpcodeF32Sub, wasm.OpcodeF32Mul,
+		wasm.OpcodeF32Div, wasm.OpcodeF32Min, wasm.OpcodeF32Max,
+		wasm.OpcodeF32Copysign:
+		return signature_F32F32_F32, nil
+	case wasm.OpcodeF64Abs, wasm.OpcodeF64Neg, wasm.OpcodeF64Ceil,
+		wasm.OpcodeF64Floor, wasm.OpcodeF64Trunc, wasm.OpcodeF64Nearest,
+		wasm.OpcodeF64Sqrt:
+		return signature_F64_F64, nil
+	case wasm.OpcodeF64Add, wasm.OpcodeF64Sub, wasm.OpcodeF64Mul,
+		wasm.OpcodeF64Div, wasm.OpcodeF64Min, wasm.OpcodeF64Max,
+		wasm.OpcodeF64Copysign:
+		return signature_F64F64_F64, nil
+	case wasm.OpcodeI32WrapI64:
+		return signature_I64_I32, nil
+	case wasm.OpcodeI32TruncF32S, wasm.OpcodeI32TruncF32U:
+		return signature_F32_I32, nil
+	case wasm.OpcodeI32TruncF64S, wasm.OpcodeI32TruncF64U:
+		return signature_F64_I32, nil
+	case wasm.OpcodeI64ExtendI32S, wasm.OpcodeI64ExtendI32U:
+		return signature_I32_I64, nil
+	case wasm.OpcodeI64TruncF32S, wasm.OpcodeI64TruncF32U:
+		return signature_F32_I64, nil
+	case wasm.OpcodeI64TruncF64S, wasm.OpcodeI64TruncF64U:
+		return signature_F64_I64, nil
+	case wasm.OpcodeF32ConvertI32S, wasm.OpcodeF32ConvertI32U:
+		return signature_I32_F32, nil
+	case wasm.OpcodeF32ConvertI64S, wasm.OpcodeF32ConvertI64U:
+		return signature_I64_F32, nil
+	case wasm.OpcodeF32DemoteF64:
+		return signature_F64_F32, nil
+	case wasm.OpcodeF64ConvertI32S, wasm.OpcodeF64ConvertI32U:
+		return signature_I32_F64, nil
+	case wasm.OpcodeF64ConvertI64S, wasm.OpcodeF64ConvertI64U:
+		return signature_I64_F64, nil
+	case wasm.OpcodeF64PromoteF32:
+		return signature_F32_F64, nil
+	case wasm.OpcodeI32ReinterpretF32:
+		return signature_F32_I32, nil
+	case wasm.OpcodeI64ReinterpretF64:
+		return signature_F64_I64, nil
+	case wasm.OpcodeF32ReinterpretI32:
+		return signature_I32_F32, nil
+	case wasm.OpcodeF64ReinterpretI64:
+		return signature_I64_F64, nil
+	case wasm.OpcodeI32Extend8S, wasm.OpcodeI32Extend16S:
+		return signature_I32_I32, nil
+	case wasm.OpcodeI64Extend8S, wasm.OpcodeI64Extend16S, wasm.OpcodeI64Extend32S:
+		return signature_I64_I64, nil
+	case wasm.OpcodeTableGet:
+		// table.get takes table's offset and pushes the ref type value of opaque pointer as i64 value onto the stack.
+		return signature_I32_I64, nil
+	case wasm.OpcodeTableSet:
+		// table.set takes table's offset and the ref type value of opaque pointer as i64 value.
+		return signature_I32I64_None, nil
+	case wasm.OpcodeRefFunc:
+		// ref.func is translated as pushing the compiled function's opaque pointer (uint64) at interpreterir layer.
+		return signature_None_I64, nil
+	case wasm.OpcodeRefIsNull:
+		// ref.is_null is translated as checking if the uint64 on the top of the stack (opaque pointer) is zero or not.
+		return signature_I64_I32, nil
+	case wasm.OpcodeRefNull:
+		// ref.null is translated as i64.const 0.
+		return signature_None_I64, nil
+	case wasm.OpcodeMiscPrefix:
+		switch miscOp := c.body[c.pc+1]; miscOp {
+		case wasm.OpcodeMiscI32TruncSatF32S, wasm.OpcodeMiscI32TruncSatF32U:
+			return signature_F32_I32, nil
+		case wasm.OpcodeMiscI32TruncSatF64S, wasm.OpcodeMiscI32TruncSatF64U:
+			return signature_F64_I32, nil
+		case wasm.OpcodeMiscI64TruncSatF32S, wasm.OpcodeMiscI64TruncSatF32U:
+			return signature_F32_I64, nil
+		case wasm.OpcodeMiscI64TruncSatF64S, wasm.OpcodeMiscI64TruncSatF64U:
+			return signature_F64_I64, nil
+		case wasm.OpcodeMiscMemoryInit, wasm.OpcodeMiscMemoryCopy, wasm.OpcodeMiscMemoryFill,
+			wasm.OpcodeMiscTableInit, wasm.OpcodeMiscTableCopy:
+			return signature_I32I32I32_None, nil
+		case wasm.OpcodeMiscDataDrop, wasm.OpcodeMiscElemDrop:
+			return signature_None_None, nil
+		case wasm.OpcodeMiscTableGrow:
+			return signature_I64I32_I32, nil
+		case wasm.OpcodeMiscTableSize:
+			return signature_None_I32, nil
+		case wasm.OpcodeMiscTableFill:
+			return signature_I32I64I32_None, nil
+		default:
+			return nil, fmt.Errorf("unsupported misc instruction in interpreterir: 0x%x", op)
+		}
+	case wasm.OpcodeVecPrefix:
+		switch vecOp := c.body[c.pc+1]; vecOp {
+		case wasm.OpcodeVecV128Const:
+			return signature_None_V128, nil
+		case wasm.OpcodeVecV128Load, wasm.OpcodeVecV128Load8x8s, wasm.OpcodeVecV128Load8x8u,
+			wasm.OpcodeVecV128Load16x4s, wasm.OpcodeVecV128Load16x4u, wasm.OpcodeVecV128Load32x2s,
+			wasm.OpcodeVecV128Load32x2u, wasm.OpcodeVecV128Load8Splat, wasm.OpcodeVecV128Load16Splat,
+			wasm.OpcodeVecV128Load32Splat, wasm.OpcodeVecV128Load64Splat, wasm.OpcodeVecV128Load32zero,
+			wasm.OpcodeVecV128Load64zero:
+			return signature_I32_V128, nil
+		case wasm.OpcodeVecV128Load8Lane, wasm.OpcodeVecV128Load16Lane,
+			wasm.OpcodeVecV128Load32Lane, wasm.OpcodeVecV128Load64Lane:
+			return signature_I32V128_V128, nil
+		case wasm.OpcodeVecV128Store,
+			wasm.OpcodeVecV128Store8Lane,
+			wasm.OpcodeVecV128Store16Lane,
+			wasm.OpcodeVecV128Store32Lane,
+			wasm.OpcodeVecV128Store64Lane:
+			return signature_I32V128_None, nil
+		case wasm.OpcodeVecI8x16ExtractLaneS,
+			wasm.OpcodeVecI8x16ExtractLaneU,
+			wasm.OpcodeVecI16x8ExtractLaneS,
+			wasm.OpcodeVecI16x8ExtractLaneU,
+			wasm.OpcodeVecI32x4ExtractLane:
+			return signature_V128_I32, nil
+		case wasm.OpcodeVecI64x2ExtractLane:
+			return signature_V128_I64, nil
+		case wasm.OpcodeVecF32x4ExtractLane:
+			return signature_V128_F32, nil
+		case wasm.OpcodeVecF64x2ExtractLane:
+			return signature_V128_F64, nil
+		case wasm.OpcodeVecI8x16ReplaceLane, wasm.OpcodeVecI16x8ReplaceLane, wasm.OpcodeVecI32x4ReplaceLane,
+			wasm.OpcodeVecI8x16Shl, wasm.OpcodeVecI8x16ShrS, wasm.OpcodeVecI8x16ShrU,
+			wasm.OpcodeVecI16x8Shl, wasm.OpcodeVecI16x8ShrS, wasm.OpcodeVecI16x8ShrU,
+			wasm.OpcodeVecI32x4Shl, wasm.OpcodeVecI32x4ShrS, wasm.OpcodeVecI32x4ShrU,
+			wasm.OpcodeVecI64x2Shl, wasm.OpcodeVecI64x2ShrS, wasm.OpcodeVecI64x2ShrU:
+			return signature_V128I32_V128, nil
+		case wasm.OpcodeVecI64x2ReplaceLane:
+			return signature_V128I64_V128, nil
+		case wasm.OpcodeVecF32x4ReplaceLane:
+			return signature_V128F32_V128, nil
+		case wasm.OpcodeVecF64x2ReplaceLane:
+			return signature_V128F64_V128, nil
+		case wasm.OpcodeVecI8x16Splat,
+			wasm.OpcodeVecI16x8Splat,
+			wasm.OpcodeVecI32x4Splat:
+			return signature_I32_V128, nil
+		case wasm.OpcodeVecI64x2Splat:
+			return signature_I64_V128, nil
+		case wasm.OpcodeVecF32x4Splat:
+			return signature_F32_V128, nil
+		case wasm.OpcodeVecF64x2Splat:
+			return signature_F64_V128, nil
+		case wasm.OpcodeVecV128i8x16Shuffle, wasm.OpcodeVecI8x16Swizzle, wasm.OpcodeVecV128And, wasm.OpcodeVecV128Or, wasm.OpcodeVecV128Xor, wasm.OpcodeVecV128AndNot:
+			return signature_V128V128_V128, nil
+		case wasm.OpcodeVecI8x16AllTrue, wasm.OpcodeVecI16x8AllTrue, wasm.OpcodeVecI32x4AllTrue, wasm.OpcodeVecI64x2AllTrue,
+			wasm.OpcodeVecV128AnyTrue,
+			wasm.OpcodeVecI8x16BitMask, wasm.OpcodeVecI16x8BitMask, wasm.OpcodeVecI32x4BitMask, wasm.OpcodeVecI64x2BitMask:
+			return signature_V128_I32, nil
+		case wasm.OpcodeVecV128Not, wasm.OpcodeVecI8x16Neg, wasm.OpcodeVecI16x8Neg, wasm.OpcodeVecI32x4Neg, wasm.OpcodeVecI64x2Neg,
+			wasm.OpcodeVecF32x4Neg, wasm.OpcodeVecF64x2Neg, wasm.OpcodeVecF32x4Sqrt, wasm.OpcodeVecF64x2Sqrt,
+			wasm.OpcodeVecI8x16Abs, wasm.OpcodeVecI8x16Popcnt, wasm.OpcodeVecI16x8Abs, wasm.OpcodeVecI32x4Abs, wasm.OpcodeVecI64x2Abs,
+			wasm.OpcodeVecF32x4Abs, wasm.OpcodeVecF64x2Abs,
+			wasm.OpcodeVecF32x4Ceil, wasm.OpcodeVecF32x4Floor, wasm.OpcodeVecF32x4Trunc, wasm.OpcodeVecF32x4Nearest,
+			wasm.OpcodeVecF64x2Ceil, wasm.OpcodeVecF64x2Floor, wasm.OpcodeVecF64x2Trunc, wasm.OpcodeVecF64x2Nearest,
+			wasm.OpcodeVecI16x8ExtendLowI8x16S, wasm.OpcodeVecI16x8ExtendHighI8x16S, wasm.OpcodeVecI16x8ExtendLowI8x16U, wasm.OpcodeVecI16x8ExtendHighI8x16U,
+			wasm.OpcodeVecI32x4ExtendLowI16x8S, wasm.OpcodeVecI32x4ExtendHighI16x8S, wasm.OpcodeVecI32x4ExtendLowI16x8U, wasm.OpcodeVecI32x4ExtendHighI16x8U,
+			wasm.OpcodeVecI64x2ExtendLowI32x4S, wasm.OpcodeVecI64x2ExtendHighI32x4S, wasm.OpcodeVecI64x2ExtendLowI32x4U, wasm.OpcodeVecI64x2ExtendHighI32x4U,
+			wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S, wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U,
+			wasm.OpcodeVecF64x2PromoteLowF32x4Zero, wasm.OpcodeVecF32x4DemoteF64x2Zero,
+			wasm.OpcodeVecF32x4ConvertI32x4S, wasm.OpcodeVecF32x4ConvertI32x4U,
+			wasm.OpcodeVecF64x2ConvertLowI32x4S, wasm.OpcodeVecF64x2ConvertLowI32x4U,
+			wasm.OpcodeVecI32x4TruncSatF32x4S, wasm.OpcodeVecI32x4TruncSatF32x4U,
+			wasm.OpcodeVecI32x4TruncSatF64x2SZero, wasm.OpcodeVecI32x4TruncSatF64x2UZero:
+			return signature_V128_V128, nil
+		case wasm.OpcodeVecV128Bitselect:
+			return signature_V128V128V128_V32, nil
+		case wasm.OpcodeVecI8x16Eq, wasm.OpcodeVecI8x16Ne, wasm.OpcodeVecI8x16LtS, wasm.OpcodeVecI8x16LtU, wasm.OpcodeVecI8x16GtS,
+			wasm.OpcodeVecI8x16GtU, wasm.OpcodeVecI8x16LeS, wasm.OpcodeVecI8x16LeU, wasm.OpcodeVecI8x16GeS, wasm.OpcodeVecI8x16GeU,
+			wasm.OpcodeVecI16x8Eq, wasm.OpcodeVecI16x8Ne, wasm.OpcodeVecI16x8LtS, wasm.OpcodeVecI16x8LtU, wasm.OpcodeVecI16x8GtS,
+			wasm.OpcodeVecI16x8GtU, wasm.OpcodeVecI16x8LeS, wasm.OpcodeVecI16x8LeU, wasm.OpcodeVecI16x8GeS, wasm.OpcodeVecI16x8GeU,
+			wasm.OpcodeVecI32x4Eq, wasm.OpcodeVecI32x4Ne, wasm.OpcodeVecI32x4LtS, wasm.OpcodeVecI32x4LtU, wasm.OpcodeVecI32x4GtS,
+			wasm.OpcodeVecI32x4GtU, wasm.OpcodeVecI32x4LeS, wasm.OpcodeVecI32x4LeU, wasm.OpcodeVecI32x4GeS, wasm.OpcodeVecI32x4GeU,
+			wasm.OpcodeVecI64x2Eq, wasm.OpcodeVecI64x2Ne, wasm.OpcodeVecI64x2LtS, wasm.OpcodeVecI64x2GtS, wasm.OpcodeVecI64x2LeS,
+			wasm.OpcodeVecI64x2GeS, wasm.OpcodeVecF32x4Eq, wasm.OpcodeVecF32x4Ne, wasm.OpcodeVecF32x4Lt, wasm.OpcodeVecF32x4Gt,
+			wasm.OpcodeVecF32x4Le, wasm.OpcodeVecF32x4Ge, wasm.OpcodeVecF64x2Eq, wasm.OpcodeVecF64x2Ne, wasm.OpcodeVecF64x2Lt,
+			wasm.OpcodeVecF64x2Gt, wasm.OpcodeVecF64x2Le, wasm.OpcodeVecF64x2Ge,
+			wasm.OpcodeVecI8x16Add, wasm.OpcodeVecI8x16AddSatS, wasm.OpcodeVecI8x16AddSatU, wasm.OpcodeVecI8x16Sub,
+			wasm.OpcodeVecI8x16SubSatS, wasm.OpcodeVecI8x16SubSatU,
+			wasm.OpcodeVecI16x8Add, wasm.OpcodeVecI16x8AddSatS, wasm.OpcodeVecI16x8AddSatU, wasm.OpcodeVecI16x8Sub,
+			wasm.OpcodeVecI16x8SubSatS, wasm.OpcodeVecI16x8SubSatU, wasm.OpcodeVecI16x8Mul,
+			wasm.OpcodeVecI32x4Add, wasm.OpcodeVecI32x4Sub, wasm.OpcodeVecI32x4Mul,
+			wasm.OpcodeVecI64x2Add, wasm.OpcodeVecI64x2Sub, wasm.OpcodeVecI64x2Mul,
+			wasm.OpcodeVecF32x4Add, wasm.OpcodeVecF32x4Sub, wasm.OpcodeVecF32x4Mul, wasm.OpcodeVecF32x4Div,
+			wasm.OpcodeVecF64x2Add, wasm.OpcodeVecF64x2Sub, wasm.OpcodeVecF64x2Mul, wasm.OpcodeVecF64x2Div,
+			wasm.OpcodeVecI8x16MinS, wasm.OpcodeVecI8x16MinU, wasm.OpcodeVecI8x16MaxS, wasm.OpcodeVecI8x16MaxU, wasm.OpcodeVecI8x16AvgrU,
+			wasm.OpcodeVecI16x8MinS, wasm.OpcodeVecI16x8MinU, wasm.OpcodeVecI16x8MaxS, wasm.OpcodeVecI16x8MaxU, wasm.OpcodeVecI16x8AvgrU,
+			wasm.OpcodeVecI32x4MinS, wasm.OpcodeVecI32x4MinU, wasm.OpcodeVecI32x4MaxS, wasm.OpcodeVecI32x4MaxU,
+			wasm.OpcodeVecF32x4Min, wasm.OpcodeVecF32x4Max, wasm.OpcodeVecF64x2Min, wasm.OpcodeVecF64x2Max,
+			wasm.OpcodeVecF32x4Pmin, wasm.OpcodeVecF32x4Pmax, wasm.OpcodeVecF64x2Pmin, wasm.OpcodeVecF64x2Pmax,
+			wasm.OpcodeVecI16x8Q15mulrSatS,
+			wasm.OpcodeVecI16x8ExtMulLowI8x16S, wasm.OpcodeVecI16x8ExtMulHighI8x16S, wasm.OpcodeVecI16x8ExtMulLowI8x16U, wasm.OpcodeVecI16x8ExtMulHighI8x16U,
+			wasm.OpcodeVecI32x4ExtMulLowI16x8S, wasm.OpcodeVecI32x4ExtMulHighI16x8S, wasm.OpcodeVecI32x4ExtMulLowI16x8U, wasm.OpcodeVecI32x4ExtMulHighI16x8U,
+			wasm.OpcodeVecI64x2ExtMulLowI32x4S, wasm.OpcodeVecI64x2ExtMulHighI32x4S, wasm.OpcodeVecI64x2ExtMulLowI32x4U, wasm.OpcodeVecI64x2ExtMulHighI32x4U,
+			wasm.OpcodeVecI32x4DotI16x8S,
+			wasm.OpcodeVecI8x16NarrowI16x8S, wasm.OpcodeVecI8x16NarrowI16x8U, wasm.OpcodeVecI16x8NarrowI32x4S, wasm.OpcodeVecI16x8NarrowI32x4U:
+			return signature_V128V128_V128, nil
+		default:
+			return nil, fmt.Errorf("unsupported vector instruction in interpreterir: %s", wasm.VectorInstructionName(vecOp))
+		}
+	case wasm.OpcodeAtomicPrefix:
+		switch atomicOp := c.body[c.pc+1]; atomicOp {
+		case wasm.OpcodeAtomicMemoryNotify:
+			return signature_I32I32_I32, nil
+		case wasm.OpcodeAtomicMemoryWait32:
+			return signature_I32I32I64_I32, nil
+		case wasm.OpcodeAtomicMemoryWait64:
+			return signature_I32I64I64_I32, nil
+		case wasm.OpcodeAtomicFence:
+			return signature_None_None, nil
+		case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI32Load8U, wasm.OpcodeAtomicI32Load16U:
+			return signature_I32_I32, nil
+		case wasm.OpcodeAtomicI64Load, wasm.OpcodeAtomicI64Load8U, wasm.OpcodeAtomicI64Load16U, wasm.OpcodeAtomicI64Load32U:
+			return signature_I32_I64, nil
+		case wasm.OpcodeAtomicI32Store, wasm.OpcodeAtomicI32Store8, wasm.OpcodeAtomicI32Store16:
+			return signature_I32I32_None, nil
+		case wasm.OpcodeAtomicI64Store, wasm.OpcodeAtomicI64Store8, wasm.OpcodeAtomicI64Store16, wasm.OpcodeAtomicI64Store32:
+			return signature_I32I64_None, nil
+		case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI32RmwSub, wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI32RmwXchg,
+			wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI32Rmw8XchgU,
+			wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI32Rmw16XchgU:
+			return signature_I32I32_I32, nil
+		case wasm.OpcodeAtomicI64RmwAdd, wasm.OpcodeAtomicI64RmwSub, wasm.OpcodeAtomicI64RmwAnd, wasm.OpcodeAtomicI64RmwOr, wasm.OpcodeAtomicI64RmwXor, wasm.OpcodeAtomicI64RmwXchg,
+			wasm.OpcodeAtomicI64Rmw8AddU, wasm.OpcodeAtomicI64Rmw8SubU, wasm.OpcodeAtomicI64Rmw8AndU, wasm.OpcodeAtomicI64Rmw8OrU, wasm.OpcodeAtomicI64Rmw8XorU, wasm.OpcodeAtomicI64Rmw8XchgU,
+			wasm.OpcodeAtomicI64Rmw16AddU, wasm.OpcodeAtomicI64Rmw16SubU, wasm.OpcodeAtomicI64Rmw16AndU, wasm.OpcodeAtomicI64Rmw16OrU, wasm.OpcodeAtomicI64Rmw16XorU, wasm.OpcodeAtomicI64Rmw16XchgU,
+			wasm.OpcodeAtomicI64Rmw32AddU, wasm.OpcodeAtomicI64Rmw32SubU, wasm.OpcodeAtomicI64Rmw32AndU, wasm.OpcodeAtomicI64Rmw32OrU, wasm.OpcodeAtomicI64Rmw32XorU, wasm.OpcodeAtomicI64Rmw32XchgU:
+			return signature_I32I64_I64, nil
+		case wasm.OpcodeAtomicI32RmwCmpxchg, wasm.OpcodeAtomicI32Rmw8CmpxchgU, wasm.OpcodeAtomicI32Rmw16CmpxchgU:
+			return signature_I32I32I32_I32, nil
+		case wasm.OpcodeAtomicI64RmwCmpxchg, wasm.OpcodeAtomicI64Rmw8CmpxchgU, wasm.OpcodeAtomicI64Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw32CmpxchgU:
+			return signature_I32I64I64_I64, nil
+		default:
+			return nil, fmt.Errorf("unsupported atomic instruction in interpreterir: %s", wasm.AtomicInstructionName(atomicOp))
+		}
+	default:
+		return nil, fmt.Errorf("unsupported instruction in interpreterir: 0x%x", op)
+	}
+}
+
+// funcTypeToIRSignatures is the central cache for a module to get the *signature
+// for function calls.
+//
+// All three slices are indexed by the function type index (see get):
+//   - directCalls holds the signatures built so far for direct calls,
+//   - indirectCalls holds the signatures built so far for indirect calls,
+//   - wasmTypes holds the function types the signatures are derived from.
+//
+// A nil entry in directCalls/indirectCalls means the signature has not been built yet.
+type funcTypeToIRSignatures struct {
+	directCalls   []*signature
+	indirectCalls []*signature
+	wasmTypes     []wasm.FunctionType
+}
+
+// get returns the *signature for the direct or indirect function call against functions whose type is at `typeIndex`.
+func (f *funcTypeToIRSignatures) get(typeIndex wasm.Index, indirect bool) *signature {
+	var sig *signature
+	if indirect {
+		sig = f.indirectCalls[typeIndex]
+	} else {
+		sig = f.directCalls[typeIndex]
+	}
+	if sig != nil {
+		return sig
+	}
+
+	tp := &f.wasmTypes[typeIndex]
+	if indirect {
+		sig = &signature{
+			in:  make([]unsignedType, 0, len(tp.Params)+1), // +1 to reserve space for call indirect index.
+			out: make([]unsignedType, 0, len(tp.Results)),
+		}
+	} else {
+		sig = &signature{
+			in:  make([]unsignedType, 0, len(tp.Params)),
+			out: make([]unsignedType, 0, len(tp.Results)),
+		}
+	}
+
+	for _, vt := range tp.Params {
+		sig.in = append(sig.in, wasmValueTypeTounsignedType(vt))
+	}
+	for _, vt := range tp.Results {
+		sig.out = append(sig.out, wasmValueTypeTounsignedType(vt))
+	}
+
+	if indirect {
+		sig.in = append(sig.in, unsignedTypeI32)
+		f.indirectCalls[typeIndex] = sig
+	} else {
+		f.directCalls[typeIndex] = sig
+	}
+	return sig
+}
+
+// wasmValueTypeTounsignedType maps a Wasm value type to the unsignedType used by
+// interpreterir. Reference types are treated as I64 because, from the interpreterir
+// layer, ref type values are opaque 64-bit pointers.
+// It panics on any other value type.
+func wasmValueTypeTounsignedType(vt wasm.ValueType) unsignedType {
+	switch vt {
+	case wasm.ValueTypeI32:
+		return unsignedTypeI32
+	case wasm.ValueTypeF32:
+		return unsignedTypeF32
+	case wasm.ValueTypeF64:
+		return unsignedTypeF64
+	case wasm.ValueTypeV128:
+		return unsignedTypeV128
+	case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
+		return unsignedTypeI64
+	default:
+		panic("unreachable")
+	}
+}
+
+func wasmValueTypeToUnsignedOutSignature(vt wasm.ValueType) *signature {
+	switch vt {
+	case wasm.ValueTypeI32:
+		return signature_None_I32
+	case wasm.ValueTypeI64,
+		// From interpreterir layer, ref type values are opaque 64-bit pointers.
+		wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
+		return signature_None_I64
+	case wasm.ValueTypeF32:
+		return signature_None_F32
+	case wasm.ValueTypeF64:
+		return signature_None_F64
+	case wasm.ValueTypeV128:
+		return signature_None_V128
+	}
+	panic("unreachable")
+}
+
+// wasmValueTypeToUnsignedInSignature returns the shared signature that consumes a
+// single value of the given Wasm type and produces nothing. Reference types use the
+// I64 signature because, from the interpreterir layer, ref type values are opaque
+// 64-bit pointers. It panics on any other value type.
+func wasmValueTypeToUnsignedInSignature(vt wasm.ValueType) *signature {
+	switch vt {
+	case wasm.ValueTypeV128:
+		return signature_V128_None
+	case wasm.ValueTypeF64:
+		return signature_F64_None
+	case wasm.ValueTypeF32:
+		return signature_F32_None
+	case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
+		return signature_I64_None
+	case wasm.ValueTypeI32:
+		return signature_I32_None
+	default:
+		panic("unreachable")
+	}
+}
+
+func wasmValueTypeToUnsignedInOutSignature(vt wasm.ValueType) *signature {
+	switch vt {
+	case wasm.ValueTypeI32:
+		return signature_I32_I32
+	case wasm.ValueTypeI64,
+		// At interpreterir layer, ref type values are opaque 64-bit pointers.
+		wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
+		return signature_I64_I64
+	case wasm.ValueTypeF32:
+		return signature_F32_F32
+	case wasm.ValueTypeF64:
+		return signature_F64_F64
+	case wasm.ValueTypeV128:
+		return signature_V128_V128
+	}
+	panic("unreachable")
+}
@@ -0,0 +1,170 @@
+package backend
+
+import (
+	"fmt"
+
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+)
+
+type (
+	// FunctionABI represents the ABI information for a function which corresponds to a ssa.Signature.
+	// It is populated by Init and can be re-initialized for another signature, in which case the
+	// Args/Rets slices are reused when they are already long enough.
+	FunctionABI struct {
+		// Initialized is set to true at the end of Init.
+		Initialized bool
+
+		// Args and Rets hold the location of each parameter and each result, in signature order.
+		// ArgStackSize and RetStackSize are the number of stack bytes taken by the
+		// stack-passed parameters and results respectively, as computed by setABIArgs.
+		Args, Rets                 []ABIArg
+		ArgStackSize, RetStackSize int64
+
+		// The counters below are the numbers of parameters (Arg*) and results (Ret*) passed in
+		// registers, split into integer-typed values (Int) and all other values (Float).
+		// They are recomputed by Init.
+		ArgIntRealRegs   byte
+		ArgFloatRealRegs byte
+		RetIntRealRegs   byte
+		RetFloatRealRegs byte
+	}
+
+	// ABIArg represents either argument or return value's location.
+	ABIArg struct {
+		// Index is the index of the argument (or of the return value) in the signature.
+		Index int
+		// Kind is the kind of the argument: passed in a register or on the stack.
+		Kind ABIArgKind
+		// Reg is valid if Kind == ABIArgKindReg.
+		// This VReg must be based on RealReg.
+		Reg regalloc.VReg
+		// Offset is valid if Kind == ABIArgKindStack.
+		// This is the offset from the beginning of either arg or ret stack slot.
+		Offset int64
+		// Type is the type of the argument.
+		Type ssa.Type
+	}
+
+	// ABIArgKind is the kind of ABI argument.
+	ABIArgKind byte
+)
+
+// Possible values of ABIArg.Kind.
+//
+// NOTE(review): these are declared as untyped integer constants (plain iota, i.e. 0 and 1)
+// rather than as ABIArgKind-typed constants; they convert implicitly when compared with or
+// assigned to an ABIArgKind.
+const (
+	// ABIArgKindReg represents an argument passed in a register.
+	ABIArgKindReg = iota
+	// ABIArgKindStack represents an argument passed in the stack.
+	ABIArgKindStack
+)
+
+// String implements fmt.Stringer. The result has the form "args[<index>]: <kind>",
+// where <kind> is rendered by ABIArgKind.String.
+func (a *ABIArg) String() string {
+	index, kind := a.Index, a.Kind
+	return fmt.Sprintf("args[%d]: %s", index, kind)
+}
+
+// String implements fmt.Stringer. It returns "reg" or "stack", and panics for any
+// value that is not one of the declared kinds.
+func (a ABIArgKind) String() string {
+	if a == ABIArgKindReg {
+		return "reg"
+	}
+	if a == ABIArgKindStack {
+		return "stack"
+	}
+	panic("BUG")
+}
+
+// Init initializes the abiImpl for the given signature.
+func (a *FunctionABI) Init(sig *ssa.Signature, argResultInts, argResultFloats []regalloc.RealReg) {
+	if len(a.Rets) < len(sig.Results) {
+		a.Rets = make([]ABIArg, len(sig.Results))
+	}
+	a.Rets = a.Rets[:len(sig.Results)]
+	a.RetStackSize = a.setABIArgs(a.Rets, sig.Results, argResultInts, argResultFloats)
+	if argsNum := len(sig.Params); len(a.Args) < argsNum {
+		a.Args = make([]ABIArg, argsNum)
+	}
+	a.Args = a.Args[:len(sig.Params)]
+	a.ArgStackSize = a.setABIArgs(a.Args, sig.Params, argResultInts, argResultFloats)
+
+	// Gather the real registers usages in arg/return.
+	a.ArgIntRealRegs, a.ArgFloatRealRegs = 0, 0
+	a.RetIntRealRegs, a.RetFloatRealRegs = 0, 0
+	for i := range a.Rets {
+		r := &a.Rets[i]
+		if r.Kind == ABIArgKindReg {
+			if r.Type.IsInt() {
+				a.RetIntRealRegs++
+			} else {
+				a.RetFloatRealRegs++
+			}
+		}
+	}
+	for i := range a.Args {
+		arg := &a.Args[i]
+		if arg.Kind == ABIArgKindReg {
+			if arg.Type.IsInt() {
+				a.ArgIntRealRegs++
+			} else {
+				a.ArgFloatRealRegs++
+			}
+		}
+	}
+
+	a.Initialized = true
+}
+
+// setABIArgs sets the ABI arguments in the given slice. This assumes that len(s) >= len(types)
+// where if len(s) > len(types), the last elements of s is for the multi-return slot.
+//
+// Each value is assigned the next free register of its class (ints for integer types,
+// floats for everything else). Once a class is exhausted, values of that class are
+// placed on the stack at increasing offsets: 8 bytes per slot, or 16 bytes for a
+// 128-bit (vector) value. The returned stackSize is the total number of stack bytes used.
+func (a *FunctionABI) setABIArgs(s []ABIArg, types []ssa.Type, ints, floats []regalloc.RealReg) (stackSize int64) {
+	var usedInts, usedFloats int
+	for i := range types {
+		typ := types[i]
+		loc := &s[i]
+		loc.Index, loc.Type = i, typ
+
+		isInt := typ.IsInt()
+		switch {
+		case isInt && usedInts < len(ints):
+			loc.Kind = ABIArgKindReg
+			loc.Reg = regalloc.FromRealReg(ints[usedInts], regalloc.RegTypeInt)
+			usedInts++
+		case !isInt && usedFloats < len(floats):
+			loc.Kind = ABIArgKindReg
+			loc.Reg = regalloc.FromRealReg(floats[usedFloats], regalloc.RegTypeFloat)
+			usedFloats++
+		default:
+			// No register left for this class: pass the value on the stack.
+			loc.Kind = ABIArgKindStack
+			loc.Offset = stackSize
+			if !isInt && typ.Bits() == 128 { // Vector.
+				stackSize += 16
+			} else {
+				stackSize += 8 // Align at least 8 bytes.
+			}
+		}
+	}
+	return stackSize
+}
+
+func (a *FunctionABI) AlignedArgResultStackSlotSize() uint32 {
+	stackSlotSize := a.RetStackSize + a.ArgStackSize
+	// Align stackSlotSize to 16 bytes.
+	stackSlotSize = (stackSlotSize + 15) &^ 15
+	// Check overflow 32-bit.
+	if stackSlotSize > 0xFFFFFFFF {
+		panic("ABI stack slot size overflow")
+	}
+	return uint32(stackSlotSize)
+}
+
+// ABIInfoAsUint64 packs the ABI summary into a single uint64, from the most
+// significant byte down: ArgIntRealRegs, ArgFloatRealRegs, RetIntRealRegs,
+// RetFloatRealRegs, followed by the 32-bit AlignedArgResultStackSlotSize.
+// ABIInfoFromUint64 is the inverse.
+func (a *FunctionABI) ABIInfoAsUint64() uint64 {
+	info := uint64(a.ArgIntRealRegs) << 56
+	info |= uint64(a.ArgFloatRealRegs) << 48
+	info |= uint64(a.RetIntRealRegs) << 40
+	info |= uint64(a.RetFloatRealRegs) << 32
+	info |= uint64(a.AlignedArgResultStackSlotSize())
+	return info
+}
+
+// ABIInfoFromUint64 unpacks a value produced by FunctionABI.ABIInfoAsUint64: the
+// four register counts come from the upper four bytes (most significant first) and
+// the stack slot size from the lower 32 bits.
+func ABIInfoFromUint64(info uint64) (argIntRealRegs, argFloatRealRegs, retIntRealRegs, retFloatRealRegs byte, stackSlotSize uint32) {
+	regs := uint32(info >> 32) // Upper half: one byte per register counter.
+	argIntRealRegs = byte(regs >> 24)
+	argFloatRealRegs = byte(regs >> 16)
+	retIntRealRegs = byte(regs >> 8)
+	retFloatRealRegs = byte(regs)
+	stackSlotSize = uint32(info)
+	return argIntRealRegs, argFloatRealRegs, retIntRealRegs, retFloatRealRegs, stackSlotSize
+}
@@ -0,0 +1,3 @@
+// Package backend must be free of Wasm-specific concepts. In other words,
+// this package must not import the internal/wasm package.
+package backend
@@ -0,0 +1,402 @@
+package backend
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
+)
+
+// NewCompiler returns a new Compiler that generates machine code for the given
+// Machine from the functions built by the given ssa.Builder.
+func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler {
+	c := newCompiler(ctx, mach, builder)
+	return c
+}
+
+// newCompiler creates the concrete *compiler behind NewCompiler. It records the
+// registers the machine uses for arguments and results, starts virtual register
+// numbering at regalloc.VRegIDNonReservedBegin, and registers itself with the
+// machine via SetCompiler. The context is currently unused.
+func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler {
+	c := new(compiler)
+	c.mach = mach
+	c.ssaBuilder = builder
+	c.nextVRegID = regalloc.VRegIDNonReservedBegin
+	c.argResultInts, c.argResultFloats = mach.ArgsResultsRegs()
+
+	mach.SetCompiler(c)
+	return c
+}
+
+// Compiler is the backend of wazevo which takes an ssa.Builder and a Machine,
+// and uses the information there to emit the final machine code.
+type Compiler interface {
+	// SSABuilder returns the ssa.Builder used by this compiler.
+	SSABuilder() ssa.Builder
+
+	// Compile executes the following steps:
+	// 	1. Lower()
+	// 	2. RegAlloc()
+	// 	3. Finalize(), which runs the machine's post-register-allocation pass and then encodes the machine code.
+	//
+	// Each step can be called individually for testing purpose, therefore they are exposed in this interface too.
+	//
+	// The returned values are the machine code and the relocation information for the machine code.
+	// The caller is responsible for copying them immediately since the compiler may reuse the buffer.
+	Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error)
+
+	// Lower lowers the given ssa.Instruction to the machine-specific instructions.
+	Lower()
+
+	// RegAlloc performs the register allocation after Lower is called.
+	RegAlloc()
+
+	// Finalize performs the finalization of the compilation, including machine code emission.
+	// This must be called after RegAlloc.
+	Finalize(ctx context.Context) error
+
+	// Buf returns the buffer of the encoded machine code. This is only used for testing purpose.
+	Buf() []byte
+
+	// BufPtr returns a pointer to the buffer of the encoded machine code, so that
+	// the buffer itself can be modified in place by the caller.
+	BufPtr() *[]byte
+
+	// Format returns the debug string of the current state of the compiler.
+	Format() string
+
+	// Init initializes the internal state of the compiler for the next compilation.
+	Init()
+
+	// AllocateVReg allocates a new virtual register of the given type.
+	AllocateVReg(typ ssa.Type) regalloc.VReg
+
+	// ValueDefinition returns the definition of the given value.
+	ValueDefinition(ssa.Value) SSAValueDefinition
+
+	// VRegOf returns the virtual register of the given ssa.Value.
+	VRegOf(value ssa.Value) regalloc.VReg
+
+	// TypeOf returns the ssa.Type of the given virtual register.
+	TypeOf(regalloc.VReg) ssa.Type
+
+	// MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID,
+	// and a refcount of 1. That means, the instruction can be merged/swapped within the current instruction group.
+	MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool
+
+	// MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of ssa.Opcode,
+	// this returns the opcode. Otherwise, this returns ssa.OpcodeInvalid.
+	//
+	// Note: caller should be careful to avoid excessive allocation on opcodes slice.
+	MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode
+
+	// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
+	AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool)
+
+	// AddSourceOffsetInfo appends the source offset information for the given offset.
+	AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)
+
+	// SourceOffsetInfo returns all the source offset information recorded so far via AddSourceOffsetInfo.
+	SourceOffsetInfo() []SourceOffsetInfo
+
+	// EmitByte appends a byte to the buffer. Used during the code emission.
+	EmitByte(b byte)
+
+	// Emit4Bytes appends 4 bytes to the buffer, least significant byte first. Used during the code emission.
+	Emit4Bytes(b uint32)
+
+	// Emit8Bytes appends 8 bytes to the buffer, least significant byte first. Used during the code emission.
+	Emit8Bytes(b uint64)
+
+	// GetFunctionABI returns the ABI information for the given signature.
+	GetFunctionABI(sig *ssa.Signature) *FunctionABI
+}
+
+// RelocationInfo represents the relocation information for a call instruction.
+type RelocationInfo struct {
+	// Offset represents the offset from the beginning of the machine code of either a function or the entire module.
+	Offset int64
+	// FuncRef is the target function of the call instruction.
+	FuncRef ssa.FuncRef
+	// IsTailCall indicates whether the call instruction is a tail call.
+	IsTailCall bool
+}
+
+// compiler implements Compiler.
+//
+// Most slice fields are scratch or output buffers that are kept across compilations;
+// Init truncates returnVRegs, varEdges, constEdges, buf, sourceOffsets and relocations
+// so their backing storage is reused. buf holds the encoded machine code, relocations
+// and sourceOffsets are appended to by AddRelocationInfo and AddSourceOffsetInfo,
+// and argResultInts/argResultFloats are the registers reported by Machine.ArgsResultsRegs.
+type compiler struct {
+	mach       Machine
+	currentGID ssa.InstructionGroupID
+	ssaBuilder ssa.Builder
+	// nextVRegID is the next virtual register ID to be allocated.
+	nextVRegID regalloc.VRegID
+	// ssaValueToVRegs maps ssa.ValueID to regalloc.VReg.
+	ssaValueToVRegs [] /* ValueID to */ regalloc.VReg
+	ssaValuesInfo   []ssa.ValueInfo
+	// returnVRegs is the list of virtual registers that store the return values.
+	returnVRegs  []regalloc.VReg
+	varEdges     [][2]regalloc.VReg
+	varEdgeTypes []ssa.Type
+	constEdges   []struct {
+		cInst *ssa.Instruction
+		dst   regalloc.VReg
+	}
+	vRegSet         []bool
+	vRegIDs         []regalloc.VRegID
+	tempRegs        []regalloc.VReg
+	tmpVals         []ssa.Value
+	ssaTypeOfVRegID [] /* VRegID to */ ssa.Type
+	buf             []byte
+	relocations     []RelocationInfo
+	sourceOffsets   []SourceOffsetInfo
+	// abis maps ssa.SignatureID to the ABI implementation.
+	abis                           []FunctionABI
+	argResultInts, argResultFloats []regalloc.RealReg
+}
+
+// SourceOffsetInfo associates a source offset with the corresponding executable offset.
+type SourceOffsetInfo struct {
+	// SourceOffset is the source offset in the original source code.
+	SourceOffset ssa.SourceOffset
+	// ExecutableOffset is the offset in the compiled executable.
+	ExecutableOffset int64
+}
+
+// Compile implements Compiler.Compile.
+//
+// It runs Lower, RegAlloc and Finalize in this order. After each stage, two optional
+// debugging hooks controlled by wazevoapi flags may run: one prints the current state
+// (c.Format()) for the function being compiled, and one feeds the same state to the
+// deterministic-compilation verifier.
+//
+// On success it returns the compiler's own code buffer and relocation slice (not copies).
+// If Finalize fails, its error is returned and both slices are nil.
+func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) {
+	// Stage 1: lower SSA to machine-specific instructions.
+	c.Lower()
+	if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) {
+		fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
+	}
+	if wazevoapi.DeterministicCompilationVerifierEnabled {
+		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format())
+	}
+	// Stage 2: register allocation.
+	c.RegAlloc()
+	if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) {
+		fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
+	}
+	if wazevoapi.DeterministicCompilationVerifierEnabled {
+		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format())
+	}
+	// Stage 3: post-regalloc processing and encoding; the only stage that can fail.
+	if err := c.Finalize(ctx); err != nil {
+		return nil, nil, err
+	}
+	if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) {
+		fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
+	}
+	if wazevoapi.DeterministicCompilationVerifierEnabled {
+		wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format())
+	}
+	return c.buf, c.relocations, nil
+}
+
+// RegAlloc implements Compiler.RegAlloc by delegating the register allocation
+// to the underlying Machine.
+func (c *compiler) RegAlloc() {
+	mach := c.mach
+	mach.RegAlloc()
+}
+
+// Finalize implements Compiler.Finalize. It runs the machine's post-register-allocation
+// pass and then encodes the machine code, returning the error from the encoding if any.
+func (c *compiler) Finalize(ctx context.Context) error {
+	mach := c.mach
+	mach.PostRegAlloc()
+	err := mach.Encode(ctx)
+	return err
+}
+
+// setCurrentGroupID records gid as the instruction group being lowered.
+// MatchInstr and MatchInstrOneOf only match instructions that belong to this group.
+func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) { c.currentGID = gid }
+
+// assignVirtualRegisters assigns a virtual register to every valid ssa.Value in the
+// ssa.Builder: the parameters of each block and the values returned by each
+// instruction, visiting blocks in reverse post-order. It then allocates one more
+// register per parameter of the return block and appends it to c.returnVRegs.
+//
+// AllocateVReg already records the ssa.Type of every register it hands out in
+// c.ssaTypeOfVRegID, so only the value-to-register mapping needs to be stored here.
+func (c *compiler) assignVirtualRegisters() {
+	builder := c.ssaBuilder
+	c.ssaValuesInfo = builder.ValuesInfo()
+
+	// Grow the value-to-register table so it is at least as long as the values info.
+	if diff := len(c.ssaValuesInfo) - len(c.ssaValueToVRegs); diff > 0 {
+		c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, diff+1)...)
+	}
+
+	for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
+		// First we assign a virtual register to each parameter.
+		for i := 0; i < blk.Params(); i++ {
+			p := blk.Param(i)
+			c.ssaValueToVRegs[p.ID()] = c.AllocateVReg(p.Type())
+		}
+
+		// Assigns each value to a virtual register produced by instructions.
+		for cur := blk.Root(); cur != nil; cur = cur.Next() {
+			r, rs := cur.Returns()
+			if r.Valid() {
+				c.ssaValueToVRegs[r.ID()] = c.AllocateVReg(r.Type())
+			}
+			for _, r := range rs {
+				c.ssaValueToVRegs[r.ID()] = c.AllocateVReg(r.Type())
+			}
+		}
+	}
+
+	// Finally, allocate the registers that hold the function's return values.
+	for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ {
+		c.returnVRegs = append(c.returnVRegs, c.AllocateVReg(retBlk.Param(i).Type()))
+	}
+}
+
+// AllocateVReg implements Compiler.AllocateVReg.
+//
+// The new register takes the next free virtual register ID and the register type
+// derived from typ. Its ssa.Type is recorded in c.ssaTypeOfVRegID (grown on demand)
+// so that TypeOf can answer later.
+func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg {
+	vreg := regalloc.VReg(c.nextVRegID).SetRegType(regalloc.RegTypeOf(typ))
+
+	id := vreg.ID()
+	if needsGrowth := int(id) >= len(c.ssaTypeOfVRegID); needsGrowth {
+		extra := make([]ssa.Type, id+1)
+		c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, extra...)
+	}
+	c.ssaTypeOfVRegID[id] = typ
+
+	c.nextVRegID++
+	return vreg
+}
+
+// Init implements Compiler.Init.
+//
+// It resets the per-function state: the current group ID, the virtual register
+// counter, and the machine. The per-function buffers are truncated to length
+// zero so that their backing storage is reused.
+func (c *compiler) Init() {
+	c.currentGID, c.nextVRegID = 0, regalloc.VRegIDNonReservedBegin
+	c.returnVRegs = c.returnVRegs[:0]
+	c.mach.Reset()
+	c.varEdges, c.constEdges = c.varEdges[:0], c.constEdges[:0]
+	c.buf, c.sourceOffsets, c.relocations = c.buf[:0], c.sourceOffsets[:0], c.relocations[:0]
+}
+
+// ValueDefinition implements Compiler.ValueDefinition. The result bundles the value
+// itself, the instruction the ssa.Builder reports for it, and its reference count
+// taken from the values info captured by assignVirtualRegisters.
+func (c *compiler) ValueDefinition(value ssa.Value) SSAValueDefinition {
+	def := SSAValueDefinition{V: value}
+	def.Instr = c.ssaBuilder.InstructionOfValue(value)
+	def.RefCount = c.ssaValuesInfo[value.ID()].RefCount
+	return def
+}
+
+// VRegOf implements Compiler.VRegOf by looking the value's ID up in the
+// value-to-register table filled by assignVirtualRegisters.
+func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg {
+	id := value.ID()
+	return c.ssaValueToVRegs[id]
+}
+
+// Format implements Compiler.Format; the debug string is produced by the Machine.
+func (c *compiler) Format() string {
+	mach := c.mach
+	return mach.Format()
+}
+
+// TypeOf implements Compiler.TypeOf by returning the ssa.Type recorded for the
+// register's ID when it was allocated by AllocateVReg.
+func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type {
+	id := v.ID()
+	return c.ssaTypeOfVRegID[id]
+}
+
+// MatchInstr implements Compiler.MatchInstr. It reports whether def comes from an
+// instruction that has the given opcode, belongs to the current instruction group,
+// and whose value is referenced fewer than two times.
+func (c *compiler) MatchInstr(def SSAValueDefinition, opcode ssa.Opcode) bool {
+	if !def.IsFromInstr() || def.RefCount >= 2 {
+		return false
+	}
+	instr := def.Instr
+	return instr.Opcode() == opcode && instr.GroupID() == c.currentGID
+}
+
+// MatchInstrOneOf implements Compiler.MatchInstrOneOf. It applies the same checks as
+// MatchInstr (defined by an instruction, in the current group, referenced fewer than
+// two times) and returns the instruction's opcode if it is one of opcodes;
+// otherwise it returns ssa.OpcodeInvalid.
+func (c *compiler) MatchInstrOneOf(def SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode {
+	if !def.IsFromInstr() || def.Instr.GroupID() != c.currentGID || def.RefCount >= 2 {
+		return ssa.OpcodeInvalid
+	}
+
+	got := def.Instr.Opcode()
+	for i := range opcodes {
+		if opcodes[i] == got {
+			return got
+		}
+	}
+	return ssa.OpcodeInvalid
+}
+
+// SSABuilder implements Compiler.SSABuilder.
+func (c *compiler) SSABuilder() ssa.Builder {
+	builder := c.ssaBuilder
+	return builder
+}
+
+// AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo.
+func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) {
+	c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{
+		SourceOffset:     sourceOffset,
+		ExecutableOffset: executableOffset,
+	})
+}
+
+// SourceOffsetInfo implements Compiler.SourceOffsetInfo. The returned slice is the
+// compiler's own list, not a copy.
+func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
+	offsets := c.sourceOffsets
+	return offsets
+}
+
+// AddRelocationInfo implements Compiler.AddRelocationInfo.
+func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef, isTailCall bool) {
+	c.relocations = append(c.relocations, RelocationInfo{
+		Offset:     int64(len(c.buf)),
+		FuncRef:    funcRef,
+		IsTailCall: isTailCall,
+	})
+}
+
+// Emit8Bytes implements Compiler.Emit8Bytes. The value is appended to the code
+// buffer least significant byte first (little-endian).
+func (c *compiler) Emit8Bytes(b uint64) {
+	lo, hi := uint32(b), uint32(b>>32)
+	c.buf = append(c.buf,
+		byte(lo), byte(lo>>8), byte(lo>>16), byte(lo>>24),
+		byte(hi), byte(hi>>8), byte(hi>>16), byte(hi>>24),
+	)
+}
+
+// Emit4Bytes implements Compiler.Emit4Bytes. The value is appended to the code
+// buffer least significant byte first (little-endian).
+func (c *compiler) Emit4Bytes(b uint32) {
+	b0, b1, b2, b3 := byte(b), byte(b>>8), byte(b>>16), byte(b>>24)
+	c.buf = append(c.buf, b0, b1, b2, b3)
+}
+
+// EmitByte implements Compiler.EmitByte by appending b to the code buffer.
+func (c *compiler) EmitByte(b byte) {
+	code := append(c.buf, b)
+	c.buf = code
+}
+
+// Buf implements Compiler.Buf. The returned slice is the compiler's own code
+// buffer, not a copy.
+func (c *compiler) Buf() []byte {
+	code := c.buf
+	return code
+}
+
+// BufPtr implements Compiler.BufPtr. It returns the address of the code buffer
+// field, so writes through the pointer are seen by the compiler.
+func (c *compiler) BufPtr() *[]byte {
+	ptr := &c.buf
+	return ptr
+}
+
+// GetFunctionABI implements Compiler.GetFunctionABI.
+//
+// ABIs are cached in c.abis, indexed by the signature ID. The table is grown on
+// demand, and an entry is initialized the first time its signature is requested,
+// using the argument/result registers captured when the compiler was created.
+func (c *compiler) GetFunctionABI(sig *ssa.Signature) *FunctionABI {
+	id := int(sig.ID)
+	if id >= len(c.abis) {
+		c.abis = append(c.abis, make([]FunctionABI, id+1)...)
+	}
+
+	abi := &c.abis[id]
+	if !abi.Initialized {
+		abi.Init(sig, c.argResultInts, c.argResultFloats)
+	}
+	return abi
+}
@@ -0,0 +1,226 @@
+package backend
+
+import (
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+)
+
+// Lower implements Compiler.Lower.
+//
+// It assigns virtual registers, sets the machine's current ABI from the signature of the
+// function held by the SSA builder, tells the machine that lowering of a function starts,
+// and then lowers every block.
+func (c *compiler) Lower() {
+	c.assignVirtualRegisters()
+	c.mach.SetCurrentABI(c.GetFunctionABI(c.ssaBuilder.Signature()))
+	c.mach.StartLoweringFunction(c.ssaBuilder.BlockIDMax())
+	c.lowerBlocks()
+}
+
+// lowerBlocks lowers each block in the ssa.Builder.
+func (c *compiler) lowerBlocks() {
+	builder := c.ssaBuilder
+	for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
+		c.lowerBlock(blk)
+	}
+
+	// After lowering all blocks, we need to link adjacent blocks to layout one single instruction list.
+	var prev ssa.BasicBlock
+	for next := builder.BlockIteratorReversePostOrderBegin(); next != nil; next = builder.BlockIteratorReversePostOrderNext() {
+		if prev != nil {
+			c.mach.LinkAdjacentBlocks(prev, next)
+		}
+		prev = next
+	}
+}
+
+// lowerBlock lowers a single basic block.
+//
+// The block is processed from its tail towards its head: first the (at most two)
+// trailing branch instructions, then the remaining instructions in reverse order, and
+// finally, for the entry block only, the copies of the function parameters.
+func (c *compiler) lowerBlock(blk ssa.BasicBlock) {
+	mach := c.mach
+	mach.StartBlock(blk)
+
+	// We traverse the instructions in reverse order because we might want to lower multiple
+	// instructions together.
+	cur := blk.Tail()
+
+	// First gather the branching instructions at the end of the blocks.
+	//
+	// br0 is the last instruction of the block and br1 the branch right before it, if any.
+	// br1 is only ever assigned after br0 has been assigned, so "br1 != nil && br0 == nil"
+	// cannot happen and needs no runtime check.
+	var br0, br1 *ssa.Instruction
+	if cur.IsBranching() {
+		br0 = cur
+		cur = cur.Prev()
+		if cur != nil && cur.IsBranching() {
+			br1 = cur
+			cur = cur.Prev()
+		}
+	}
+
+	if br0 != nil {
+		c.lowerBranches(br0, br1)
+	}
+
+	// Now start lowering the non-branching instructions.
+	for ; cur != nil; cur = cur.Prev() {
+		c.setCurrentGroupID(cur.GroupID())
+		if cur.Lowered() {
+			// Already handled, e.g. merged into the lowering of a later instruction.
+			continue
+		}
+
+		switch cur.Opcode() {
+		case ssa.OpcodeReturn:
+			rets := cur.ReturnVals()
+			if len(rets) > 0 {
+				c.mach.LowerReturns(rets)
+			}
+			c.mach.InsertReturn()
+		default:
+			mach.LowerInstr(cur)
+		}
+		mach.FlushPendingInstructions()
+	}
+
+	// Finally, if this is the entry block, we have to insert copies of arguments from the real location to the VReg.
+	if blk.EntryBlock() {
+		c.lowerFunctionArguments(blk)
+	}
+
+	mach.EndBlock()
+}
+
+// lowerBranches is called right after StartBlock and before any LowerInstr call if
+// the given block ends with branches. br0 is the very end of the block and br1 is the
+// branch right before br0 if it exists.
+// br0 is never nil, but br1 can be nil if there's no branching before br0.
+//
+// When br0 is an unconditional jump that carries arguments, the arguments are lowered
+// either as return values (when the target is the return block) or as block arguments of
+// the target block.
+//
+// See ssa.Instruction IsBranching, and the comment on ssa.BasicBlock.
+func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) {
+	mach := c.mach
+
+	c.setCurrentGroupID(br0.GroupID())
+	c.mach.LowerSingleBranch(br0)
+	mach.FlushPendingInstructions()
+	if br1 != nil {
+		c.setCurrentGroupID(br1.GroupID())
+		c.mach.LowerConditionalBranch(br1)
+		mach.FlushPendingInstructions()
+	}
+
+	if br0.Opcode() == ssa.OpcodeJump {
+		_, args, targetBlockID := br0.BranchData()
+		argExists := len(args) != 0
+		if argExists && br1 != nil {
+			// A jump with arguments must be the only branch of its block.
+			panic("BUG: critical edge split failed")
+		}
+		target := c.ssaBuilder.BasicBlock(targetBlockID)
+		if argExists {
+			if target.ReturnBlock() {
+				c.mach.LowerReturns(args)
+			} else {
+				c.lowerBlockArguments(args, target)
+			}
+		}
+	}
+	mach.FlushPendingInstructions()
+}
+
+// lowerFunctionArguments passes the parameters of the entry block to the machine so that
+// they get copied from their real locations into virtual registers.
+//
+// Parameters whose reference count is zero are replaced by ssa.ValueInvalid, while the
+// position of every parameter in the list is preserved.
+func (c *compiler) lowerFunctionArguments(entry ssa.BasicBlock) {
+	mach := c.mach
+
+	// Reuse the scratch slice to avoid allocating per function.
+	vals := c.tmpVals[:0]
+	info := c.ssaBuilder.ValuesInfo()
+	for i := 0; i < entry.Params(); i++ {
+		p := entry.Param(i)
+		used := info[p.ID()].RefCount > 0
+		if !used {
+			// If the argument is not used, we can just pass an invalid value.
+			p = ssa.ValueInvalid
+		}
+		vals = append(vals, p)
+	}
+	c.tmpVals = vals
+
+	mach.LowerParams(c.tmpVals)
+	mach.FlushPendingInstructions()
+}
+
+// lowerBlockArguments lowers how to pass arguments to the given successor block.
+//
+// Each argument is classified either as a "const edge" (the argument is defined by a
+// constant instruction, which is materialized directly into the destination register) or
+// as a "var edge" (a register-to-register move). If any destination register of the var
+// edges is also a source register of some var edge, the moves go through freshly
+// allocated temporaries so that no source is overwritten before it is read.
+//
+// It panics if the number of arguments differs from the number of parameters of succ.
+func (c *compiler) lowerBlockArguments(args []ssa.Value, succ ssa.BasicBlock) {
+	if len(args) != succ.Params() {
+		panic("BUG: mismatched number of arguments")
+	}
+
+	// Reset the scratch slices reused across calls.
+	c.varEdges = c.varEdges[:0]
+	c.varEdgeTypes = c.varEdgeTypes[:0]
+	c.constEdges = c.constEdges[:0]
+	for i := 0; i < len(args); i++ {
+		dst := succ.Param(i)
+		src := args[i]
+
+		dstReg := c.VRegOf(dst)
+		srcInstr := c.ssaBuilder.InstructionOfValue(src)
+		if srcInstr != nil && srcInstr.Constant() {
+			c.constEdges = append(c.constEdges, struct {
+				cInst *ssa.Instruction
+				dst   regalloc.VReg
+			}{cInst: srcInstr, dst: dstReg})
+		} else {
+			srcReg := c.VRegOf(src)
+			// Even when src == dst, insert the move so that such registers are kept alive.
+			c.varEdges = append(c.varEdges, [2]regalloc.VReg{srcReg, dstReg})
+			c.varEdgeTypes = append(c.varEdgeTypes, src.Type())
+		}
+	}
+
+	// Check if there's an overlap among the dsts and srcs in varEdges.
+	// First mark every source ID in vRegSet, remembering the IDs so they can be cleared below.
+	c.vRegIDs = c.vRegIDs[:0]
+	for _, edge := range c.varEdges {
+		src := edge[0].ID()
+		if int(src) >= len(c.vRegSet) {
+			c.vRegSet = append(c.vRegSet, make([]bool, src+1)...)
+		}
+		c.vRegSet[src] = true
+		c.vRegIDs = append(c.vRegIDs, src)
+	}
+	// Then look for a destination ID that has been marked as a source.
+	separated := true
+	for _, edge := range c.varEdges {
+		dst := edge[1].ID()
+		if int(dst) >= len(c.vRegSet) {
+			// dst is beyond the set, hence it cannot have been marked as a source.
+			c.vRegSet = append(c.vRegSet, make([]bool, dst+1)...)
+		} else {
+			if c.vRegSet[dst] {
+				separated = false
+				break
+			}
+		}
+	}
+	for _, id := range c.vRegIDs {
+		c.vRegSet[id] = false // reset for the next use.
+	}
+
+	if separated {
+		// If there's no overlap, we can simply move the source to destination.
+		for i, edge := range c.varEdges {
+			src, dst := edge[0], edge[1]
+			c.mach.InsertMove(dst, src, c.varEdgeTypes[i])
+		}
+	} else {
+		// Otherwise, we allocate one temporary register per edge and go through it.
+		//
+		// First move all of the sources to temporary registers.
+		c.tempRegs = c.tempRegs[:0]
+		for i, edge := range c.varEdges {
+			src := edge[0]
+			typ := c.varEdgeTypes[i]
+			temp := c.AllocateVReg(typ)
+			c.tempRegs = append(c.tempRegs, temp)
+			c.mach.InsertMove(temp, src, typ)
+		}
+		// Then move the temporary registers to the destination.
+		for i, edge := range c.varEdges {
+			temp := c.tempRegs[i]
+			dst := edge[1]
+			c.mach.InsertMove(dst, temp, c.varEdgeTypes[i])
+		}
+	}
+
+	// Finally, move the constants.
+	for _, edge := range c.constEdges {
+		cInst, dst := edge.cInst, edge.dst
+		c.mach.InsertLoadConstantBlockArg(cInst, dst)
+	}
+}
@@ -0,0 +1,33 @@
+package backend
+
+import "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+
+// GoFunctionCallRequiredStackSize returns the size of the stack required for the Go function call.
+// argBegin is the index of the first argument in the signature which is not either execution context or module context.
+//
+// Every parameter and result occupies at least 8 bytes. The required size is the larger of
+// the total parameter size and the total result size: retUnaligned is that size as is, and
+// ret is the same size rounded up to a multiple of 16 bytes.
+func GoFunctionCallRequiredStackSize(sig *ssa.Signature, argBegin int) (ret, retUnaligned int64) {
+	var paramBytes, resultBytes int64
+	for _, p := range sig.Params[argBegin:] {
+		// We use uint64 for all basic types, except SIMD v128.
+		if sz := int64(p.Size()); sz > 8 {
+			paramBytes += sz
+		} else {
+			paramBytes += 8
+		}
+	}
+	for _, r := range sig.Results {
+		// We use uint64 for all basic types, except SIMD v128.
+		if sz := int64(r.Size()); sz > 8 {
+			resultBytes += sz
+		} else {
+			resultBytes += 8
+		}
+	}
+
+	retUnaligned = resultBytes
+	if paramBytes > resultBytes {
+		retUnaligned = paramBytes
+	}
+	// Align to 16 bytes.
+	ret = (retUnaligned + 15) &^ 15
+	return ret, retUnaligned
+}
@@ -0,0 +1,186 @@
+package amd64
+
+import (
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+)
+
+// For the details of the ABI, see:
+// https://github.com/golang/go/blob/go1.24.0/src/cmd/compile/abi-internal.md#amd64-architecture
+
+var (
+	// intArgResultRegs lists, in order, the integer registers used to pass arguments and results.
+	intArgResultRegs = []regalloc.RealReg{rax, rbx, rcx, rdi, rsi, r8, r9, r10, r11}
+	// floatArgResultRegs lists, in order, the XMM registers used to pass arguments and results.
+	floatArgResultRegs = []regalloc.RealReg{xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7}
+)
+
+// regInfo describes the amd64 registers to the register allocator.
+//
+// rsp and rbp are not allocatable. The callee-saved and caller-saved sets together cover
+// exactly the allocatable registers; note that rdx is placed in the callee-saved set.
+var regInfo = &regalloc.RegisterInfo{
+	AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
+		regalloc.RegTypeInt: {
+			rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15,
+		},
+		regalloc.RegTypeFloat: {
+			xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+		},
+	},
+	CalleeSavedRegisters: regalloc.NewRegSet(
+		rdx, r12, r13, r14, r15,
+		xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+	),
+	CallerSavedRegisters: regalloc.NewRegSet(
+		rax, rcx, rbx, rsi, rdi, r8, r9, r10, r11,
+		xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
+	),
+	// RealRegToVReg maps each real register (used as the index) to its pre-defined VReg.
+	RealRegToVReg: []regalloc.VReg{
+		rax: raxVReg, rcx: rcxVReg, rdx: rdxVReg, rbx: rbxVReg, rsp: rspVReg, rbp: rbpVReg, rsi: rsiVReg, rdi: rdiVReg,
+		r8: r8VReg, r9: r9VReg, r10: r10VReg, r11: r11VReg, r12: r12VReg, r13: r13VReg, r14: r14VReg, r15: r15VReg,
+		xmm0: xmm0VReg, xmm1: xmm1VReg, xmm2: xmm2VReg, xmm3: xmm3VReg, xmm4: xmm4VReg, xmm5: xmm5VReg, xmm6: xmm6VReg,
+		xmm7: xmm7VReg, xmm8: xmm8VReg, xmm9: xmm9VReg, xmm10: xmm10VReg, xmm11: xmm11VReg, xmm12: xmm12VReg,
+		xmm13: xmm13VReg, xmm14: xmm14VReg, xmm15: xmm15VReg,
+	},
+	RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
+	// RealRegType classifies every register numbered below xmm0 as an integer register and
+	// everything else as a float register.
+	RealRegType: func(r regalloc.RealReg) regalloc.RegType {
+		if r < xmm0 {
+			return regalloc.RegTypeInt
+		}
+		return regalloc.RegTypeFloat
+	},
+}
+
+// ArgsResultsRegs implements backend.Machine.
+//
+// It returns the package-level intArgResultRegs and floatArgResultRegs slices (not copies).
+func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
+	return intArgResultRegs, floatArgResultRegs
+}
+
+// LowerParams implements backend.Machine.
+//
+// For each valid value in args, it inserts the instruction that brings the i-th argument of
+// the current ABI into the value's virtual register: a register move for register-passed
+// arguments, or a load relative to RBP for stack-passed ones. Invalid values are skipped.
+//
+// It panics on a stack-passed argument whose type is not one of I32, I64, F32, F64 or V128.
+func (m *machine) LowerParams(args []ssa.Value) {
+	a := m.currentABI
+
+	for i, ssaArg := range args {
+		if !ssaArg.Valid() {
+			continue
+		}
+		reg := m.c.VRegOf(ssaArg)
+		arg := &a.Args[i]
+		if arg.Kind == backend.ABIArgKindReg {
+			m.InsertMove(reg, arg.Reg, arg.Type)
+		} else {
+			//
+			//            (high address)
+			//          +-----------------+
+			//          |     .......     |
+			//          |      ret Y      |
+			//          |     .......     |
+			//          |      ret 0      |
+			//          |      arg X      |
+			//          |     .......     |
+			//          |      arg 1      |
+			//          |      arg 0      |
+			//          |   ReturnAddress |
+			//          |    Caller_RBP   |
+			//          +-----------------+ <-- RBP
+			//          |   ...........   |
+			//          |   clobbered  M  |
+			//          |   ............  |
+			//          |   clobbered  0  |
+			//          |   spill slot N  |
+			//          |   ...........   |
+			//          |   spill slot 0  |
+			//   RSP--> +-----------------+
+			//             (low address)
+
+			// Load the value from the arg stack slot above the current RBP.
+			// The +16 skips the Caller_RBP and ReturnAddress slots shown in the diagram above.
+			load := m.allocateInstr()
+			mem := newOperandMem(m.newAmodeImmRBPReg(uint32(arg.Offset + 16)))
+			switch arg.Type {
+			case ssa.TypeI32:
+				load.asMovzxRmR(extModeLQ, mem, reg)
+			case ssa.TypeI64:
+				load.asMov64MR(mem, reg)
+			case ssa.TypeF32:
+				load.asXmmUnaryRmR(sseOpcodeMovss, mem, reg)
+			case ssa.TypeF64:
+				load.asXmmUnaryRmR(sseOpcodeMovsd, mem, reg)
+			case ssa.TypeV128:
+				load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, reg)
+			default:
+				panic("BUG")
+			}
+			m.insert(load)
+		}
+	}
+}
+
+// LowerReturns implements backend.Machine.
+//
+// The return values are lowered in two passes over rets: the non-integer (XMM) results
+// first, as they might need a temporary register to inline a constant return, and the
+// integer (GPR) results second.
+func (m *machine) LowerReturns(rets []ssa.Value) {
+	abi := m.currentABI
+	for _, wantInt := range [2]bool{false, true} {
+		for i, ret := range rets {
+			if r := &abi.Rets[i]; r.Type.IsInt() == wantInt {
+				m.LowerReturn(ret, r)
+			}
+		}
+	}
+}
+
+// LowerReturn lowers a single return value ret into the location described by r.
+//
+// If ret is defined by a constant instruction, the constant is first materialized into
+// ret's virtual register. The register is then either moved into the ABI result register
+// or stored into the result stack slot above RBP.
+//
+// It panics on a stack-passed result whose type is not one of I32, I64, F32, F64 or V128,
+// in the same way as LowerParams does for arguments.
+func (m *machine) LowerReturn(ret ssa.Value, r *backend.ABIArg) {
+	reg := m.c.VRegOf(ret)
+	if def := m.c.ValueDefinition(ret); def.IsFromInstr() {
+		// Constant instructions are inlined.
+		if inst := def.Instr; inst.Constant() {
+			m.insertLoadConstant(inst, reg)
+		}
+	}
+	if r.Kind == backend.ABIArgKindReg {
+		m.InsertMove(r.Reg, reg, ret.Type())
+	} else {
+		//
+		//            (high address)
+		//          +-----------------+
+		//          |     .......     |
+		//          |      ret Y      |
+		//          |     .......     |
+		//          |      ret 0      |
+		//          |      arg X      |
+		//          |     .......     |
+		//          |      arg 1      |
+		//          |      arg 0      |
+		//          |   ReturnAddress |
+		//          |    Caller_RBP   |
+		//          +-----------------+ <-- RBP
+		//          |   ...........   |
+		//          |   clobbered  M  |
+		//          |   ............  |
+		//          |   clobbered  0  |
+		//          |   spill slot N  |
+		//          |   ...........   |
+		//          |   spill slot 0  |
+		//   RSP--> +-----------------+
+		//             (low address)
+
+		// Store the value to the return stack slot above the current RBP.
+		// The result slots sit above the argument slots, hence ArgStackSize; the +16 skips
+		// the Caller_RBP and ReturnAddress slots shown in the diagram above.
+		store := m.allocateInstr()
+		mem := newOperandMem(m.newAmodeImmRBPReg(uint32(m.currentABI.ArgStackSize + 16 + r.Offset)))
+		switch r.Type {
+		case ssa.TypeI32:
+			store.asMovRM(reg, mem, 4)
+		case ssa.TypeI64:
+			store.asMovRM(reg, mem, 8)
+		case ssa.TypeF32:
+			store.asXmmMovRM(sseOpcodeMovss, reg, mem)
+		case ssa.TypeF64:
+			store.asXmmMovRM(sseOpcodeMovsd, reg, mem)
+		case ssa.TypeV128:
+			store.asXmmMovRM(sseOpcodeMovdqu, reg, mem)
+		default:
+			// Never insert an unconfigured store instruction.
+			panic("BUG")
+		}
+		m.insert(store)
+	}
+}
@@ -0,0 +1,9 @@
+package amd64
+
+// entrypoint enters the machine code generated by this backend, starting at preambleExecutable.
+// NOTE(review): the preamble was previously described as "generated by functionABI.EmitGoEntryPreamble below";
+// no such function is visible next to this declaration — presumably it is the output of
+// machine.CompileEntryPreamble; confirm.
+// This implements wazevo.entrypoint, and see the comments there for detail.
+//
+// The function has no Go body; it is implemented in assembly.
+func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)
+
+// afterGoFunctionCallEntrypoint enters the machine code after growing the stack.
+// This implements wazevo.afterGoFunctionCallEntrypoint, and see the comments there for detail.
+//
+// The function has no Go body; it is implemented in assembly.
+func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
@@ -0,0 +1,29 @@
+#include "funcdata.h"
+#include "textflag.h"
+
+// entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)
+//
+// Loads the arguments from the Go stack frame into fixed registers and jumps to the preamble.
+// The symbolic names in front of each +offset(FP) must match the parameter names of the Go
+// declaration, otherwise `go vet` (asmdecl) reports them.
+TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48
+	MOVQ preambleExecutable+0(FP), R11
+	MOVQ functionExecutable+8(FP), R14
+	MOVQ executionContextPtr+16(FP), AX       // First argument is passed in AX.
+	MOVQ moduleContextPtr+24(FP), BX          // Second argument is passed in BX.
+	MOVQ paramResultPtr+32(FP), R12
+	MOVQ goAllocatedStackSlicePtr+40(FP), R13
+	JMP  R11
+
+// afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
+//
+// Saves the current BP/SP into the execution context, switches BP/SP to the given
+// framePointer/stackPointer, and jumps to executable.
+TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32
+	MOVQ executable+0(FP), CX
+	MOVQ executionContextPtr+8(FP), AX // First argument is passed in AX.
+
+	// Save the stack pointer and frame pointer.
+	MOVQ BP, 16(AX) // 16 == ExecutionContextOffsetOriginalFramePointer
+	MOVQ SP, 24(AX) // 24 == ExecutionContextOffsetOriginalStackPointer
+
+	// Then set the stack pointer and frame pointer to the values we got from the Go runtime.
+	MOVQ framePointer+24(FP), BP
+
+	// WARNING: do not update SP before BP, because Go translates (FP) as (SP) + 8,
+	// i.e. the framePointer argument above must be read while SP is still the original one.
+	MOVQ stackPointer+16(FP), SP
+
+	JMP CX
@@ -0,0 +1,248 @@
+package amd64
+
+import (
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
+)
+
+var (
+	// executionContextPtrReg is the register in which the execution context pointer arrives.
+	executionContextPtrReg = raxVReg
+
+	// The following are callee-saved registers. They can be used freely in the entry preamble
+	// since the preamble is called via a Go assembly function which has a stack-based ABI.
+
+	// savedExecutionContextPtr also must be a callee-saved reg so that it can be used in the prologue and epilogue.
+	savedExecutionContextPtr = rdxVReg
+	// paramResultSlicePtr must match with entrypoint function in abi_entry_amd64.s.
+	paramResultSlicePtr = r12VReg
+	// goAllocatedStackPtr must match with entrypoint function in abi_entry_amd64.s.
+	goAllocatedStackPtr = r13VReg
+	// functionExecutable must match with entrypoint function in abi_entry_amd64.s.
+	functionExecutable = r14VReg
+	// tmpIntReg and tmpXmmReg are the scratch registers used by the entry preamble to shuffle
+	// stack-passed arguments and results.
+	tmpIntReg          = r15VReg
+	tmpXmmReg          = xmm15VReg
+)
+
+// CompileEntryPreamble implements backend.Machine.
+//
+// It builds the entry preamble instruction list for the given signature, encodes it, and
+// returns the compiler's output buffer.
+func (m *machine) CompileEntryPreamble(sig *ssa.Signature) []byte {
+	m.encodeWithoutSSA(m.compileEntryPreamble(sig))
+	return m.c.Buf()
+}
+
+// compileEntryPreamble builds the instruction list of the entry preamble for sig and returns
+// its root (a nop that heads the list).
+//
+// The generated sequence saves the execution context pointer and the original RBP/RSP,
+// switches RSP to the Go-allocated stack, copies the parameters from the param/result slice
+// into their ABI locations, calls the function, copies the results back into the same
+// slice, restores the original RBP/RSP and returns.
+func (m *machine) compileEntryPreamble(sig *ssa.Signature) *instruction {
+	abi := backend.FunctionABI{}
+	abi.Init(sig, intArgResultRegs, floatArgResultRegs)
+
+	root := m.allocateNop()
+
+	//// ----------------------------------- prologue ----------------------------------- ////
+
+	// First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well.
+	// 		mov %executionContextPtrReg, %savedExecutionContextPtr
+	cur := m.move64(executionContextPtrReg, savedExecutionContextPtr, root)
+
+	// Next is to save the original RBP and RSP into the execution context.
+	cur = m.saveOriginalRSPRBP(cur)
+
+	// Now set the RSP to the Go-allocated stack pointer.
+	// 		mov %goAllocatedStackPtr, %rsp
+	cur = m.move64(goAllocatedStackPtr, rspVReg, cur)
+
+	if stackSlotSize := abi.AlignedArgResultStackSlotSize(); stackSlotSize > 0 {
+		// Allocate stack slots for the arguments and return values.
+		// 		sub $stackSlotSize, %rsp
+		spDec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(stackSlotSize)), rspVReg, true)
+		cur = linkInstr(cur, spDec)
+	}
+
+	// offset is the byte offset into the param/result slice: 16 bytes per v128, 8 bytes otherwise.
+	var offset uint32
+	for i := range abi.Args {
+		if i < 2 {
+			// execution context ptr and module context ptr are passed in rax and rbx by the Go assembly function.
+			continue
+		}
+		arg := &abi.Args[i]
+		cur = m.goEntryPreamblePassArg(cur, paramResultSlicePtr, offset, arg)
+		if arg.Type == ssa.TypeV128 {
+			offset += 16
+		} else {
+			offset += 8
+		}
+	}
+
+	// Zero out RBP so that the unwind/stack growth code can correctly detect the end of the stack.
+	zerosRbp := m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(rbpVReg), rbpVReg, true)
+	cur = linkInstr(cur, zerosRbp)
+
+	// Now ready to call the real function. Note that at this point the stack pointer is already set to the
+	// Go-allocated stack, which is aligned to 16 bytes.
+	call := m.allocateInstr().asCallIndirect(newOperandReg(functionExecutable), &abi)
+	cur = linkInstr(cur, call)
+
+	//// ----------------------------------- epilogue ----------------------------------- ////
+
+	// Read the results from regs and the stack, and set them correctly into the paramResultSlicePtr.
+	offset = 0
+	for i := range abi.Rets {
+		r := &abi.Rets[i]
+		cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, offset, r, uint32(abi.ArgStackSize))
+		if r.Type == ssa.TypeV128 {
+			offset += 16
+		} else {
+			offset += 8
+		}
+	}
+
+	// Finally, restore the original RBP and RSP.
+	cur = m.restoreOriginalRSPRBP(cur)
+
+	ret := m.allocateInstr().asRet()
+	linkInstr(cur, ret)
+	return root
+}
+
+// saveOriginalRSPRBP saves the original RSP and RBP into the execution context.
+//
+// The stores are addressed via executionContextPtrReg, RBP first and RSP second. The two
+// instructions are linked after cur, and the last linked instruction is returned.
+func (m *machine) saveOriginalRSPRBP(cur *instruction) *instruction {
+	// 		mov %rbp, wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg)
+	// 		mov %rsp, wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg)
+	cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, true, cur)
+	cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, true, cur)
+	return cur
+}
+
+// restoreOriginalRSPRBP restores the original RSP and RBP from the execution context.
+//
+// Unlike saveOriginalRSPRBP, the loads are addressed via savedExecutionContextPtr (the
+// callee-saved copy of the execution context pointer). The two instructions are linked
+// after cur, and the last linked instruction is returned.
+func (m *machine) restoreOriginalRSPRBP(cur *instruction) *instruction {
+	// 		mov wazevoapi.ExecutionContextOffsetOriginalFramePointer(%savedExecutionContextPtr), %rbp
+	// 		mov wazevoapi.ExecutionContextOffsetOriginalStackPointer(%savedExecutionContextPtr), %rsp
+	cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, false, cur)
+	cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, false, cur)
+	return cur
+}
+
+// move64 allocates a 64-bit register-to-register move from src to dst, links it after prev,
+// and returns the value of linkInstr.
+func (m *machine) move64(src, dst regalloc.VReg, prev *instruction) *instruction {
+	mov := m.allocateInstr().asMovRR(src, dst, true)
+	return linkInstr(prev, mov)
+}
+
+// loadOrStore64AtExecutionCtx links, after prev, a 64-bit access to the memory at
+// offset(execCtx): a store of r when store is true, or a load into r otherwise.
+// It returns the value of linkInstr.
+func (m *machine) loadOrStore64AtExecutionCtx(execCtx regalloc.VReg, offset wazevoapi.Offset, r regalloc.VReg, store bool, prev *instruction) *instruction {
+	addr := newOperandMem(m.newAmodeImmReg(offset.U32(), execCtx))
+	mov := m.allocateInstr()
+	switch {
+	case store:
+		mov.asMovRM(r, addr, 8)
+	default:
+		mov.asMov64MR(addr, r)
+	}
+	return linkInstr(prev, mov)
+}
+
+// linkUD2 links a UD2 instruction after cur and returns the value of linkInstr.
+// This is for debugging.
+func (m *machine) linkUD2(cur *instruction) *instruction { //nolint
+	return linkInstr(cur, m.allocateInstr().asUD2())
+}
+
+// goEntryPreamblePassArg links, after cur, the instructions that copy one argument from the
+// parameter slice (at offsetInParamSlice from paramSlicePtr) into the location described by arg.
+//
+// Register-passed arguments are loaded directly into arg.Reg. Stack-passed arguments are
+// loaded into a temporary register and then stored at arg.Offset from RSP.
+// It returns the last linked instruction.
+//
+// It panics on a stack-passed argument whose type is not one of I32, I64, F32, F64 or V128.
+func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, offsetInParamSlice uint32, arg *backend.ABIArg) *instruction {
+	var dst regalloc.VReg
+	argTyp := arg.Type
+	if arg.Kind == backend.ABIArgKindStack {
+		// Stack-passed arguments go through the temporary register of the matching class.
+		switch argTyp {
+		case ssa.TypeI32, ssa.TypeI64:
+			dst = tmpIntReg
+		case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
+			dst = tmpXmmReg
+		default:
+			panic("BUG")
+		}
+	} else {
+		dst = arg.Reg
+	}
+
+	// Load the value from the parameter slice into dst.
+	load := m.allocateInstr()
+	a := newOperandMem(m.newAmodeImmReg(offsetInParamSlice, paramSlicePtr))
+	switch arg.Type {
+	case ssa.TypeI32:
+		load.asMovzxRmR(extModeLQ, a, dst)
+	case ssa.TypeI64:
+		load.asMov64MR(a, dst)
+	case ssa.TypeF32:
+		load.asXmmUnaryRmR(sseOpcodeMovss, a, dst)
+	case ssa.TypeF64:
+		load.asXmmUnaryRmR(sseOpcodeMovsd, a, dst)
+	case ssa.TypeV128:
+		load.asXmmUnaryRmR(sseOpcodeMovdqu, a, dst)
+	}
+
+	cur = linkInstr(cur, load)
+	if arg.Kind == backend.ABIArgKindStack {
+		// Store back to the stack.
+		store := m.allocateInstr()
+		a := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset), rspVReg))
+		switch arg.Type {
+		case ssa.TypeI32:
+			store.asMovRM(dst, a, 4)
+		case ssa.TypeI64:
+			store.asMovRM(dst, a, 8)
+		case ssa.TypeF32:
+			store.asXmmMovRM(sseOpcodeMovss, dst, a)
+		case ssa.TypeF64:
+			store.asXmmMovRM(sseOpcodeMovsd, dst, a)
+		case ssa.TypeV128:
+			store.asXmmMovRM(sseOpcodeMovdqu, dst, a)
+		}
+		cur = linkInstr(cur, store)
+	}
+	return cur
+}
+
+// goEntryPreamblePassResult links, after cur, the instructions that copy one result from the
+// location described by result into the result slice (at offsetInResultSlice from resultSlicePtr).
+//
+// Register-passed results are stored directly from result.Reg. Stack-passed results are
+// first loaded into a temporary register from resultStackSlotBeginOffset+result.Offset
+// relative to RSP. It returns the value of the final linkInstr.
+//
+// It panics on a stack-passed result whose type is not one of I32, I64, F32, F64 or V128.
+func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, offsetInResultSlice uint32, result *backend.ABIArg, resultStackSlotBeginOffset uint32) *instruction {
+	var r regalloc.VReg
+	if result.Kind == backend.ABIArgKindStack {
+		// Load the value to the temporary.
+		load := m.allocateInstr()
+		offset := resultStackSlotBeginOffset + uint32(result.Offset)
+		a := newOperandMem(m.newAmodeImmReg(offset, rspVReg))
+		switch result.Type {
+		case ssa.TypeI32:
+			r = tmpIntReg
+			load.asMovzxRmR(extModeLQ, a, r)
+		case ssa.TypeI64:
+			r = tmpIntReg
+			load.asMov64MR(a, r)
+		case ssa.TypeF32:
+			r = tmpXmmReg
+			load.asXmmUnaryRmR(sseOpcodeMovss, a, r)
+		case ssa.TypeF64:
+			r = tmpXmmReg
+			load.asXmmUnaryRmR(sseOpcodeMovsd, a, r)
+		case ssa.TypeV128:
+			r = tmpXmmReg
+			load.asXmmUnaryRmR(sseOpcodeMovdqu, a, r)
+		default:
+			panic("BUG")
+		}
+		cur = linkInstr(cur, load)
+	} else {
+		r = result.Reg
+	}
+
+	// Store the value into the result slice.
+	store := m.allocateInstr()
+	a := newOperandMem(m.newAmodeImmReg(offsetInResultSlice, resultSlicePtr))
+	switch result.Type {
+	case ssa.TypeI32:
+		store.asMovRM(r, a, 4)
+	case ssa.TypeI64:
+		store.asMovRM(r, a, 8)
+	case ssa.TypeF32:
+		store.asXmmMovRM(sseOpcodeMovss, r, a)
+	case ssa.TypeF64:
+		store.asXmmMovRM(sseOpcodeMovsd, r, a)
+	case ssa.TypeV128:
+		store.asXmmMovRM(sseOpcodeMovdqu, r, a)
+	}
+
+	return linkInstr(cur, store)
+}
@@ -0,0 +1,440 @@
+package amd64
+
+import (
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
+)
+
+// calleeSavedVRegs lists the VRegs of the registers treated as callee-saved by this backend.
+// CompileGoFunctionTrampoline saves them into the execution context before calling out to Go.
+var calleeSavedVRegs = []regalloc.VReg{
+	rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
+	xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
+}
+
+// CompileGoFunctionTrampoline implements backend.Machine.
+func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
+	argBegin := 1 // Skips exec context by default.
+	if needModuleContextPtr {
+		argBegin++
+	}
+
+	abi := &backend.FunctionABI{}
+	abi.Init(sig, intArgResultRegs, floatArgResultRegs)
+	m.currentABI = abi
+
+	cur := m.allocateNop()
+	m.rootInstr = cur
+
+	// Execution context is always the first argument.
+	execCtrPtr := raxVReg
+
+	// First we update RBP and RSP just like the normal prologue.
+	//
+	//                   (high address)                     (high address)
+	//       RBP ----> +-----------------+                +-----------------+
+	//                 |     .......     |                |     .......     |
+	//                 |      ret Y      |                |      ret Y      |
+	//                 |     .......     |                |     .......     |
+	//                 |      ret 0      |                |      ret 0      |
+	//                 |      arg X      |                |      arg X      |
+	//                 |     .......     |     ====>      |     .......     |
+	//                 |      arg 1      |                |      arg 1      |
+	//                 |      arg 0      |                |      arg 0      |
+	//                 |   Return Addr   |                |   Return Addr   |
+	//       RSP ----> +-----------------+                |    Caller_RBP   |
+	//                    (low address)                   +-----------------+ <----- RSP, RBP
+	//
+	cur = m.setupRBPRSP(cur)
+
+	goSliceSizeAligned, goSliceSizeAlignedUnaligned := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
+	cur = m.insertStackBoundsCheck(goSliceSizeAligned+8 /* size of the Go slice */, cur)
+
+	// Save the callee saved registers.
+	cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)
+
+	if needModuleContextPtr {
+		moduleCtrPtr := rbxVReg // Module context is always the second argument.
+		mem := m.newAmodeImmReg(
+			wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.U32(),
+			execCtrPtr)
+		store := m.allocateInstr().asMovRM(moduleCtrPtr, newOperandMem(mem), 8)
+		cur = linkInstr(cur, store)
+	}
+
+	// Now let's advance the RSP to the stack slot for the arguments.
+	//
+	//                (high address)                     (high address)
+	//              +-----------------+               +-----------------+
+	//              |     .......     |               |     .......     |
+	//              |      ret Y      |               |      ret Y      |
+	//              |     .......     |               |     .......     |
+	//              |      ret 0      |               |      ret 0      |
+	//              |      arg X      |               |      arg X      |
+	//              |     .......     |   =======>    |     .......     |
+	//              |      arg 1      |               |      arg 1      |
+	//              |      arg 0      |               |      arg 0      |
+	//              |   Return Addr   |               |   Return Addr   |
+	//              |    Caller_RBP   |               |    Caller_RBP   |
+	//  RBP,RSP --> +-----------------+               +-----------------+ <----- RBP
+	//                 (low address)                  |  arg[N]/ret[M]  |
+	//                                                |    ..........   |
+	//                                                |  arg[1]/ret[1]  |
+	//                                                |  arg[0]/ret[0]  |
+	//                                                +-----------------+ <----- RSP
+	//                                                   (low address)
+	//
+	// where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions,
+	// therefore will be accessed as the usual []uint64. So that's where we need to pass/receive
+	// the arguments/return values to/from Go function.
+	cur = m.addRSP(-int32(goSliceSizeAligned), cur)
+
+	// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
+	var offsetInGoSlice int32
+	for i := range abi.Args[argBegin:] {
+		arg := &abi.Args[argBegin+i]
+		var v regalloc.VReg
+		if arg.Kind == backend.ABIArgKindReg {
+			v = arg.Reg
+		} else {
+			// We have saved callee saved registers, so we can use them.
+			if arg.Type.IsInt() {
+				v = r15VReg
+			} else {
+				v = xmm15VReg
+			}
+			mem := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
+			load := m.allocateInstr()
+			switch arg.Type {
+			case ssa.TypeI32:
+				load.asMovzxRmR(extModeLQ, mem, v)
+			case ssa.TypeI64:
+				load.asMov64MR(mem, v)
+			case ssa.TypeF32:
+				load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
+			case ssa.TypeF64:
+				load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
+			case ssa.TypeV128:
+				load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
+			default:
+				panic("BUG")
+			}
+			cur = linkInstr(cur, load)
+		}
+
+		store := m.allocateInstr()
+		mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
+		switch arg.Type {
+		case ssa.TypeI32:
+			store.asMovRM(v, mem, 4)
+			offsetInGoSlice += 8 // always uint64 rep.
+		case ssa.TypeI64:
+			store.asMovRM(v, mem, 8)
+			offsetInGoSlice += 8
+		case ssa.TypeF32:
+			store.asXmmMovRM(sseOpcodeMovss, v, mem)
+			offsetInGoSlice += 8 // always uint64 rep.
+		case ssa.TypeF64:
+			store.asXmmMovRM(sseOpcodeMovsd, v, mem)
+			offsetInGoSlice += 8
+		case ssa.TypeV128:
+			store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
+			offsetInGoSlice += 16
+		default:
+			panic("BUG")
+		}
+		cur = linkInstr(cur, store)
+	}
+
+	// Finally we push the size of the slice to the stack so the stack looks like:
+	//
+	//          (high address)
+	//       +-----------------+
+	//       |     .......     |
+	//       |      ret Y      |
+	//       |     .......     |
+	//       |      ret 0      |
+	//       |      arg X      |
+	//       |     .......     |
+	//       |      arg 1      |
+	//       |      arg 0      |
+	//       |   Return Addr   |
+	//       |    Caller_RBP   |
+	//       +-----------------+ <----- RBP
+	//       |  arg[N]/ret[M]  |
+	//       |    ..........   |
+	//       |  arg[1]/ret[1]  |
+	//       |  arg[0]/ret[0]  |
+	//       |    slice size   |
+	//       +-----------------+ <----- RSP
+	//         (low address)
+	//
+	// 		push $sliceSize
+	cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandImm32(uint32(goSliceSizeAlignedUnaligned))))
+
+	// Load the exitCode to the register.
+	exitCodeReg := r12VReg // Callee saved which is already saved.
+	cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(exitCode), false))
+
+	saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
+	cur = linkInstr(cur, setExitCode)
+	cur = linkInstr(cur, saveRsp)
+	cur = linkInstr(cur, saveRbp)
+
+	// Ready to exit the execution.
+	cur = m.storeReturnAddressAndExit(cur, execCtrPtr)
+
+	// We don't need the slice size anymore, so pop it.
+	cur = m.addRSP(8, cur)
+
+	// Ready to set up the results.
+	offsetInGoSlice = 0
+	// To avoid overwriting with the execution context pointer by the result, we need to track the offset,
+	// and defer the restoration of the result to the end of this function.
+	var argOverlapWithExecCtxOffset int32 = -1
+	for i := range abi.Rets {
+		r := &abi.Rets[i]
+		var v regalloc.VReg
+		isRegResult := r.Kind == backend.ABIArgKindReg
+		if isRegResult {
+			v = r.Reg
+			if v.RealReg() == execCtrPtr.RealReg() {
+				argOverlapWithExecCtxOffset = offsetInGoSlice
+				offsetInGoSlice += 8 // always uint64 rep.
+				continue
+			}
+		} else {
+			if r.Type.IsInt() {
+				v = r15VReg
+			} else {
+				v = xmm15VReg
+			}
+		}
+
+		load := m.allocateInstr()
+		mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
+		switch r.Type {
+		case ssa.TypeI32:
+			load.asMovzxRmR(extModeLQ, mem, v)
+			offsetInGoSlice += 8 // always uint64 rep.
+		case ssa.TypeI64:
+			load.asMov64MR(mem, v)
+			offsetInGoSlice += 8
+		case ssa.TypeF32:
+			load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
+			offsetInGoSlice += 8 // always uint64 rep.
+		case ssa.TypeF64:
+			load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
+			offsetInGoSlice += 8
+		case ssa.TypeV128:
+			load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
+			offsetInGoSlice += 16
+		default:
+			panic("BUG")
+		}
+		cur = linkInstr(cur, load)
+
+		if !isRegResult {
+			// We need to store it back to the result slot above rbp.
+			store := m.allocateInstr()
+			mem := newOperandMem(m.newAmodeImmReg(uint32(abi.ArgStackSize+r.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
+			switch r.Type {
+			case ssa.TypeI32:
+				store.asMovRM(v, mem, 4)
+			case ssa.TypeI64:
+				store.asMovRM(v, mem, 8)
+			case ssa.TypeF32:
+				store.asXmmMovRM(sseOpcodeMovss, v, mem)
+			case ssa.TypeF64:
+				store.asXmmMovRM(sseOpcodeMovsd, v, mem)
+			case ssa.TypeV128:
+				store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
+			default:
+				panic("BUG")
+			}
+			cur = linkInstr(cur, store)
+		}
+	}
+
+	// Before return, we need to restore the callee saved registers.
+	cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)
+
+	if argOverlapWithExecCtxOffset >= 0 {
+		// At this point execCtt is not used anymore, so we can finally store the
+		// result to the register which overlaps with the execution context pointer.
+		mem := newOperandMem(m.newAmodeImmReg(uint32(argOverlapWithExecCtxOffset), rspVReg))
+		load := m.allocateInstr().asMov64MR(mem, execCtrPtr)
+		cur = linkInstr(cur, load)
+	}
+
+	// Finally ready to return.
+	cur = m.revertRBPRSP(cur)
+	linkInstr(cur, m.allocateInstr().asRet())
+
+	m.encodeWithoutSSA(m.rootInstr)
+	return m.c.Buf()
+}
+
+// saveRegistersInExecutionContext emits, after `cur`, one store per register in `regs` which writes the register
+// into the saved-registers area of the execution context pointed to by `execCtx`. The registers are written in the
+// given order, each one into its own 16-byte slot. The last emitted instruction is returned.
+func (m *machine) saveRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
+	slotOffset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
+	for i := range regs {
+		reg := regs[i]
+		st := m.allocateInstr()
+		slot := newOperandMem(m.newAmodeImmReg(uint32(slotOffset), execCtx))
+		switch regType := reg.RegType(); regType {
+		case regalloc.RegTypeFloat:
+			// Float registers are stored in full with an unaligned 128-bit move.
+			st.asXmmMovRM(sseOpcodeMovdqu, reg, slot)
+		case regalloc.RegTypeInt:
+			st.asMovRM(reg, slot, 8)
+		default:
+			panic("BUG")
+		}
+		cur = linkInstr(cur, st)
+		// Every slot is 16 bytes wide regardless of the register type. See the execution context struct.
+		slotOffset += 16
+	}
+	return cur
+}
+
+// restoreRegistersInExecutionContext is the inverse of saveRegistersInExecutionContext: it emits, after `cur`,
+// one load per register in `regs` which reads the register back from its 16-byte slot in the saved-registers area
+// of the execution context pointed to by `execCtx`. The last emitted instruction is returned.
+func (m *machine) restoreRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
+	slotOffset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
+	for i := range regs {
+		reg := regs[i]
+		ld := m.allocateInstr()
+		slot := newOperandMem(m.newAmodeImmReg(uint32(slotOffset), execCtx))
+		switch regType := reg.RegType(); regType {
+		case regalloc.RegTypeFloat:
+			// Float registers are reloaded in full with an unaligned 128-bit move.
+			ld.asXmmUnaryRmR(sseOpcodeMovdqu, slot, reg)
+		case regalloc.RegTypeInt:
+			ld.asMov64MR(slot, reg)
+		default:
+			panic("BUG")
+		}
+		cur = linkInstr(cur, ld)
+		// Every slot is 16 bytes wide regardless of the register type. See the execution context struct.
+		slotOffset += 16
+	}
+	return cur
+}
+
+// storeReturnAddressAndExit emits, after `cur`, the following sequence and returns its last instruction:
+//
+//  1. an LEA which loads the address of a label into r12,
+//  2. a store of r12 into the ExecutionContextOffsetGoCallReturnAddress slot of `execCtx`,
+//  3. the exit sequence for `execCtx`,
+//  4. the label itself (a nop branch target), which is placed right after the exit sequence.
+//
+// r12 is used as the temporary; callers must have saved it beforehand.
+func (m *machine) storeReturnAddressAndExit(cur *instruction, execCtx regalloc.VReg) *instruction {
+	// The LEA is linked first but only configured at the end of this function,
+	// because the label it refers to is allocated after the exit sequence.
+	readRip := m.allocateInstr()
+	cur = linkInstr(cur, readRip)
+
+	ripReg := r12VReg // Callee saved which is already saved.
+	saveRip := m.allocateInstr().asMovRM(
+		ripReg,
+		newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)),
+		8,
+	)
+	cur = linkInstr(cur, saveRip)
+
+	exit := m.allocateExitSeq(execCtx)
+	cur = linkInstr(cur, exit)
+
+	// The label marks the instruction immediately following the exit sequence.
+	nop, l := m.allocateBrTarget()
+	cur = linkInstr(cur, nop)
+	// Now that the label exists, turn the placeholder into `lea label, %r12`.
+	readRip.asLEA(newOperandLabel(l), ripReg)
+	return cur
+}
+
+// stackGrowSaveVRegs is the set of registers that must be saved/restored during growing stack when there's insufficient
+// stack space left. Basically this is all the allocatable registers except for RSP and RBP, and RAX which contains the
+// execution context pointer. ExecCtx pointer is always the first argument so we don't need to save it.
+var stackGrowSaveVRegs = []regalloc.VReg{
+	rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
+	rcxVReg, rbxVReg, rsiVReg, rdiVReg, r8VReg, r9VReg, r10VReg, r11VReg,
+	xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
+	xmm0VReg, xmm1VReg, xmm2VReg, xmm3VReg, xmm4VReg, xmm5VReg, xmm6VReg, xmm7VReg,
+}
+
+// CompileStackGrowCallSequence implements backend.Machine.
+//
+// The generated code sets up the frame (push RBP; mov RSP->RBP), saves stackGrowSaveVRegs into the execution
+// context pointed to by RAX, records wazevoapi.ExitCodeGrowStack together with RSP and RBP, and exits the execution.
+// The code placed after the exit point restores the saved registers, reverts the frame and returns.
+// The encoded machine code is returned as a byte slice obtained from the compiler's buffer.
+func (m *machine) CompileStackGrowCallSequence() []byte {
+	cur := m.allocateNop()
+	m.rootInstr = cur
+
+	cur = m.setupRBPRSP(cur)
+
+	// Execution context is always the first argument.
+	execCtrPtr := raxVReg
+
+	// Save the callee saved and argument registers.
+	cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)
+
+	// Load the exitCode to the register.
+	exitCodeReg := r12VReg // Already saved.
+	cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(wazevoapi.ExitCodeGrowStack), false))
+
+	// Note that the instructions are linked in the order: exit code, RSP, then RBP.
+	saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
+	cur = linkInstr(cur, setExitCode)
+	cur = linkInstr(cur, saveRsp)
+	cur = linkInstr(cur, saveRbp)
+
+	// Ready to exit the execution.
+	cur = m.storeReturnAddressAndExit(cur, execCtrPtr)
+
+	// After the exit, restore the saved registers.
+	cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)
+
+	// Finally ready to return.
+	cur = m.revertRBPRSP(cur)
+	linkInstr(cur, m.allocateInstr().asRet())
+
+	m.encodeWithoutSSA(m.rootInstr)
+	return m.c.Buf()
+}
+
+// insertStackBoundsCheck will insert the instructions after `cur` to check the
+// stack bounds, and if there's no sufficient spaces required for the function,
+// exit the execution and try growing it in Go world.
+//
+// requiredStackSize is the number of bytes the function needs below the current RSP.
+// The last inserted instruction (the `.cont` label) is returned.
+func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
+	//		sub $requiredStackSize, %rsp ;; Temporarily update the sp.
+	// 		cmp ExecutionContextOffsetStackBottomPtr(%rax), %rsp ;; Compare the stack bottom and the sp.
+	// 		ja .ok
+	//		add $requiredStackSize, %rsp ;; Reverse the temporary update.
+	//      pushq r15 ;; save the temporary.
+	//		mov $requiredStackSize, %r15
+	//		mov %r15, ExecutionContextOffsetStackGrowRequiredSize(%rax) ;; Set the required size in the execution context.
+	//      popq r15 ;; restore the temporary.
+	//		callq *ExecutionContextOffsetStackGrowCallTrampolineAddress(%rax) ;; Call the Go function to grow the stack.
+	//		jmp .cont
+	// .ok:
+	//		add $requiredStackSize, %rsp ;; Reverse the temporary update.
+	// .cont:
+
+	// Temporarily lower RSP by the required size (a negative offset makes addRSP emit a SUB).
+	cur = m.addRSP(-int32(requiredStackSize), cur)
+	cur = linkInstr(cur, m.allocateInstr().asCmpRmiR(true,
+		newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackBottomPtr.U32(), raxVReg)),
+		rspVReg, true))
+
+	// Placeholder for `ja .ok`; it is configured below once the .ok label has been allocated.
+	ja := m.allocateInstr()
+	cur = linkInstr(cur, ja)
+
+	// Slow path: reverse the temporary update before requesting the stack growth.
+	cur = m.addRSP(int32(requiredStackSize), cur)
+
+	// Save the temporary.
+
+	cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r15VReg)))
+	// Load the required size to the temporary.
+	cur = linkInstr(cur, m.allocateInstr().asImm(r15VReg, uint64(requiredStackSize), true))
+	// Set the required size in the execution context.
+	cur = linkInstr(cur, m.allocateInstr().asMovRM(r15VReg,
+		newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.U32(), raxVReg)), 8))
+	// Restore the temporary.
+	cur = linkInstr(cur, m.allocateInstr().asPop64(r15VReg))
+	// Call the Go function to grow the stack.
+	cur = linkInstr(cur, m.allocateInstr().asCallIndirect(newOperandMem(m.newAmodeImmReg(
+		wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.U32(), raxVReg)), nil))
+	// Jump to the continuation. Placeholder; configured below once the .cont label has been allocated.
+	jmpToCont := m.allocateInstr()
+	cur = linkInstr(cur, jmpToCont)
+
+	// .ok:
+	okInstr, ok := m.allocateBrTarget()
+	cur = linkInstr(cur, okInstr)
+	ja.asJmpIf(condNBE, newOperandLabel(ok))
+	// On the ok path, we only need to reverse the temporary update.
+	cur = m.addRSP(int32(requiredStackSize), cur)
+
+	// .cont:
+	contInstr, cont := m.allocateBrTarget()
+	cur = linkInstr(cur, contInstr)
+	jmpToCont.asJmp(newOperandLabel(cont))
+
+	return cur
+}
@@ -0,0 +1,168 @@
+package amd64
+
+import (
+	"fmt"
+
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+)
+
+// cond represents a condition code used by conditional instructions.
+// The numeric value of each constant is returned as-is by encoding, and each
+// condition is immediately followed by its negation (see invert).
+type cond byte
+
+const (
+	// condO represents (overflow) condition.
+	condO cond = iota
+	// condNO represents (no overflow) condition.
+	condNO
+	// condB represents (< unsigned) condition.
+	condB
+	// condNB represents (>= unsigned) condition.
+	condNB
+	// condZ represents (zero) condition.
+	condZ
+	// condNZ represents (not-zero) condition.
+	condNZ
+	// condBE represents (<= unsigned) condition.
+	condBE
+	// condNBE represents (> unsigned) condition.
+	condNBE
+	// condS represents (negative) condition.
+	condS
+	// condNS represents (not-negative) condition.
+	condNS
+	// condP represents (parity) condition.
+	condP
+	// condNP represents (not parity) condition.
+	condNP
+	// condL represents (< signed) condition.
+	condL
+	// condNL represents (>= signed) condition.
+	condNL
+	// condLE represents (<= signed) condition.
+	condLE
+	// condNLE represents (> signed) condition.
+	condNLE
+
+	// condInvalid is one past the last valid condition; String and invert panic on it.
+	condInvalid
+)
+
+// String implements fmt.Stringer. It returns the lower-case mnemonic suffix of the condition
+// (e.g. "nbe" for condNBE), and panics if c is not one of the valid conditions.
+func (c cond) String() string {
+	// Indexed by the condition itself; every valid condition (condO..condNLE) has an entry.
+	suffixes := [...]string{
+		condO:   "o",
+		condNO:  "no",
+		condB:   "b",
+		condNB:  "nb",
+		condZ:   "z",
+		condNZ:  "nz",
+		condBE:  "be",
+		condNBE: "nbe",
+		condS:   "s",
+		condNS:  "ns",
+		condP:   "p",
+		condNP:  "np",
+		condL:   "l",
+		condNL:  "nl",
+		condLE:  "le",
+		condNLE: "nle",
+	}
+	if int(c) >= len(suffixes) {
+		panic("unreachable")
+	}
+	return suffixes[c]
+}
+
+func condFromSSAIntCmpCond(origin ssa.IntegerCmpCond) cond {
+	switch origin {
+	case ssa.IntegerCmpCondEqual:
+		return condZ
+	case ssa.IntegerCmpCondNotEqual:
+		return condNZ
+	case ssa.IntegerCmpCondSignedLessThan:
+		return condL
+	case ssa.IntegerCmpCondSignedGreaterThanOrEqual:
+		return condNL
+	case ssa.IntegerCmpCondSignedGreaterThan:
+		return condNLE
+	case ssa.IntegerCmpCondSignedLessThanOrEqual:
+		return condLE
+	case ssa.IntegerCmpCondUnsignedLessThan:
+		return condB
+	case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual:
+		return condNB
+	case ssa.IntegerCmpCondUnsignedGreaterThan:
+		return condNBE
+	case ssa.IntegerCmpCondUnsignedLessThanOrEqual:
+		return condBE
+	default:
+		panic("unreachable")
+	}
+}
+
+// condFromSSAFloatCmpCond translates an SSA float comparison condition into the corresponding cond.
+// Only "greater than" and "greater than or equal" are translated here; the remaining known conditions
+// must be special-cased by the caller and cause a panic, as does any unknown value.
+func condFromSSAFloatCmpCond(origin ssa.FloatCmpCond) cond {
+	switch origin {
+	case ssa.FloatCmpCondGreaterThan:
+		return condNBE
+	case ssa.FloatCmpCondGreaterThanOrEqual:
+		return condNB
+	case ssa.FloatCmpCondEqual, ssa.FloatCmpCondNotEqual, ssa.FloatCmpCondLessThan, ssa.FloatCmpCondLessThanOrEqual:
+		msg := fmt.Sprintf("cond %s must be treated as a special case", origin)
+		panic(msg)
+	}
+	panic("unreachable")
+}
+
+// encoding returns the numeric value of the condition as a single byte.
+func (c cond) encoding() byte {
+	return uint8(c)
+}
+
+// invert returns the negation of the condition (e.g. condB <-> condNB), and panics if c is not a valid condition.
+func (c cond) invert() cond {
+	if c >= condInvalid {
+		panic("unreachable")
+	}
+	// Each condition and its negation are declared as an adjacent (even, odd) pair:
+	// O/NO, B/NB, Z/NZ, BE/NBE, S/NS, P/NP, L/NL, LE/NLE. Hence flipping the lowest bit negates it.
+	return c ^ 1
+}
@@ -0,0 +1,35 @@
+package amd64
+
+// extMode represents the mode of extension in movzx/movsx.
+// Each constant is named after the source and destination operand sizes, in that order:
+// B (byte), W (word), L (longword) and Q (quadword).
+type extMode byte
+
+const (
+	// extModeBL represents Byte -> Longword.
+	extModeBL extMode = iota
+	// extModeBQ represents Byte -> Quadword.
+	extModeBQ
+	// extModeWL represents Word -> Longword.
+	extModeWL
+	// extModeWQ represents Word -> Quadword.
+	extModeWQ
+	// extModeLQ represents Longword -> Quadword.
+	extModeLQ
+)
+
+// String implements fmt.Stringer. It returns the two-letter size suffix of the mode
+// (e.g. "lq" for extModeLQ), and panics on a value that is not a known extMode.
+func (e extMode) String() string {
+	// Indexed by the mode itself; every valid mode (extModeBL..extModeLQ) has an entry.
+	suffixes := [...]string{
+		extModeBL: "bl",
+		extModeBQ: "bq",
+		extModeWL: "wl",
+		extModeWQ: "wq",
+		extModeLQ: "lq",
+	}
+	if int(e) >= len(suffixes) {
+		panic("BUG: invalid ext mode")
+	}
+	return suffixes[e]
+}
@@ -0,0 +1,71 @@
+package amd64
+
+import (
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+)
+
+// lowerConstant materializes the constant defined by `instr`: it allocates a fresh VReg whose type is
+// that of the instruction's result, inserts the instructions loading the constant into it, and returns the VReg.
+func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
+	resultType := instr.Return().Type()
+	vr = m.c.AllocateVReg(resultType)
+	m.insertLoadConstant(instr, vr)
+	return vr
+}
+
+// InsertLoadConstantBlockArg implements backend.Machine.
+//
+// It inserts the instructions which load the constant defined by `instr` into the given, already allocated, `vr`.
+func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
+	m.insertLoadConstant(instr, vr)
+}
+
+// insertLoadConstant inserts the instructions which load the constant defined by `instr` into `vr`.
+// Integer constants are lowered via lowerIconst and float constants via lowerFconst;
+// any other result type causes a panic.
+func (m *machine) insertLoadConstant(instr *ssa.Instruction, vr regalloc.VReg) {
+	resultType := instr.Return().Type()
+	raw := instr.ConstantVal()
+
+	width := resultType.Bits()
+	if width < 64 {
+		// Keep only the low `width` bits, in case the raw value is unexpectedly sign-extended, etc.
+		raw = raw & ((1 << width) - 1)
+	}
+
+	is64 := width == 64
+	switch resultType {
+	case ssa.TypeI32, ssa.TypeI64:
+		m.lowerIconst(vr, raw, is64)
+	case ssa.TypeF32, ssa.TypeF64:
+		m.lowerFconst(vr, raw, is64)
+	default:
+		panic("BUG")
+	}
+}
+
+func (m *machine) lowerFconst(dst regalloc.VReg, c uint64, _64 bool) {
+	if c == 0 {
+		xor := m.allocateInstr().asZeros(dst)
+		m.insert(xor)
+	} else {
+		var tmpType ssa.Type
+		if _64 {
+			tmpType = ssa.TypeI64
+		} else {
+			tmpType = ssa.TypeI32
+		}
+		tmpInt := m.c.AllocateVReg(tmpType)
+		loadToGP := m.allocateInstr().asImm(tmpInt, c, _64)
+		m.insert(loadToGP)
+
+		movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpInt), dst, _64)
+		m.insert(movToXmm)
+	}
+}
+
+// lowerIconst inserts a single instruction which loads the integer constant `c` into `dst`.
+// Zero is materialized by zeroing `dst`; any other value is loaded as an immediate whose width is selected by `_64`.
+func (m *machine) lowerIconst(dst regalloc.VReg, c uint64, _64 bool) {
+	load := m.allocateInstr()
+	switch c {
+	case 0:
+		load.asZeros(dst)
+	default:
+		load.asImm(dst, c, _64)
+	}
+	m.insert(load)
+}
@@ -0,0 +1,187 @@
+package amd64
+
+import (
+	"fmt"
+
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+)
+
+// addendsMatchOpcodes lists the opcodes which are matched (via MatchInstrOneOf) when looking for
+// instructions that can potentially be folded into an address mode.
+var addendsMatchOpcodes = [...]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst, ssa.OpcodeIshl}
+
+// addend is one term of an address computation: either a register, optionally shifted left, or a constant offset.
+type addend struct {
+	// r is the register holding the term, or regalloc.VRegInvalid if the term is a constant.
+	r regalloc.VReg
+	// off is the constant value of the term. It is 0 whenever r is valid.
+	off int64
+	// shift is the left-shift amount applied to r.
+	shift byte
+}
+
+// String implements fmt.Stringer, formatting all three fields of the addend.
+func (a addend) String() string {
+	return fmt.Sprintf("addend{r=%s, off=%d, shift=%d}", a.r, a.off, a.shift)
+}
+
+// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
+// `offsetBase` is an unsigned constant offset which is added on top of `ptr`.
+func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32) (am *amode) {
+	def := m.c.ValueDefinition(ptr)
+
+	if offsetBase&0x80000000 != 0 {
+		// The MSB of the base offset is set. In x64 the immediate is always sign-extended, whereas our IR
+		// semantics requires the offset base to be unsigned. Therefore the offset is materialized in a
+		// register instead of being encoded as an immediate. This is expected to be extremely rare, if hit
+		// at all, in real applications, hence it is not optimized.
+		base := m.lowerAddend(def)
+		total := base.off + int64(offsetBase)
+		totalReg := m.c.AllocateVReg(ssa.TypeI64)
+		m.lowerIconst(totalReg, uint64(total), true)
+		if base.r == regalloc.VRegInvalid {
+			return m.newAmodeImmReg(0, totalReg)
+		}
+		return m.newAmodeRegRegShift(0, totalReg, base.r, base.shift)
+	}
+
+	if m.c.MatchInstrOneOf(def, addendsMatchOpcodes[:]) == ssa.OpcodeIadd {
+		// The pointer is a sum: lower both operands as addends and combine them.
+		add := def.Instr
+		lhs, rhs := add.Arg2()
+		lhsDef, rhsDef := m.c.ValueDefinition(lhs), m.c.ValueDefinition(rhs)
+		lhsAddend := m.lowerAddend(lhsDef)
+		rhsAddend := m.lowerAddend(rhsDef)
+		add.MarkLowered()
+		return m.lowerAddendsToAmode(lhsAddend, rhsAddend, offsetBase)
+	}
+
+	// Not an Iadd, so there is only a single addend to lower.
+	single := m.lowerAddend(def)
+	switch {
+	case single.r == regalloc.VRegInvalid:
+		// Purely constant address: materialize the whole of it in a register.
+		total := single.off + int64(offsetBase)
+		totalReg := m.c.AllocateVReg(ssa.TypeI64)
+		m.lowerIconst(totalReg, uint64(total), true)
+		return m.newAmodeImmReg(0, totalReg)
+	case single.shift != 0:
+		// A shifted register can only be used as the index, so use a zeroed register as the base.
+		// Note that off is always 0 if r is valid.
+		zeroReg := m.c.AllocateVReg(ssa.TypeI64)
+		m.lowerIconst(zeroReg, 0, true)
+		return m.newAmodeRegRegShift(offsetBase, zeroReg, single.r, single.shift)
+	default:
+		return m.newAmodeImmReg(offsetBase, single.r)
+	}
+}
+
+// lowerAddendsToAmode combines the two addends `x` and `y` and the unsigned constant `offBase` into a single
+// address mode. Each addend must be either a register (with off == 0) or a constant (with r == VRegInvalid);
+// otherwise this panics. The constants are summed up, and the sum is materialized in a temporary register
+// when asImm32 reports that it cannot be used as a 32-bit immediate.
+func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode {
+	if x.r != regalloc.VRegInvalid && x.off != 0 || y.r != regalloc.VRegInvalid && y.off != 0 {
+		panic("invalid input")
+	}
+
+	u64 := uint64(x.off+y.off) + uint64(offBase)
+	if u64 != 0 {
+		if _, ok := asImm32(u64, false); !ok {
+			tmpReg := m.c.AllocateVReg(ssa.TypeI64)
+			m.lowerIconst(tmpReg, u64, true)
+			// Blank u64 as it has been already lowered.
+			u64 = 0
+
+			// The temporary takes the place of whichever addend has no register.
+			if x.r == regalloc.VRegInvalid {
+				x.r = tmpReg
+			} else if y.r == regalloc.VRegInvalid {
+				y.r = tmpReg
+			} else {
+				// At least one of the registers is expected to be invalid at this point,
+				// so having both of them valid indicates a bug.
+				panic("BUG")
+			}
+		}
+	}
+
+	u32 := uint32(u64)
+	switch {
+	// We assume rx, ry are valid iff offx, offy are 0.
+	case x.r != regalloc.VRegInvalid && y.r != regalloc.VRegInvalid:
+		switch {
+		case x.shift != 0 && y.shift != 0:
+			// Cannot absorb two shifted registers, must lower one to a shift instruction.
+			// NOTE(review): the shift is applied to x.r in place; confirm that x.r has no other users.
+			shifted := m.allocateInstr()
+			shifted.asShiftR(shiftROpShiftLeft, newOperandImm32(uint32(x.shift)), x.r, true)
+			m.insert(shifted)
+
+			return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift)
+		case x.shift != 0 && y.shift == 0:
+			// Swap base and index.
+			x, y = y, x
+			fallthrough
+		default:
+			return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift)
+		}
+	case x.r == regalloc.VRegInvalid && y.r != regalloc.VRegInvalid:
+		// Normalize so that the only valid register is always in x, then share the case below.
+		x, y = y, x
+		fallthrough
+	case x.r != regalloc.VRegInvalid && y.r == regalloc.VRegInvalid:
+		if x.shift != 0 {
+			// A shifted register can only be used as the index, so use a zeroed register as the base.
+			zero := m.c.AllocateVReg(ssa.TypeI64)
+			m.lowerIconst(zero, 0, true)
+			return m.newAmodeRegRegShift(u32, zero, x.r, x.shift)
+		}
+		return m.newAmodeImmReg(u32, x.r)
+	default: // Both are invalid: use the offset.
+		tmpReg := m.c.AllocateVReg(ssa.TypeI64)
+		m.lowerIconst(tmpReg, u64, true)
+		return m.newAmodeImmReg(0, tmpReg)
+	}
+}
+
+// lowerAddend lowers the value defined by `x` into an addend. A value which is not defined by an instruction,
+// or whose defining instruction cannot be folded (including nested Iadds), is used as a plain register.
+func (m *machine) lowerAddend(x backend.SSAValueDefinition) addend {
+	if x.IsFromInstr() {
+		// MatchInstrOneOf ensures the addend is not referenced in multiple places; nested Iadds are not folded.
+		switch op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:]); op {
+		case ssa.OpcodeInvalid, ssa.OpcodeIadd:
+			operand := m.getOperand_Reg(x)
+			return addend{r: operand.reg(), off: 0, shift: 0}
+		default:
+			return m.lowerAddendFromInstr(x.Instr)
+		}
+	}
+	return addend{r: m.c.VRegOf(x.V), off: 0, shift: 0}
+}
+
+// lowerAddendFromInstr takes an instruction and returns a Vreg and an offset that can be used in an address mode.
+// The Vreg is regalloc.VRegInvalid if the addend cannot be lowered to a register.
+// The offset is 0 if the addend can be lowered to a register.
+//
+// `instr` must be an Iconst, UExtend, SExtend or Ishl; any other opcode causes a panic. The instruction is
+// marked as lowered only when it is actually folded into the returned addend. When it cannot be folded
+// (a shift whose amount is not a constant in [0, 3]), it is left to be lowered normally and the register
+// holding its result is returned instead, so that the semantics of the instruction is preserved.
+func (m *machine) lowerAddendFromInstr(instr *ssa.Instruction) addend {
+	switch op := instr.Opcode(); op {
+	case ssa.OpcodeIconst:
+		instr.MarkLowered()
+		u64 := instr.ConstantVal()
+		if instr.Return().Type().Bits() == 32 {
+			return addend{regalloc.VRegInvalid, int64(int32(u64)), 0} // sign-extend.
+		}
+		return addend{regalloc.VRegInvalid, int64(u64), 0}
+	case ssa.OpcodeUExtend, ssa.OpcodeSExtend:
+		instr.MarkLowered()
+		input := instr.Arg()
+		inputDef := m.c.ValueDefinition(input)
+		if input.Type().Bits() != 32 {
+			panic("BUG: invalid input type " + input.Type().String())
+		}
+		constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant()
+		switch {
+		case constInst && op == ssa.OpcodeSExtend:
+			// Constant-fold the signed extension: the 32-bit constant is sign-extended to 64-bit.
+			return addend{regalloc.VRegInvalid, int64(int32(inputDef.Instr.ConstantVal())), 0}
+		case constInst && op == ssa.OpcodeUExtend:
+			// Constant-fold the unsigned extension: the 32-bit constant is zero-extended to 64-bit.
+			return addend{regalloc.VRegInvalid, int64(uint32(inputDef.Instr.ConstantVal())), 0}
+		default:
+			// NOTE(review): the 32-bit input register is used as-is for both UExtend and SExtend, i.e. without
+			// an explicit extension. Presumably this relies on how 32-bit values are kept in registers; confirm
+			// that it is also valid for SExtend of negative values.
+			r := m.getOperand_Reg(inputDef)
+			return addend{r.reg(), 0, 0}
+		}
+	case ssa.OpcodeIshl:
+		// If the addend is a shift, we can only handle it if the shift amount is a constant.
+		x, amount := instr.Arg2()
+		amountDef := m.c.ValueDefinition(amount)
+		if amountDef.IsFromInstr() && amountDef.Instr.Constant() && amountDef.Instr.ConstantVal() <= 3 {
+			instr.MarkLowered()
+			r := m.getOperand_Reg(m.c.ValueDefinition(x))
+			return addend{r.reg(), 0, uint8(amountDef.Instr.ConstantVal())}
+		}
+		// The shift cannot be folded into the address mode. We must not drop it, so leave the instruction
+		// unlowered (it will be lowered on its own) and use the register holding its result as the addend.
+		r := m.getOperand_Reg(m.c.ValueDefinition(instr.Return()))
+		return addend{r.reg(), 0, 0}
+	}
+	panic("BUG: invalid opcode")
+}
@@ -0,0 +1,334 @@
+package amd64
+
+import (
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+)
+
+// PostRegAlloc implements backend.Machine.
+//
+// It first inserts the function prologue, and then walks through the instructions
+// to perform the remaining post-register-allocation work (see postRegAlloc).
+func (m *machine) PostRegAlloc() {
+	m.setupPrologue()
+	m.postRegAlloc()
+}
+
+// setupPrologue inserts the function prologue between the root instruction and the instruction
+// which originally followed it. The prologue consists of, in order:
+//  1. the frame setup (push RBP; mov RSP->RBP),
+//  2. the stack bounds check, unless it is disabled,
+//  3. the saving of the clobbered registers onto the stack, and
+//  4. the allocation of the spill slots.
+func (m *machine) setupPrologue() {
+	cur := m.rootInstr
+	// Remember the original successor of the root so that it can be re-linked after the prologue.
+	prevInitInst := cur.next
+
+	// At this point, we have the stack layout as follows:
+	//
+	//                   (high address)
+	//                 +-----------------+ <----- RBP (somewhere in the middle of the stack)
+	//                 |     .......     |
+	//                 |      ret Y      |
+	//                 |     .......     |
+	//                 |      ret 0      |
+	//                 |      arg X      |
+	//                 |     .......     |
+	//                 |      arg 1      |
+	//                 |      arg 0      |
+	//                 |   Return Addr   |
+	//       RSP ----> +-----------------+
+	//                    (low address)
+
+	// First, we push the RBP, and update the RBP to the current RSP.
+	//
+	//                   (high address)                     (high address)
+	//       RBP ----> +-----------------+                +-----------------+
+	//                 |     .......     |                |     .......     |
+	//                 |      ret Y      |                |      ret Y      |
+	//                 |     .......     |                |     .......     |
+	//                 |      ret 0      |                |      ret 0      |
+	//                 |      arg X      |                |      arg X      |
+	//                 |     .......     |     ====>      |     .......     |
+	//                 |      arg 1      |                |      arg 1      |
+	//                 |      arg 0      |                |      arg 0      |
+	//                 |   Return Addr   |                |   Return Addr   |
+	//       RSP ----> +-----------------+                |    Caller_RBP   |
+	//                    (low address)                   +-----------------+ <----- RSP, RBP
+	//
+	cur = m.setupRBPRSP(cur)
+
+	if !m.stackBoundsCheckDisabled {
+		cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur)
+	}
+
+	// Next, we save the clobbered registers below RBP.
+	//
+	//            (high address)
+	//          +-----------------+                  +-----------------+
+	//          |     .......     |                  |     .......     |
+	//          |      ret Y      |                  |      ret Y      |
+	//          |     .......     |                  |     .......     |
+	//          |      ret 0      |                  |      ret 0      |
+	//          |      arg X      |                  |      arg X      |
+	//          |     .......     |                  |     .......     |
+	//          |      arg 1      |                  |      arg 1      |
+	//          |      arg 0      |                  |      arg 0      |
+	//          |      xxxxx      |                  |      xxxxx      |
+	//          |   Return Addr   |                  |   Return Addr   |
+	//          |    Caller_RBP   |      ====>       |    Caller_RBP   |
+	// RBP,RSP->+-----------------+                  +-----------------+ <----- RBP
+	//             (low address)                     |   clobbered M   |
+	//                                               |   clobbered 1   |
+	//                                               |   ...........   |
+	//                                               |   clobbered 0   |
+	//                                               +-----------------+ <----- RSP
+	//
+	if regs := m.clobberedRegs; len(regs) > 0 {
+		for i := range regs {
+			// The epilogue (setupEpilogueAfter) restores the registers in forward order,
+			// so they are saved in reverse order here.
+			r := regs[len(regs)-1-i] // Reverse order.
+			if r.RegType() == regalloc.RegTypeInt {
+				cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r)))
+			} else {
+				// Pushing an XMM register is not supported by the PUSH instruction,
+				// so we reserve 16 bytes on the stack and store the register there instead.
+				cur = m.addRSP(-16, cur)
+				push := m.allocateInstr().asXmmMovRM(
+					sseOpcodeMovdqu, r, newOperandMem(m.newAmodeImmReg(0, rspVReg)),
+				)
+				cur = linkInstr(cur, push)
+			}
+		}
+	}
+
+	if size := m.spillSlotSize; size > 0 {
+		// Simply decrease the RSP to allocate the spill slots.
+		// 		sub $size, %rsp
+		cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(size)), rspVReg, true))
+
+		// At this point, we have the stack layout as follows:
+		//
+		//            (high address)
+		//          +-----------------+
+		//          |     .......     |
+		//          |      ret Y      |
+		//          |     .......     |
+		//          |      ret 0      |
+		//          |      arg X      |
+		//          |     .......     |
+		//          |      arg 1      |
+		//          |      arg 0      |
+		//          |   ReturnAddress |
+		//          |   Caller_RBP    |
+		//          +-----------------+ <--- RBP
+		//          |    clobbered M  |
+		//          |   ............  |
+		//          |    clobbered 1  |
+		//          |    clobbered 0  |
+		//          |   spill slot N  |
+		//          |   ............  |
+		//          |   spill slot 0  |
+		//          +-----------------+ <--- RSP
+		//             (low address)
+	}
+
+	// Re-link the original first instruction right after the prologue.
+	linkInstr(cur, prevInitInst)
+}
+
+// postRegAlloc walks through all the instructions once, and:
+//  1. inserts the epilogue before each return and each tail call,
+//  2. expands the pseudo instructions which can only be lowered after the register allocation,
+//  3. inserts the RSP decrement/increment around the call instructions whose ABI info has a non-zero size,
+//  4. removes the copies whose source and destination ended up in the same real register.
+func (m *machine) postRegAlloc() {
+	for cur := m.rootInstr; cur != nil; cur = cur.next {
+		switch kind := cur.kind; kind {
+		case ret:
+			m.setupEpilogueAfter(cur.prev)
+			continue
+		case fcvtToSintSequence, fcvtToUintSequence:
+			m.pendingInstructions = m.pendingInstructions[:0]
+			if kind == fcvtToSintSequence {
+				m.lowerFcvtToSintSequenceAfterRegalloc(cur)
+			} else {
+				m.lowerFcvtToUintSequenceAfterRegalloc(cur)
+			}
+			m.replaceWithPendingInstructions(cur)
+			continue
+		case xmmCMov:
+			m.pendingInstructions = m.pendingInstructions[:0]
+			m.lowerXmmCmovAfterRegAlloc(cur)
+			m.replaceWithPendingInstructions(cur)
+			continue
+		case idivRemSequence:
+			m.pendingInstructions = m.pendingInstructions[:0]
+			m.lowerIDivRemSequenceAfterRegAlloc(cur)
+			m.replaceWithPendingInstructions(cur)
+			continue
+		case call, callIndirect:
+			// Now that reg alloc is done, the RSP adjustment can be safely inserted right before/after
+			// the call instruction. Doing this before reg alloc could make the stack slots point to
+			// wrong locations, hence result in wrong values.
+			after := cur.next
+			if _, _, _, _, size := backend.ABIInfoFromUint64(cur.u2); size > 0 {
+				dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true)
+				linkInstr(cur.prev, dec)
+				linkInstr(dec, cur)
+				inc := m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(size), rspVReg, true)
+				linkInstr(cur, inc)
+				linkInstr(inc, after)
+			}
+			continue
+		case tailCall, tailCallIndirect:
+			// Now that reg alloc is done, the RSP decrement can be safely inserted right before the
+			// tail call (jump) instruction. Doing this before reg alloc could make the stack slots point to
+			// wrong locations, hence result in wrong values.
+			if _, _, _, _, size := backend.ABIInfoFromUint64(cur.u2); size > 0 {
+				dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true)
+				linkInstr(cur.prev, dec)
+				linkInstr(dec, cur)
+			}
+			// In a tail call, the epilogue goes before the jump instruction.
+			m.setupEpilogueAfter(cur.prev)
+			// If this has been encoded as a proper tail call, the trailing instructions can be removed.
+			// For details, see internal/engine/RATIONALE.md
+			m.removeUntilRet(cur.next)
+			continue
+		}
+
+		// What remains is to drop the copy when it is redundant, i.e. when both ends are the same real register.
+		if !cur.IsCopy() || cur.op1.reg().RealReg() != cur.op2.reg().RealReg() {
+			continue
+		}
+		before, after := cur.prev, cur.next
+		before.next = after
+		if after != nil {
+			after.prev = before
+		}
+	}
+}
+
+// replaceWithPendingInstructions links m.pendingInstructions, in order, in place of `placeholder`.
+// The links of `placeholder` itself are left untouched, so that an iteration which currently points
+// at it continues with the instruction which originally followed it.
+func (m *machine) replaceWithPendingInstructions(placeholder *instruction) {
+	tail, rest := placeholder.prev, placeholder.next
+	for _, lowered := range m.pendingInstructions {
+		tail = linkInstr(tail, lowered)
+	}
+	linkInstr(tail, rest)
+}
+
+// setupEpilogueAfter inserts the function epilogue right after `cur`, i.e. between `cur` and the instruction
+// which originally followed it. The epilogue mirrors setupPrologue: it frees the spill slots, restores the
+// clobbered registers, and finally reverts RSP and RBP to the caller's.
+func (m *machine) setupEpilogueAfter(cur *instruction) {
+	// Remember the original successor so that it can be re-linked after the epilogue.
+	prevNext := cur.next
+
+	// At this point, we have the stack layout as follows:
+	//
+	//            (high address)
+	//          +-----------------+
+	//          |     .......     |
+	//          |      ret Y      |
+	//          |     .......     |
+	//          |      ret 0      |
+	//          |      arg X      |
+	//          |     .......     |
+	//          |      arg 1      |
+	//          |      arg 0      |
+	//          |   ReturnAddress |
+	//          |   Caller_RBP    |
+	//          +-----------------+ <--- RBP
+	//          |    clobbered M  |
+	//          |   ............  |
+	//          |    clobbered 1  |
+	//          |    clobbered 0  |
+	//          |   spill slot N  |
+	//          |   ............  |
+	//          |   spill slot 0  |
+	//          +-----------------+ <--- RSP
+	//             (low address)
+
+	if size := m.spillSlotSize; size > 0 {
+		// Simply increase the RSP to free the spill slots.
+		// 		add $size, %rsp
+		cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(uint32(size)), rspVReg, true))
+	}
+
+	// Next, we restore the clobbered registers.
+	//
+	//             (high address)
+	//            +-----------------+                     +-----------------+
+	//            |     .......     |                     |     .......     |
+	//            |      ret Y      |                     |      ret Y      |
+	//            |     .......     |                     |     .......     |
+	//            |      ret 0      |                     |      ret 0      |
+	//            |      arg X      |                     |      arg X      |
+	//            |     .......     |                     |     .......     |
+	//            |      arg 1      |                     |      arg 1      |
+	//            |      arg 0      |                     |      arg 0      |
+	//            |   ReturnAddress |                     |   ReturnAddress |
+	//            |    Caller_RBP   |                     |    Caller_RBP   |
+	//   RBP ---> +-----------------+      ========>      +-----------------+ <---- RSP, RBP
+	//            |    clobbered M  |
+	//            |   ............  |
+	//            |    clobbered 1  |
+	//            |    clobbered 0  |
+	//   RSP ---> +-----------------+
+	//               (low address)
+	//
+	if regs := m.clobberedRegs; len(regs) > 0 {
+		// The prologue saved the registers in reverse order, so they are restored in forward order here.
+		for _, r := range regs {
+			if r.RegType() == regalloc.RegTypeInt {
+				cur = linkInstr(cur, m.allocateInstr().asPop64(r))
+			} else {
+				// Popping an XMM register is not supported by the POP instruction,
+				// so we load the register from the stack top and then release its 16 bytes.
+				pop := m.allocateInstr().asXmmUnaryRmR(
+					sseOpcodeMovdqu, newOperandMem(m.newAmodeImmReg(0, rspVReg)), r,
+				)
+				cur = linkInstr(cur, pop)
+				cur = m.addRSP(16, cur)
+			}
+		}
+	}
+
+	// Now roll back the RSP to RBP, and pop the caller's RBP.
+	cur = m.revertRBPRSP(cur)
+
+	// Re-link the original successor right after the epilogue.
+	linkInstr(cur, prevNext)
+}
+
+// removeUntilRet unlinks the instructions from `cur` up to and including the first `ret` instruction.
+// If there is no `ret`, everything from `cur` to the end of the list is unlinked.
+func (m *machine) removeUntilRet(cur *instruction) {
+	for node := cur; node != nil; node = node.next {
+		// Bypass `node`. Its own links are kept intact, so the iteration can proceed via node.next.
+		before, after := node.prev, node.next
+		before.next = after
+		if after != nil {
+			after.prev = before
+		}
+		if node.kind == ret {
+			break
+		}
+	}
+}
+
+// addRSP links, after `cur`, a 64-bit instruction which adds `offset` to RSP, and returns the new instruction.
+// A negative offset is emitted as a subtraction of its magnitude. Nothing is emitted for a zero offset,
+// in which case `cur` itself is returned.
+func (m *machine) addRSP(offset int32, cur *instruction) *instruction {
+	switch {
+	case offset == 0:
+		return cur
+	case offset < 0:
+		magnitude := newOperandImm32(uint32(-offset))
+		return linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, magnitude, rspVReg, true))
+	default:
+		magnitude := newOperandImm32(uint32(offset))
+		return linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, magnitude, rspVReg, true))
+	}
+}
+
+// setupRBPRSP links, after `cur`, the frame setup: it pushes RBP and then copies RSP into RBP.
+// The last linked instruction is returned.
+func (m *machine) setupRBPRSP(cur *instruction) *instruction {
+	pushRBP := m.allocateInstr().asPush64(newOperandReg(rbpVReg))
+	cur = linkInstr(cur, pushRBP)
+	copyRSPToRBP := m.allocateInstr().asMovRR(rspVReg, rbpVReg, true)
+	return linkInstr(cur, copyRSPToRBP)
+}
+
+// revertRBPRSP links, after `cur`, the inverse of setupRBPRSP: it copies RBP back into RSP and then pops RBP.
+// The last linked instruction is returned.
+func (m *machine) revertRBPRSP(cur *instruction) *instruction {
+	copyRBPToRSP := m.allocateInstr().asMovRR(rbpVReg, rspVReg, true)
+	cur = linkInstr(cur, copyRBPToRSP)
+	popRBP := m.allocateInstr().asPop64(rbpVReg)
+	return linkInstr(cur, popRBP)
+}
@@ -0,0 +1,352 @@
+package amd64
+
+import (
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+)
+
+// regAllocFn implements regalloc.Function.
+//
+// It exposes the machine's blocks (as *labelPosition) and instructions (as *instruction) to the
+// register allocator:
+//   - ssaB answers the control-flow queries (loop nesting forest roots, lowest common ancestor,
+//     immediate dominator) that are forwarded to the SSA builder.
+//   - m is the machine whose instruction list is inspected and modified.
+//   - loopNestingForestRoots caches the slice obtained by the last LoopNestingForestRoots call so
+//     that LoopNestingForestRoot can index into it.
+//   - blockIter is the cursor into m.orderedSSABlockLabelPos shared by the post-order and
+//     reverse-post-order block iterators, so only one iteration can be in flight at a time.
+type regAllocFn struct {
+	ssaB                   ssa.Builder
+	m                      *machine
+	loopNestingForestRoots []ssa.BasicBlock
+	blockIter              int
+}
+
+// PostOrderBlockIteratorBegin implements regalloc.Function.
+//
+// It places the block cursor on the last entry of m.orderedSSABlockLabelPos and returns that
+// entry, or nil when there are no blocks.
+func (f *regAllocFn) PostOrderBlockIteratorBegin() *labelPosition {
+	lastIdx := len(f.m.orderedSSABlockLabelPos) - 1
+	f.blockIter = lastIdx
+	return f.PostOrderBlockIteratorNext()
+}
+
+// PostOrderBlockIteratorNext implements regalloc.Function.
+//
+// It returns the block under the cursor and moves the cursor one entry towards the front of
+// m.orderedSSABlockLabelPos. nil is returned once the cursor has moved past the first entry.
+func (f *regAllocFn) PostOrderBlockIteratorNext() *labelPosition {
+	idx := f.blockIter
+	if idx < 0 {
+		return nil
+	}
+	pos := f.m.orderedSSABlockLabelPos[idx]
+	f.blockIter = idx - 1
+	return pos
+}
+
+// ReversePostOrderBlockIteratorBegin implements regalloc.Function.
+func (f *regAllocFn) ReversePostOrderBlockIteratorBegin() *labelPosition {
+	f.blockIter = 0
+	return f.ReversePostOrderBlockIteratorNext()
+}
+
+// ReversePostOrderBlockIteratorNext implements regalloc.Function.
+//
+// It returns the block under the cursor and moves the cursor one entry towards the back of
+// m.orderedSSABlockLabelPos. nil is returned once the cursor has moved past the last entry.
+func (f *regAllocFn) ReversePostOrderBlockIteratorNext() *labelPosition {
+	blocks, idx := f.m.orderedSSABlockLabelPos, f.blockIter
+	if idx >= len(blocks) {
+		return nil
+	}
+	pos := blocks[idx]
+	f.blockIter = idx + 1
+	return pos
+}
+
+// ClobberedRegisters implements regalloc.Function.
+//
+// It replaces the contents of m.clobberedRegs with a copy of `regs`, reusing the existing backing
+// array when it is large enough.
+func (f *regAllocFn) ClobberedRegisters(regs []regalloc.VReg) {
+	reused := f.m.clobberedRegs[:0]
+	f.m.clobberedRegs = append(reused, regs...)
+}
+
+// LoopNestingForestRoots implements regalloc.Function.
+//
+// It fetches the roots from the SSA builder, caches them on the receiver for later
+// LoopNestingForestRoot calls, and returns how many there are.
+func (f *regAllocFn) LoopNestingForestRoots() int {
+	roots := f.ssaB.LoopNestingForestRoots()
+	f.loopNestingForestRoots = roots
+	return len(roots)
+}
+
+// LoopNestingForestRoot implements regalloc.Function.
+//
+// It returns the label position of the i-th root cached by the last LoopNestingForestRoots call.
+func (f *regAllocFn) LoopNestingForestRoot(i int) *labelPosition {
+	return f.m.getOrAllocateSSABlockLabelPosition(f.loopNestingForestRoots[i])
+}
+
+// LowestCommonAncestor implements regalloc.Function.
+//
+// The query is answered by the SSA builder on the underlying SSA blocks; the resulting block is
+// mapped back to its label position.
+func (f *regAllocFn) LowestCommonAncestor(blk1, blk2 *labelPosition) *labelPosition {
+	ancestor := f.ssaB.LowestCommonAncestor(blk1.sb, blk2.sb)
+	return f.m.getOrAllocateSSABlockLabelPosition(ancestor)
+}
+
+// Idom implements regalloc.Function.
+//
+// The query is answered by the SSA builder on the underlying SSA block; the resulting block is
+// mapped back to its label position.
+func (f *regAllocFn) Idom(blk *labelPosition) *labelPosition {
+	dominator := f.ssaB.Idom(blk.sb)
+	return f.m.getOrAllocateSSABlockLabelPosition(dominator)
+}
+
+// SwapBefore implements regalloc.Function.
+//
+// machine.swap inserts after the instruction it is given, so the predecessor of `instr` is passed
+// in order to place the swap sequence before `instr`.
+func (f *regAllocFn) SwapBefore(x1, x2, tmp regalloc.VReg, instr *instruction) {
+	insertAfter := instr.prev
+	f.m.swap(insertAfter, x1, x2, tmp)
+}
+
+// StoreRegisterBefore implements regalloc.Function.
+func (f *regAllocFn) StoreRegisterBefore(v regalloc.VReg, instr *instruction) {
+	m := f.m
+	m.insertStoreRegisterAt(v, instr, false)
+}
+
+// StoreRegisterAfter implements regalloc.Function.
+func (f *regAllocFn) StoreRegisterAfter(v regalloc.VReg, instr *instruction) {
+	m := f.m
+	m.insertStoreRegisterAt(v, instr, true)
+}
+
+// ReloadRegisterBefore implements regalloc.Function.
+func (f *regAllocFn) ReloadRegisterBefore(v regalloc.VReg, instr *instruction) {
+	m := f.m
+	m.insertReloadRegisterAt(v, instr, false)
+}
+
+// ReloadRegisterAfter implements regalloc.Function.
+func (f *regAllocFn) ReloadRegisterAfter(v regalloc.VReg, instr *instruction) {
+	m := f.m
+	m.insertReloadRegisterAt(v, instr, true)
+}
+
+// InsertMoveBefore implements regalloc.Function.
+//
+// It forwards to machine.insertMoveBefore, which places a register-to-register move from `src` to
+// `dst` right before `instr`.
+func (f *regAllocFn) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {
+	m := f.m
+	m.insertMoveBefore(dst, src, instr)
+}
+
+// LoopNestingForestChild implements regalloc.Function.
+//
+// It returns the label position of the i-th loop-nesting-forest child of the SSA block behind
+// `pos`.
+func (f *regAllocFn) LoopNestingForestChild(pos *labelPosition, i int) *labelPosition {
+	children := pos.sb.LoopNestingForestChildren()
+	return f.m.getOrAllocateSSABlockLabelPosition(children[i])
+}
+
+// Succ returns the label position of the i-th successor of the SSA block behind `pos`, or nil when
+// that successor is the return block.
+//
+// NOTE(review): the original comment said "implements regalloc.Block", but the receiver is
+// regAllocFn, which is documented as implementing regalloc.Function; confirm against the interface.
+func (f *regAllocFn) Succ(pos *labelPosition, i int) *labelPosition {
+	if succ := pos.sb.Succ(i); !succ.ReturnBlock() {
+		return f.m.getOrAllocateSSABlockLabelPosition(succ)
+	}
+	return nil
+}
+
+// Pred returns the label position of the i-th predecessor of the SSA block behind `pos`.
+//
+// NOTE(review): the original comment said "implements regalloc.Block", but the receiver is
+// regAllocFn, which is documented as implementing regalloc.Function; confirm against the interface.
+func (f *regAllocFn) Pred(pos *labelPosition, i int) *labelPosition {
+	return f.m.getOrAllocateSSABlockLabelPosition(pos.sb.Pred(i))
+}
+
+// BlockParams implements regalloc.Function.
+//
+// It collects the virtual registers of the parameters of the SSA block behind `pos`. The slice
+// pointed to by `regs` is truncated and reused as the buffer; the filled slice is both written
+// back through `regs` and returned.
+func (f *regAllocFn) BlockParams(pos *labelPosition, regs *[]regalloc.VReg) []regalloc.VReg {
+	compiler, sb := f.m.c, pos.sb
+	params := (*regs)[:0]
+	for i := 0; i < sb.Params(); i++ {
+		params = append(params, compiler.VRegOf(sb.Param(i)))
+	}
+	*regs = params
+	return params
+}
+
+// ID implements regalloc.Block.
+//
+// The identifier is the ID of the underlying SSA block converted to int32.
+func (pos *labelPosition) ID() int32 {
+	blockID := pos.sb.ID()
+	return int32(blockID)
+}
+
+// InstrIteratorBegin implements regalloc.Block.
+//
+// It resets the instruction cursor to the first instruction of the block and returns it.
+func (pos *labelPosition) InstrIteratorBegin() *instruction {
+	pos.cur = pos.begin
+	return pos.cur
+}
+
+// InstrIteratorNext implements regalloc.Block.
+//
+// It advances the cursor forward and returns the next instruction that was added before register
+// allocation; instructions without that flag are stepped over. nil is returned when the cursor is
+// already on the block's last instruction or the list ends.
+func (pos *labelPosition) InstrIteratorNext() *instruction {
+	for pos.cur != pos.end {
+		next := pos.cur.next
+		pos.cur = next
+		if next == nil {
+			break
+		}
+		if next.addedBeforeRegAlloc {
+			// Only instructions that existed before regalloc are of interest to the caller.
+			return next
+		}
+	}
+	return nil
+}
+
+// InstrRevIteratorBegin implements regalloc.Block.
+//
+// It resets the instruction cursor to the last instruction of the block and returns it.
+func (pos *labelPosition) InstrRevIteratorBegin() *instruction {
+	last := pos.end
+	pos.cur = last
+	return last
+}
+
+// InstrRevIteratorNext implements regalloc.Block.
+//
+// It moves the cursor backward and returns the previous instruction that was added before
+// register allocation; instructions without that flag are stepped over. nil is returned when the
+// cursor is already on the block's first instruction or the list ends.
+func (pos *labelPosition) InstrRevIteratorNext() *instruction {
+	for pos.cur != pos.begin {
+		prev := pos.cur.prev
+		pos.cur = prev
+		if prev == nil {
+			break
+		}
+		if prev.addedBeforeRegAlloc {
+			// Only instructions that existed before regalloc are of interest to the caller.
+			return prev
+		}
+	}
+	return nil
+}
+
+// FirstInstr implements regalloc.Block by returning the instruction the block begins with.
+func (pos *labelPosition) FirstInstr() *instruction {
+	return pos.begin
+}
+
+// LastInstrForInsertion implements regalloc.Block.
+//
+// The decision is made by lastInstrForInsertion over this block's begin/end instructions.
+func (pos *labelPosition) LastInstrForInsertion() *instruction {
+	begin, end := pos.begin, pos.end
+	return lastInstrForInsertion(begin, end)
+}
+
+// Preds implements regalloc.Block by reporting the predecessor count of the underlying SSA block.
+func (pos *labelPosition) Preds() int {
+	return pos.sb.Preds()
+}
+
+// Entry implements regalloc.Block by reporting whether the underlying SSA block is the entry block.
+func (pos *labelPosition) Entry() bool {
+	return pos.sb.EntryBlock()
+}
+
+// Succs implements regalloc.Block by reporting the successor count of the underlying SSA block.
+func (pos *labelPosition) Succs() int {
+	return pos.sb.Succs()
+}
+
+// LoopHeader implements regalloc.Block by reporting whether the underlying SSA block is a loop
+// header.
+func (pos *labelPosition) LoopHeader() bool {
+	return pos.sb.LoopHeader()
+}
+
+// LoopNestingForestChildren implements regalloc.Block.
+//
+// It reports how many loop-nesting-forest children the underlying SSA block has.
+func (pos *labelPosition) LoopNestingForestChildren() int {
+	children := pos.sb.LoopNestingForestChildren()
+	return len(children)
+}
+
+// insertMoveBefore places a register-to-register move from `src` to `dst` right before `instr`.
+//
+// Integer registers are moved with a plain mov, any other register type with MOVDQU. It panics
+// when the two registers are not of the same register type.
+func (m *machine) insertMoveBefore(dst, src regalloc.VReg, instr *instruction) {
+	regType := src.RegType()
+	if regType != dst.RegType() {
+		panic("BUG: src and dst must have the same type")
+	}
+
+	mov := m.allocateInstr()
+	switch regType {
+	case regalloc.RegTypeInt:
+		mov.asMovRR(src, dst, true)
+	default:
+		mov.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), dst)
+	}
+
+	// Splice `mov` between the predecessor of `instr` and whatever followed that predecessor.
+	before := instr.prev
+	after := before.next
+	linkInstr(linkInstr(before, mov), after)
+}
+
+// insertStoreRegisterAt inserts an instruction that stores `v` into its spill slot, addressed
+// relative to RSP, either right after `instr` (after == true) or right before it
+// (after == false).
+//
+// The store width/opcode follows the SSA type of `v`: 4- and 8-byte integer moves for i32/i64,
+// MOVSS/MOVSD for f32/f64 and MOVDQU for v128.
+//
+// It returns the result of the final linkInstr call that re-attaches the instruction originally
+// following the insertion point; machine.swap relies on the `prev` of that result being the newly
+// inserted store.
+//
+// It panics when `v` is not backed by a real register, or when its type is not one of the types
+// listed above (previously such a type silently linked an unconfigured instruction, unlike
+// insertReloadRegisterAt which already panicked).
+func (m *machine) insertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
+	if !v.IsRealReg() {
+		panic("BUG: VReg must be backed by real reg to be stored")
+	}
+
+	typ := m.c.TypeOf(v)
+
+	var prevNext, cur *instruction
+	if after {
+		cur, prevNext = instr, instr.next
+	} else {
+		cur, prevNext = instr.prev, instr
+	}
+
+	offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
+	store := m.allocateInstr()
+	mem := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg))
+	switch typ {
+	case ssa.TypeI32:
+		store.asMovRM(v, mem, 4)
+	case ssa.TypeI64:
+		store.asMovRM(v, mem, 8)
+	case ssa.TypeF32:
+		store.asXmmMovRM(sseOpcodeMovss, v, mem)
+	case ssa.TypeF64:
+		store.asXmmMovRM(sseOpcodeMovsd, v, mem)
+	case ssa.TypeV128:
+		store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
+	default:
+		// Keep in sync with insertReloadRegisterAt: an unknown type is a compiler bug, and linking
+		// an unconfigured instruction would only surface as a confusing failure much later.
+		panic("BUG: unsupported type for spill store")
+	}
+
+	cur = linkInstr(cur, store)
+	return linkInstr(cur, prevNext)
+}
+
+// insertReloadRegisterAt inserts an instruction that loads `v` back from its spill slot,
+// addressed relative to RSP, either right after `instr` (after == true) or right before it
+// (after == false).
+//
+// The load follows the SSA type of `v`: a zero-extending 32-bit load for i32, a 64-bit load for
+// i64, MOVSS/MOVSD for f32/f64 and MOVDQU for v128.
+//
+// It returns the result of the final linkInstr call that re-attaches the instruction originally
+// following the insertion point.
+//
+// It panics when `v` is not backed by a real register or when its type is not one of the types
+// listed above.
+func (m *machine) insertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
+	if !v.IsRealReg() {
+		// The message used to say "stored", a copy-paste from insertStoreRegisterAt which made
+		// the two failure sites indistinguishable.
+		panic("BUG: VReg must be backed by real reg to be reloaded")
+	}
+
+	typ := m.c.TypeOf(v)
+	var prevNext, cur *instruction
+	if after {
+		cur, prevNext = instr, instr.next
+	} else {
+		cur, prevNext = instr.prev, instr
+	}
+
+	// Load the value from the spill slot directly into the real register backing `v`.
+	load := m.allocateInstr()
+	offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
+	a := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg))
+	switch typ {
+	case ssa.TypeI32:
+		load.asMovzxRmR(extModeLQ, a, v)
+	case ssa.TypeI64:
+		load.asMov64MR(a, v)
+	case ssa.TypeF32:
+		load.asXmmUnaryRmR(sseOpcodeMovss, a, v)
+	case ssa.TypeF64:
+		load.asXmmUnaryRmR(sseOpcodeMovsd, a, v)
+	case ssa.TypeV128:
+		load.asXmmUnaryRmR(sseOpcodeMovdqu, a, v)
+	default:
+		panic("BUG")
+	}
+
+	cur = linkInstr(cur, load)
+	return linkInstr(cur, prevNext)
+}
+
+// swap inserts, right after `cur`, the instructions exchanging the contents of x1 and x2.
+//
+// Three strategies are used, selected on the register type of x1 and on whether `tmp` is valid:
+//   - integer registers: a single XCHG;
+//   - other registers with a valid `tmp`: three moves through `tmp`;
+//   - other registers without `tmp`: x1 is bounced through its own spill slot.
+func (m *machine) swap(cur *instruction, x1, x2, tmp regalloc.VReg) {
+	if x1.RegType() == regalloc.RegTypeInt {
+		prevNext := cur.next
+		xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8)
+		cur = linkInstr(cur, xc)
+		linkInstr(cur, prevNext)
+	} else {
+		if tmp.Valid() {
+			// Each call inserts right before prevNext, so the emitted order is
+			// tmp <- x1, x1 <- x2, x2 <- tmp.
+			prevNext := cur.next
+			m.insertMoveBefore(tmp, x1, prevNext)
+			m.insertMoveBefore(x1, x2, prevNext)
+			m.insertMoveBefore(x2, tmp, prevNext)
+		} else {
+			prevNext := cur.next
+			r2 := x2.RealReg()
+			// Temporarily spill x1 to stack.
+			// The `.prev` of the returned instruction is the store that was just inserted, so
+			// `cur` now points at that store.
+			cur = m.insertStoreRegisterAt(x1, cur, true).prev
+			// Then move x2 to x1.
+			cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1))
+			linkInstr(cur, prevNext)
+			// Then reload the original value on x1 from stack to r2.
+			// x1.SetRealReg(r2) keeps x1's ID while swapping in x2's real register, so the load
+			// reads x1's spill slot and writes into r2.
+			m.insertReloadRegisterAt(x1.SetRealReg(r2), cur, true)
+		}
+	}
+}
+
+// lastInstrForInsertion picks the instruction returned for the block spanning `begin`..`end` by
+// labelPosition.LastInstrForInsertion.
+//
+// Trailing nop0 instructions are skipped walking backwards from `end`. If the first non-nop
+// instruction found is a jmp, that jmp is returned; otherwise, or when the walk reaches `begin`,
+// `end` itself is returned.
+func lastInstrForInsertion(begin, end *instruction) *instruction {
+	last := end
+	for last.kind == nop0 {
+		if last = last.prev; last == begin {
+			return end
+		}
+	}
+	if last.kind == jmp {
+		return last
+	}
+	return end
+}
@@ -0,0 +1,992 @@
+package amd64
+
+import (
+	"fmt"
+
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
+	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+)
+
+// swizzleMask is the 16-byte constant (0x70 in every byte) that lowerSwizzle adds, with unsigned
+// saturation (PADDUSB), to the index vector before PSHUFB. Adding 0x70 leaves the low four bits
+// of indices 0..15 untouched while any index >= 16 ends up with its top bit set, which PSHUFB
+// treats as "write zero to this lane".
+var swizzleMask = [16]byte{
+	0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
+	0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
+}
+
+// lowerSwizzle lowers a byte-wise swizzle: the bytes of `x` are selected by the per-byte indices
+// in `y`, and the result is copied into the register of `ret`.
+//
+// The sequence is: load swizzleMask, add it with unsigned saturation to a copy of `y`, then PSHUFB
+// a copy of `x` with the adjusted indices.
+func (m *machine) lowerSwizzle(x, y ssa.Value, ret ssa.Value) {
+	masklabel := m.getOrAllocateConstLabel(&m.constSwizzleMaskConstIndex, swizzleMask[:])
+
+	// Load mask to maskReg.
+	maskReg := m.c.AllocateVReg(ssa.TypeV128)
+	loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(masklabel)), maskReg)
+	m.insert(loadMask)
+
+	// Copy x and y to tmp registers.
+	// NOTE: despite its name, tmpX holds the copy of y (the indices); tmpDst holds the copy of x.
+	xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+	tmpDst := m.copyToTmp(xx.reg())
+	yy := m.getOperand_Reg(m.c.ValueDefinition(y))
+	tmpX := m.copyToTmp(yy.reg())
+
+	// Saturating-add the mask to the indices, then shuffle the bytes of x by them.
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddusb, newOperandReg(maskReg), tmpX))
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpX), tmpDst))
+
+	// Copy the result to the destination register.
+	m.copyTo(tmpDst, m.c.VRegOf(ret))
+}
+
+// lowerInsertLane lowers a lane replacement: a copy of vector `x` gets its lane number `index`
+// (interpreted according to `lane`) replaced by the scalar `y`, and the result is copied into the
+// register of `ret`. It panics on a lane shape it does not know.
+func (m *machine) lowerInsertLane(x, y ssa.Value, index byte, ret ssa.Value, lane ssa.VecLane) {
+	// Copy x to tmp.
+	tmpDst := m.c.AllocateVReg(ssa.TypeV128)
+	m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, m.getOperand_Mem_Reg(m.c.ValueDefinition(x)), tmpDst))
+
+	yy := m.getOperand_Reg(m.c.ValueDefinition(y))
+	switch lane {
+	case ssa.VecLaneI8x16:
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, index, yy, tmpDst))
+	case ssa.VecLaneI16x8:
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, index, yy, tmpDst))
+	case ssa.VecLaneI32x4:
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, index, yy, tmpDst))
+	case ssa.VecLaneI64x2:
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, index, yy, tmpDst))
+	case ssa.VecLaneF32x4:
+		// In INSERTPS instruction, the destination index is encoded at 4 and 5 bits of the argument.
+		// See https://www.felixcloutier.com/x86/insertps
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeInsertps, index<<4, yy, tmpDst))
+	case ssa.VecLaneF64x2:
+		// There is no immediate-indexed insert used here for f64: lane 0 is written with a
+		// register-to-register MOVSD and lane 1 with MOVLHPS (low half of yy into the high half
+		// of tmpDst).
+		if index == 0 {
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, yy, tmpDst))
+		} else {
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMovlhps, yy, tmpDst))
+		}
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+
+	m.copyTo(tmpDst, m.c.VRegOf(ret))
+}
+
+// lowerExtractLane lowers a lane extraction: lane number `index` of vector `x` (interpreted
+// according to `lane`) is extracted into a temporary of the result type and copied into the
+// register of `ret`. `signed` selects sign- versus zero-extension and is only consulted for the
+// i8x16 and i16x8 shapes. It panics on a lane shape it does not know.
+func (m *machine) lowerExtractLane(x ssa.Value, index byte, signed bool, ret ssa.Value, lane ssa.VecLane) {
+	// Pextr variants are used to extract a lane from a vector register.
+	xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+
+	tmpDst := m.c.AllocateVReg(ret.Type())
+	// tmpDst is declared as defined-but-uninitialized before the first instruction targeting it.
+	m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
+	switch lane {
+	case ssa.VecLaneI8x16:
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrb, index, xx, tmpDst))
+		// Extend the extracted byte to 32 bits according to the requested signedness.
+		if signed {
+			m.insert(m.allocateInstr().asMovsxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
+		} else {
+			m.insert(m.allocateInstr().asMovzxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
+		}
+	case ssa.VecLaneI16x8:
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrw, index, xx, tmpDst))
+		// Extend the extracted word to 32 bits according to the requested signedness.
+		if signed {
+			m.insert(m.allocateInstr().asMovsxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
+		} else {
+			m.insert(m.allocateInstr().asMovzxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
+		}
+	case ssa.VecLaneI32x4:
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrd, index, xx, tmpDst))
+	case ssa.VecLaneI64x2:
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, index, xx, tmpDst))
+	case ssa.VecLaneF32x4:
+		// The float result stays in an XMM register: lane 0 is taken with MOVSS, any other lane is
+		// brought down to lane 0 with PSHUFD using `index` as the shuffle immediate.
+		if index == 0 {
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovss, xx, tmpDst))
+		} else {
+			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, index, xx, tmpDst))
+		}
+	case ssa.VecLaneF64x2:
+		if index == 0 {
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, xx, tmpDst))
+		} else {
+			// The immediate selects source dwords 2 and 3 for destination dwords 0 and 1, i.e. it
+			// moves the upper 64 bits into the lower 64 bits.
+			m.copyTo(xx.reg(), tmpDst)
+			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0b00_00_11_10, newOperandReg(tmpDst), tmpDst))
+		}
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+
+	m.copyTo(tmpDst, m.c.VRegOf(ret))
+}
+
+// sqmulRoundSat is the 16-byte constant holding 0x8000 in each of its eight little-endian 16-bit
+// lanes. lowerSqmulRoundSat compares the PMULHRSW result against it to find the lanes that need
+// to be corrected.
+var sqmulRoundSat = [16]byte{
+	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+	0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+}
+
+// lowerSqmulRoundSat lowers the rounding, saturating Q15 multiplication of the 16-bit lanes of
+// `x` and `y`, copying the result into the register of `ret`.
+//
+// PMULHRSW does the rounding multiplication; lanes of its result equal to 0x8000 are then flipped
+// by XOR-ing them with the all-ones mask produced by PCMPEQW, which turns them into 0x7fff.
+func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) {
+	// See https://github.com/WebAssembly/simd/pull/365 for the following logic.
+	maskLabel := m.getOrAllocateConstLabel(&m.constSqmulRoundSatIndex, sqmulRoundSat[:])
+
+	// tmp starts as the 0x8000-per-lane constant and later holds the comparison result.
+	tmp := m.c.AllocateVReg(ssa.TypeV128)
+	loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp)
+	m.insert(loadMask)
+
+	xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
+	tmpX := m.copyToTmp(xx.reg())
+
+	// tmpX = pmulhrsw(tmpX, yy); tmp = (tmp == tmpX) per lane; tmpX ^= tmp.
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX))
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqw, newOperandReg(tmpX), tmp))
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX))
+
+	m.copyTo(tmpX, m.c.VRegOf(ret))
+}
+
+func (m *machine) lowerVUshr(x, y, ret ssa.Value, lane ssa.VecLane) {
+	switch lane {
+	case ssa.VecLaneI8x16:
+		m.lowerVUshri8x16(x, y, ret)
+	case ssa.VecLaneI16x8, ssa.VecLaneI32x4, ssa.VecLaneI64x2:
+		m.lowerShr(x, y, ret, lane, false)
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+}
+
+// i8x16LogicalSHRMaskTable is necessary for emulating non-existent packed bytes logical right shifts on amd64.
+// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits.
+//
+// The table holds one 16-byte row per shift amount, so the row for shift amount s starts at byte
+// offset s*16, and every byte of that row is 0xff >> s.
+var i8x16LogicalSHRMaskTable = [8 * 16]byte{ // (the number of possible shift amount 0, 1, ..., 7.) * 16 bytes.
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
+	0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // for 1 shift
+	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, // for 2 shift
+	0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, // for 3 shift
+	0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, // for 4 shift
+	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // for 5 shift
+	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, // for 6 shift
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // for 7 shift
+}
+
+// lowerVUshri8x16 lowers the logical right shift of the sixteen byte lanes of `x` by the scalar
+// `y` (taken modulo 8) into `ret`.
+//
+// amd64 has no packed byte shift, so the value is shifted as packed words with PSRLW and the bits
+// that leaked in from the neighbouring byte are cleared by AND-ing with the row of
+// i8x16LogicalSHRMaskTable that matches the shift amount.
+func (m *machine) lowerVUshri8x16(x, y, ret ssa.Value) {
+	tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
+	// Load the modulo 8 mask to tmpGpReg.
+	m.lowerIconst(tmpGpReg, 0x7, false)
+	// Take the modulo 8 of the shift amount.
+	shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
+	m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, tmpGpReg, false))
+
+	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+	xx := m.copyToTmp(_xx.reg())
+
+	// Move the shift amount into an XMM register and shift the copy of x as packed words.
+	vecTmp := m.c.AllocateVReg(ssa.TypeV128)
+	m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), vecTmp, false))
+	m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlw, newOperandReg(vecTmp), xx))
+
+	// Materialize the address of the mask table.
+	maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16LogicalSHRMaskTableIndex, i8x16LogicalSHRMaskTable[:])
+	base := m.c.AllocateVReg(ssa.TypeI64)
+	lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
+	m.insert(lea)
+
+	// Shift tmpGpReg by 4 to multiply the shift amount by 16.
+	m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
+
+	// Load the 16-byte mask row at base + tmpGpReg, reusing vecTmp which is no longer needed as
+	// the shift amount.
+	mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
+	loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), vecTmp)
+	m.insert(loadMask)
+
+	// Clear the bits shifted in from the adjacent byte of each word.
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(vecTmp), xx))
+	m.copyTo(xx, m.c.VRegOf(ret))
+}
+
+func (m *machine) lowerVSshr(x, y, ret ssa.Value, lane ssa.VecLane) {
+	switch lane {
+	case ssa.VecLaneI8x16:
+		m.lowerVSshri8x16(x, y, ret)
+	case ssa.VecLaneI16x8, ssa.VecLaneI32x4:
+		m.lowerShr(x, y, ret, lane, true)
+	case ssa.VecLaneI64x2:
+		m.lowerVSshri64x2(x, y, ret)
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+}
+
+// lowerVSshri8x16 lowers the arithmetic right shift of the sixteen byte lanes of `x` by the
+// scalar `y` (taken modulo 8) into `ret`.
+//
+// amd64 has no packed byte arithmetic shift, so every byte is widened to a word by duplicating
+// it, the words are shifted with PSRAW by (shift amount + 8), and the two word vectors are packed
+// back into bytes.
+func (m *machine) lowerVSshri8x16(x, y, ret ssa.Value) {
+	shiftAmtReg := m.c.AllocateVReg(ssa.TypeI32)
+	// Load the modulo 8 mask to shiftAmtReg.
+	m.lowerIconst(shiftAmtReg, 0x7, false)
+	// Take the modulo 8 of the shift amount.
+	shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
+	m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, shiftAmtReg, false))
+
+	// Copy the x value to two temporary registers.
+	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+	xx := m.copyToTmp(_xx.reg())
+	vecTmp := m.c.AllocateVReg(ssa.TypeV128)
+	m.copyTo(xx, vecTmp)
+
+	// Assuming that we have
+	//  xx   = [b1, ..., b16]
+	//  vecTmp = [b1, ..., b16]
+	// at this point, then we use PUNPCKLBW and PUNPCKHBW to produce:
+	//  xx   = [b1, b1, b2, b2, ..., b8, b8]
+	//  vecTmp = [b9, b9, b10, b10, ..., b16, b16]
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpcklbw, newOperandReg(xx), xx))
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpckhbw, newOperandReg(vecTmp), vecTmp))
+
+	// Adding 8 to the shift amount, and then move the amount to vecTmp2.
+	vecTmp2 := m.c.AllocateVReg(ssa.TypeV128)
+	m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(8), shiftAmtReg, false))
+	m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(shiftAmtReg), vecTmp2, false))
+
+	// Perform the word packed arithmetic right shifts on xx and vecTmp.
+	// This changes these two registers as:
+	//  xx   = [xxx, b1 >> s, xxx, b2 >> s, ..., xxx, b8 >> s]
+	//  vecTmp = [xxx, b9 >> s, xxx, b10 >> s, ..., xxx, b16 >> s]
+	// where xxx is 1 or 0 depending on each byte's sign, and ">>" is the arithmetic shift on a byte.
+	m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), xx))
+	m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), vecTmp))
+
+	// Finally, we can get the result by packing these two word vectors.
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePacksswb, newOperandReg(vecTmp), xx))
+
+	m.copyTo(xx, m.c.VRegOf(ret))
+}
+
+// lowerVSshri64x2 lowers the arithmetic right shift of the two 64-bit lanes of `x` by the scalar
+// `y` into `ret`.
+//
+// No packed 64-bit arithmetic shift is used; instead each lane is extracted into a
+// general-purpose register, shifted there with the amount held in RCX, and inserted back.
+func (m *machine) lowerVSshri64x2(x, y, ret ssa.Value) {
+	// Load the shift amount to RCX.
+	// Only the low byte of y is kept (zero-extended); no explicit modulo-64 is applied here.
+	// NOTE(review): presumably this relies on the hardware masking the count of a 64-bit shift to
+	// its low 6 bits; confirm against the shift instruction's definition.
+	shiftAmt := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
+	m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, shiftAmt, rcxVReg))
+
+	tmpGp := m.c.AllocateVReg(ssa.TypeI64)
+
+	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+	xxReg := m.copyToTmp(_xx.reg())
+
+	m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp))
+	// Lane 0: extract, shift, insert back.
+	m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 0, newOperandReg(xxReg), tmpGp))
+	m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
+	m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), xxReg))
+	// Lane 1: extract, shift, insert back.
+	m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 1, newOperandReg(xxReg), tmpGp))
+	m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
+	m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), xxReg))
+
+	m.copyTo(xxReg, m.c.VRegOf(ret))
+}
+
+// lowerShr lowers a vector right shift of `x` by the scalar `y` into `ret` for the lane shapes
+// that have a native packed shift: i16x8 and i32x4 (logical or arithmetic, selected by `signed`)
+// and i64x2 (logical only).
+//
+// The shift amount is first reduced modulo the lane width in bits. It panics for a signed i64x2
+// shift and for any other lane shape.
+func (m *machine) lowerShr(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
+	// modulo is the bit mask (lane width - 1) applied to the shift amount.
+	var modulo uint64
+	var shiftOp sseOpcode
+	switch lane {
+	case ssa.VecLaneI16x8:
+		modulo = 0xf
+		if signed {
+			shiftOp = sseOpcodePsraw
+		} else {
+			shiftOp = sseOpcodePsrlw
+		}
+	case ssa.VecLaneI32x4:
+		modulo = 0x1f
+		if signed {
+			shiftOp = sseOpcodePsrad
+		} else {
+			shiftOp = sseOpcodePsrld
+		}
+	case ssa.VecLaneI64x2:
+		modulo = 0x3f
+		if signed {
+			panic("BUG")
+		}
+		shiftOp = sseOpcodePsrlq
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+
+	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+	xx := m.copyToTmp(_xx.reg())
+
+	tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
+	// Load the lane-width modulo mask to tmpGpReg.
+	m.lowerIconst(tmpGpReg, modulo, false)
+	// Take the shift amount modulo the lane width.
+	m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
+		m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))
+	// And move it to a xmm register.
+	tmpVec := m.c.AllocateVReg(ssa.TypeV128)
+	m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))
+
+	// Then do the actual shift.
+	m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
+
+	m.copyTo(xx, m.c.VRegOf(ret))
+}
+
+// lowerVIshl lowers a vector left shift of `x` by the scalar `y` into `ret` for the i8x16, i16x8,
+// i32x4 and i64x2 lane shapes; any other shape panics.
+//
+// The shift amount is first reduced modulo the lane width in bits. i8x16 has no native packed
+// shift: it is shifted as packed words with PSLLW and the bits that leaked in from the
+// neighbouring byte are cleared by AND-ing with the matching row of i8x16SHLMaskTable.
+func (m *machine) lowerVIshl(x, y, ret ssa.Value, lane ssa.VecLane) {
+	// modulo is the bit mask (lane width - 1) applied to the shift amount.
+	var modulo uint64
+	var shiftOp sseOpcode
+	var isI8x16 bool
+	switch lane {
+	case ssa.VecLaneI8x16:
+		isI8x16 = true
+		modulo = 0x7
+		shiftOp = sseOpcodePsllw
+	case ssa.VecLaneI16x8:
+		modulo = 0xf
+		shiftOp = sseOpcodePsllw
+	case ssa.VecLaneI32x4:
+		modulo = 0x1f
+		shiftOp = sseOpcodePslld
+	case ssa.VecLaneI64x2:
+		modulo = 0x3f
+		shiftOp = sseOpcodePsllq
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+
+	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+	xx := m.copyToTmp(_xx.reg())
+
+	tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
+	// Load the lane-width modulo mask to tmpGpReg.
+	m.lowerIconst(tmpGpReg, modulo, false)
+	// Take the shift amount modulo the lane width.
+	m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
+		m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))
+	// And move it to a xmm register.
+	tmpVec := m.c.AllocateVReg(ssa.TypeV128)
+	m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))
+
+	// Then do the actual shift.
+	m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
+
+	if isI8x16 {
+		// Materialize the address of the mask table.
+		maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16SHLMaskTableIndex, i8x16SHLMaskTable[:])
+		base := m.c.AllocateVReg(ssa.TypeI64)
+		lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
+		m.insert(lea)
+
+		// Shift tmpGpReg by 4 to multiply the shift amount by 16.
+		m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
+
+		// Load the 16-byte mask row at base + tmpGpReg, reusing tmpVec which is no longer needed
+		// as the shift amount.
+		mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
+		loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), tmpVec)
+		m.insert(loadMask)
+
+		// Clear the bits shifted in from the adjacent byte of each word.
+		m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(tmpVec), xx))
+	}
+
+	m.copyTo(xx, m.c.VRegOf(ret))
+}
+
+// i8x16SHLMaskTable is necessary for emulating non-existent packed bytes left shifts on amd64.
+// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits.
+//
+// The table holds one 16-byte row per shift amount, so the row for shift amount s starts at byte
+// offset s*16, and every byte of that row is the low 8 bits of 0xff << s.
+var i8x16SHLMaskTable = [8 * 16]byte{ // (the number of possible shift amount 0, 1, ..., 7.) * 16 bytes.
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
+	0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, // for 1 shift
+	0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, // for 2 shift
+	0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, // for 3 shift
+	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // for 4 shift
+	0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, // for 5 shift
+	0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, // for 6 shift
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // for 7 shift
+}
+
+// lowerVRound lowers a packed floating-point rounding of `x` into `ret`, using ROUNDPD when `_64`
+// is set and ROUNDPS otherwise. `imm` is passed through as the instruction's immediate.
+func (m *machine) lowerVRound(x, ret ssa.Value, imm byte, _64 bool) {
+	src := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
+
+	var op sseOpcode
+	switch {
+	case _64:
+		op = sseOpcodeRoundpd
+	default:
+		op = sseOpcodeRoundps
+	}
+
+	roundInstr := m.allocateInstr().asXmmUnaryRmRImm(op, imm, src, m.c.VRegOf(ret))
+	m.insert(roundInstr)
+}
+
+// Constants used by lowerExtIaddPairwise. All of them are 16 bytes, laid out little-endian.
+//   - allOnesI8x16: the value 1 in each of the sixteen 8-bit lanes (not "all bits set").
+//   - allOnesI16x8: the value 1 in each of the eight 16-bit lanes.
+//   - extAddPairwiseI16x8uMask1: 0x8000 in each 16-bit lane, i.e. only the sign bit set.
+//   - extAddPairwiseI16x8uMask2: 0x00010000 in each of the four 32-bit lanes.
+var (
+	allOnesI8x16              = [16]byte{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}
+	allOnesI16x8              = [16]byte{0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0}
+	extAddPairwiseI16x8uMask1 = [16]byte{0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80}
+	extAddPairwiseI16x8uMask2 = [16]byte{0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00}
+)
+
+// lowerExtIaddPairwise lowers the extended pairwise addition: adjacent lanes of `x` (shaped as
+// `srcLane`) are widened as signed or unsigned values and added pairwise, producing lanes twice
+// as wide in `ret`. Only i8x16 and i16x8 sources are handled; any other shape panics.
+func (m *machine) lowerExtIaddPairwise(x, ret ssa.Value, srcLane ssa.VecLane, signed bool) {
+	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+	xx := m.copyToTmp(_xx.reg())
+	switch srcLane {
+	case ssa.VecLaneI8x16:
+		allOneReg := m.c.AllocateVReg(ssa.TypeV128)
+		mask := m.getOrAllocateConstLabel(&m.constAllOnesI8x16Index, allOnesI8x16[:])
+		m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOneReg))
+
+		// PMADDUBSW multiplies the unsigned bytes of its destination with the signed bytes of its
+		// source, so the operand roles decide how the bytes of xx are interpreted.
+		var resultReg regalloc.VReg
+		if signed {
+			// xx is the source operand: its bytes are treated as signed, the ones as unsigned.
+			resultReg = allOneReg
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(xx), resultReg))
+		} else {
+			// Interpret the all-ones register as the signed operand, meaning that the bytes of xx
+			// are treated as unsigned in the multiply-add.
+			resultReg = xx
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(allOneReg), resultReg))
+		}
+		m.copyTo(resultReg, m.c.VRegOf(ret))
+
+	case ssa.VecLaneI16x8:
+		if signed {
+			// PMADDWD against ones: each i32 lane becomes the sum of two adjacent signed words.
+			allOnesReg := m.c.AllocateVReg(ssa.TypeV128)
+			mask := m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOnesReg))
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(allOnesReg), xx))
+			m.copyTo(xx, m.c.VRegOf(ret))
+		} else {
+			maskReg := m.c.AllocateVReg(ssa.TypeV128)
+			mask := m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask1Index, extAddPairwiseI16x8uMask1[:])
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
+
+			// Flip the sign bit of every 16-bit lane of xx.
+			//
+			// Assuming that xx = [w1, ..., w8] as unsigned words, each lane now reads, as a signed
+			// word, as wi - 0x8000.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(maskReg), xx))
+
+			mask = m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
+
+			// Multiply-add against ones. For each of the four resulting i32 lanes we now have
+			// 	xx[i] = (wn - 0x8000) + (w(n+1) - 0x8000) = wn + w(n+1) - 0x10000.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(maskReg), xx))
+
+			mask = m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask2Index, extAddPairwiseI16x8uMask2[:])
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
+
+			// Add 0x10000 back to every i32 lane, which leaves xx[i] = uint32(wn + w(n+1)).
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(maskReg), xx))
+
+			m.copyTo(xx, m.c.VRegOf(ret))
+		}
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", srcLane))
+	}
+}
+
+// lowerWidenLow lowers the widening of the low half of `x` into `ret`: each source lane (shaped
+// as `lane`) is sign- or zero-extended, depending on `signed`, to a lane twice as wide using the
+// matching PMOVSX*/PMOVZX* instruction. Only i8x16, i16x8 and i32x4 sources are handled; any
+// other shape panics.
+func (m *machine) lowerWidenLow(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
+	var signExtend, zeroExtend sseOpcode
+	switch lane {
+	case ssa.VecLaneI8x16:
+		signExtend, zeroExtend = sseOpcodePmovsxbw, sseOpcodePmovzxbw
+	case ssa.VecLaneI16x8:
+		signExtend, zeroExtend = sseOpcodePmovsxwd, sseOpcodePmovzxwd
+	case ssa.VecLaneI32x4:
+		signExtend, zeroExtend = sseOpcodePmovsxdq, sseOpcodePmovzxdq
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+
+	extendOp := zeroExtend
+	if signed {
+		extendOp = signExtend
+	}
+
+	src := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
+	m.insert(m.allocateInstr().asXmmUnaryRmR(extendOp, src, m.c.VRegOf(ret)))
+}
+
+// lowerWidenHigh lowers the widening of the high half of `x` into `ret`.
+//
+// A copy of `x` is first rotated with PALIGNR by 8 bytes so that its upper half lands in the
+// lower half; the lanes (shaped as `lane`) are then sign- or zero-extended, depending on
+// `signed`, with the matching PMOVSX*/PMOVZX* instruction. Only i8x16, i16x8 and i32x4 sources
+// are handled; any other shape panics (after the copy and PALIGNR have already been emitted).
+func (m *machine) lowerWidenHigh(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
+	high := m.c.AllocateVReg(ssa.TypeV128)
+	src := m.getOperand_Reg(m.c.ValueDefinition(x))
+	m.copyTo(src.reg(), high)
+	m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePalignr, 8, newOperandReg(high), high))
+
+	var signExtend, zeroExtend sseOpcode
+	switch lane {
+	case ssa.VecLaneI8x16:
+		signExtend, zeroExtend = sseOpcodePmovsxbw, sseOpcodePmovzxbw
+	case ssa.VecLaneI16x8:
+		signExtend, zeroExtend = sseOpcodePmovsxwd, sseOpcodePmovzxwd
+	case ssa.VecLaneI32x4:
+		signExtend, zeroExtend = sseOpcodePmovsxdq, sseOpcodePmovzxdq
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+
+	extendOp := zeroExtend
+	if signed {
+		extendOp = signExtend
+	}
+
+	m.insert(m.allocateInstr().asXmmUnaryRmR(extendOp, newOperandReg(high), m.c.VRegOf(ret)))
+}
+
+// lowerLoadSplat lowers a load-and-splat: one scalar of the width given by `lane` is loaded from
+// the address formed by `ptr` and `offset` and replicated into every lane of `ret`.
+//
+// The scalar is first loaded into a general-purpose register, inserted into the low lane(s) of a
+// temporary vector, and then broadcast with a shuffle (or a second insert for i64x2). Only the
+// integer lane shapes are handled; any other shape panics.
+func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, ret ssa.Value, lane ssa.VecLane) {
+	tmpDst, tmpGp := m.c.AllocateVReg(ssa.TypeV128), m.c.AllocateVReg(ssa.TypeI64)
+	am := newOperandMem(m.lowerToAddressMode(ptr, offset))
+
+	// tmpDst is declared as defined-but-uninitialized before the inserts that target it.
+	m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
+	switch lane {
+	case ssa.VecLaneI8x16:
+		// Insert the byte at lane 0, then PSHUFB with an all-zero index vector so that every
+		// destination byte picks source byte 0.
+		m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, am, tmpGp))
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, 0, newOperandReg(tmpGp), tmpDst))
+		tmpZeroVec := m.c.AllocateVReg(ssa.TypeV128)
+		m.insert(m.allocateInstr().asZeros(tmpZeroVec))
+		m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpZeroVec), tmpDst))
+	case ssa.VecLaneI16x8:
+		// Fill the low 32 bits with two copies of the word, then broadcast dword 0 with PSHUFD.
+		m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, am, tmpGp))
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 0, newOperandReg(tmpGp), tmpDst))
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 1, newOperandReg(tmpGp), tmpDst))
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
+	case ssa.VecLaneI32x4:
+		// Insert the dword at lane 0, then broadcast it with PSHUFD.
+		m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, am, tmpGp))
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, 0, newOperandReg(tmpGp), tmpDst))
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
+	case ssa.VecLaneI64x2:
+		// Only two lanes: insert the qword into both of them directly.
+		m.insert(m.allocateInstr().asMov64MR(am, tmpGp))
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), tmpDst))
+		m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), tmpDst))
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+
+	m.copyTo(tmpDst, m.c.VRegOf(ret))
+}
+
+// f64x2CvtFromIMask is the 16-byte constant that lowerVFcvtFromInt interleaves with the
+// input in the unsigned F64x2 conversion. Its first two little-endian 32-bit lanes hold
+// 0x43300000, i.e. the upper 32 bits of float64(0x1.0p52) (compare twop52); the remaining
+// two lanes are zero.
+var f64x2CvtFromIMask = [16]byte{
+	0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+}
+
+// lowerVFcvtFromInt lowers the conversion of the 32-bit integer lanes of x into floating
+// point lanes stored in ret. lane selects the destination shape (F32x4 or F64x2) and signed
+// selects whether the integers are interpreted as signed. Any other lane panics.
+//
+// The signed cases map to a single CVTDQ2PS/CVTDQ2PD, while the unsigned cases need the
+// multi-instruction sequences below because those instructions only handle signed inputs.
+func (m *machine) lowerVFcvtFromInt(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
+	switch lane {
+	case ssa.VecLaneF32x4:
+		if signed {
+			xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, xx, m.c.VRegOf(ret)))
+		} else {
+			xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+			// Copy the value to two temporary registers.
+			tmp := m.copyToTmp(xx.reg())
+			tmp2 := m.copyToTmp(xx.reg())
+
+			// Clear the higher 10 bits of each 32-bit element by shifting left and then
+			// logically right by 10 (0xa): tmp keeps only the lower 22 bits.
+			m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePslld, newOperandImm32(0xa), tmp))
+			m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0xa), tmp))
+
+			// Subtract the lower bits (tmp) from tmp2: this clears the lower 22 bits of tmp2,
+			// leaving only the higher 10 bits.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubd, newOperandReg(tmp), tmp2))
+
+			// Convert the lower bits in tmp.
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
+
+			// Logically right shift tmp2 by one so that it is non-negative as a signed 32-bit integer,
+			// and convert it, meaning that tmp2 holds the halved conversion result of the higher bits.
+			m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(1), tmp2))
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp2), tmp2))
+
+			// Double the converted halved higher bits.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp2), tmp2))
+
+			// Get the conversion result by adding tmp (holding the lower bits' conversion) into tmp2.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp), tmp2))
+
+			m.copyTo(tmp2, m.c.VRegOf(ret))
+		}
+	case ssa.VecLaneF64x2:
+		if signed {
+			xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2pd, xx, m.c.VRegOf(ret)))
+		} else {
+			maskReg := m.c.AllocateVReg(ssa.TypeV128)
+			maskLabel := m.getOrAllocateConstLabel(&m.constF64x2CvtFromIMaskIndex, f64x2CvtFromIMask[:])
+			// maskReg = [0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
+
+			_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+			xx := m.copyToTmp(_xx.reg())
+
+			// Given that we have xx = [d1, d2, d3, d4], this results in
+			//	xx = [d1, [0x00, 0x00, 0x30, 0x43], d2, [0x00, 0x00, 0x30, 0x43]]
+			//     = [float64(uint32(d1)) + 0x1.0p52, float64(uint32(d2)) + 0x1.0p52]
+			//     ^See https://stackoverflow.com/questions/13269523/can-all-32-bit-ints-be-exactly-represented-as-a-double
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeUnpcklps, newOperandReg(maskReg), xx))
+
+			// maskReg = [float64(0x1.0p52), float64(0x1.0p52)]
+			maskLabel = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
+
+			// Now, we get the result as
+			// 	xx = [float64(uint32(d1)), float64(uint32(d2))]
+			// because subtracting 0x1.0p52 is exact here:
+			//  (0x1.0p52 + float64(uint32(di))) - 0x1.0p52 = float64(uint32(di))
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubpd, newOperandReg(maskReg), xx))
+
+			m.copyTo(xx, m.c.VRegOf(ret))
+		}
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+}
+
+var (
+	// i32sMaxOnF64x2 holds math.MaxInt32(=2147483647.0) on two f64 lanes.
+	i32sMaxOnF64x2 = [16]byte{
+		0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
+		0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
+	}
+
+	// i32uMaxOnF64x2 holds math.MaxUint32(=4294967295.0) on two f64 lanes.
+	i32uMaxOnF64x2 = [16]byte{
+		0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
+		0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
+	}
+
+	// twop52 holds float64(0x1.0p52) on each of the two f64 lanes. 0x1.0p52 is special in the sense that
+	// with this exponent, the lower bits of the mantissa directly represent a uint32 number: after adding
+	// or subtracting such a number, the resulting floating point value holds exactly the same
+	// bit representation as the 32-bit integer in the lower 32 bits of its mantissa.
+	//
+	// Note: the name twop52 is common across various compiler ecosystems.
+	// 	E.g. https://github.com/llvm/llvm-project/blob/92ab024f81e5b64e258b7c3baaf213c7c26fcf40/compiler-rt/lib/builtins/floatdidf.c#L28
+	// 	E.g. https://opensource.apple.com/source/clang/clang-425.0.24/src/projects/compiler-rt/lib/floatdidf.c.auto.html
+	twop52 = [16]byte{
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
+	}
+)
+
+// lowerVFcvtToIntSat lowers the saturating conversion of the floating point lanes of x into
+// 32-bit integer lanes stored in ret. lane selects the source shape (F32x4 or F64x2) and
+// signed selects the signedness of the resulting integers. NaN lanes become zero and
+// out-of-range lanes are clamped to the integer range. For F64x2 the two results end up in
+// the lower two 32-bit lanes. Any other lane panics.
+func (m *machine) lowerVFcvtToIntSat(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
+	_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+	xx := m.copyToTmp(_xx.reg())
+
+	switch lane {
+	case ssa.VecLaneF32x4:
+		if signed {
+			tmp := m.copyToTmp(xx)
+
+			// Assuming we have xx = [v1, v2, v3, v4].
+			//
+			// Set all bits if lane is not NaN on tmp.
+			// tmp[i] = 0xffffffff  if vi != NaN
+			//        = 0           if vi == NaN
+			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
+
+			// Clear NaN lanes on xx, meaning that
+			// 	xx[i] = vi  if vi != NaN
+			//	        0   if vi == NaN
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp), xx))
+
+			// tmp[i] = ^vi  if vi != NaN
+			//        = 0    if vi == NaN
+			// which means that tmp[i] & 0x80000000 != 0 if and only if vi is non-NaN with a clear sign bit.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeXorps, newOperandReg(xx), tmp))
+
+			// xx[i] = int32(vi)   if vi != NaN and xx is not overflowing.
+			//       = 0x80000000  if vi != NaN and xx is overflowing (See https://www.felixcloutier.com/x86/cvttps2dq)
+			//       = 0           if vi == NaN
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
+
+			// Below, we have to convert 0x80000000 into 0x7FFFFFFF for positive overflowing lane.
+			//
+			// tmp[i] & 0x80000000 != 0  if vi is positive and overflowing (xx[i] = 0x80000000)
+			// tmp[i] & 0x80000000 == 0  otherwise.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(xx), tmp))
+
+			// Arithmetic right shifting tmp by 31, meaning that we have
+			// tmp[i] = 0xffffffff if vi is positive and overflowing, 0 otherwise.
+			m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrad, newOperandImm32(0x1f), tmp))
+
+			// Flipping 0x80000000 into 0x7FFFFFFF if vi is positive and overflowing, otherwise keep intact.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), xx))
+		} else {
+			// tmp = [0, 0, 0, 0]
+			tmp := m.c.AllocateVReg(ssa.TypeV128)
+			m.insert(m.allocateInstr().asZeros(tmp))
+			// Clamp NaN and negative lanes to zero:
+			//  xx[i] = vi  if vi != NaN && vi > 0
+			//        = 0   otherwise
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxps, newOperandReg(tmp), xx))
+			// Build float32(0x7fffffff), which rounds to 0x1.0p31, on every lane of tmp:
+			// all ones, logically shifted right by one, then converted to float.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp), tmp))
+			m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0x1), tmp))
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
+			// Keep the clamped floats in tmp2, and convert xx as signed:
+			//  xx[i] = int32(vi)   if vi < 0x1.0p31
+			//        = 0x80000000  otherwise (overflow marker of CVTTPS2DQ).
+			tmp2 := m.copyToTmp(xx)
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
+			// tmp2[i] = vi - 0x1.0p31
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubps, newOperandReg(tmp), tmp2))
+			// tmp[i] = 0xffffffff if 0x1.0p31 <= vi - 0x1.0p31 (i.e. vi does not fit in uint32), 0 otherwise.
+			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredLE_OS), newOperandReg(tmp2), tmp))
+			// tmp2[i] = int32(vi - 0x1.0p31), or 0x80000000 if that overflows.
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(tmp2), tmp2))
+			// Turn the overflow marker 0x80000000 into 0x7fffffff on the lanes flagged in tmp.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp2))
+			// Zero tmp again, and clamp the negative lanes of tmp2 (those with vi < 0x1.0p31) to zero.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp))
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaxsd, newOperandReg(tmp), tmp2))
+			// Add the part above 0x1.0p31 back: lanes that overflowed uint32 become 0x80000000 + 0x7fffffff = 0xffffffff.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(tmp2), xx))
+		}
+
+	case ssa.VecLaneF64x2:
+		tmp2 := m.c.AllocateVReg(ssa.TypeV128)
+		if signed {
+			tmp := m.copyToTmp(xx)
+
+			// Set all bits for non-NaN lanes, zeros otherwise.
+			// I.e. tmp[i] = 0xffffffff_ffffffff if vi != NaN, 0 otherwise.
+			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmppd, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
+
+			maskLabel := m.getOrAllocateConstLabel(&m.constI32sMaxOnF64x2Index, i32sMaxOnF64x2[:])
+			// Load the 2147483647 into tmp2's each lane.
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp2))
+
+			// tmp[i] = 2147483647 if vi != NaN, 0 otherwise.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp2), tmp))
+
+			// MINPD returns the source operand's value (tmp here) as-is when either operand is NaN, so we have
+			//  xx[i] = min(vi, 2147483647)  if vi != NaN
+			//        = 0                    if vi == NaN
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp), xx))
+
+			// Convert with truncation; lanes below the int32 range yield 0x80000000, which is math.MinInt32.
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttpd2dq, newOperandReg(xx), xx))
+		} else {
+			tmp := m.c.AllocateVReg(ssa.TypeV128)
+			m.insert(m.allocateInstr().asZeros(tmp))
+
+			//  xx[i] = vi   if vi != NaN && vi > 0
+			//        = 0    if vi == NaN || vi <= 0
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxpd, newOperandReg(tmp), xx))
+
+			// tmp2[i] = float64(math.MaxUint32) = math.MaxUint32
+			maskIndex := m.getOrAllocateConstLabel(&m.constI32uMaxOnF64x2Index, i32uMaxOnF64x2[:])
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
+
+			// xx[i] = vi              if vi != NaN && vi > 0 && vi <= math.MaxUint32
+			//       = math.MaxUint32  if vi > math.MaxUint32
+			//       = 0               otherwise
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp2), xx))
+
+			// Round the floating points into integer (rounding mode 0x3 truncates toward zero).
+			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeRoundpd, 0x3, newOperandReg(xx), xx))
+
+			// tmp2[i] = float64(0x1.0p52)
+			maskIndex = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
+			m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
+
+			// xx[i] = float64(0x1.0p52) + ui where ui is the clamped and truncated value of vi computed above.
+			//
+			// This means that xx[i] holds exactly the same bit of uint32(ui) in its lower 32-bits.
+			m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddpd, newOperandReg(tmp2), xx))
+
+			// At this point, we have
+			// 	xx  = [uint32(u0), upper half of the f64, uint32(u1), upper half of the f64]
+			//  tmp = [0, 0, 0, 0]
+			// as 32x4 lanes. Therefore, SHUFPS with 0b00_00_10_00 results in
+			//	xx = [xx[00], xx[10], tmp[00], tmp[00]] = [xx[00], xx[10], 0, 0]
+			// meaning that for i = 0 and 1, we have
+			//  xx[i] = uint32(vi)      if vi != NaN && vi > 0 && vi <= math.MaxUint32
+			//        = math.MaxUint32  if vi > math.MaxUint32
+			//        = 0               otherwise.
+			m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeShufps, 0b00_00_10_00, newOperandReg(tmp), xx))
+		}
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+
+	m.copyTo(xx, m.c.VRegOf(ret))
+}
+
+// lowerNarrow lowers the saturating narrowing of the vectors x and y into ret.
+//
+// lane is the shape of the inputs (I16x8 or I32x4) and signed selects between the
+// signed-saturating (PACKSS*) and unsigned-saturating (PACKUS*) pack instruction.
+// x is copied into a temporary which the pack instruction combines with y in place.
+// Any other lane panics.
+func (m *machine) lowerNarrow(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
+	xReg := m.getOperand_Reg(m.c.ValueDefinition(x))
+	packed := m.copyToTmp(xReg.reg())
+	rhs := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
+
+	var packOp sseOpcode
+	switch {
+	case lane == ssa.VecLaneI16x8 && signed:
+		packOp = sseOpcodePacksswb
+	case lane == ssa.VecLaneI16x8:
+		packOp = sseOpcodePackuswb
+	case lane == ssa.VecLaneI32x4 && signed:
+		packOp = sseOpcodePackssdw
+	case lane == ssa.VecLaneI32x4:
+		packOp = sseOpcodePackusdw
+	default:
+		panic(fmt.Sprintf("invalid lane type: %s", lane))
+	}
+
+	pack := m.allocateInstr()
+	pack.asXmmRmR(packOp, rhs, packed)
+	m.insert(pack)
+
+	m.copyTo(packed, m.c.VRegOf(ret))
+}
+
+// lowerWideningPairwiseDotProductS lowers the signed widening pairwise dot product of x and y
+// into ret with a single PMADDWD, applied in place to a temporary copy of x.
+func (m *machine) lowerWideningPairwiseDotProductS(x, y, ret ssa.Value) {
+	lhs := m.getOperand_Reg(m.c.ValueDefinition(x))
+	acc := m.copyToTmp(lhs.reg())
+	rhs := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
+
+	pmaddwd := m.allocateInstr()
+	pmaddwd.asXmmRmR(sseOpcodePmaddwd, rhs, acc)
+	m.insert(pmaddwd)
+
+	m.copyTo(acc, m.c.VRegOf(ret))
+}
+
+// lowerVIabs lowers the lane-wise integer absolute value of the vector argument of instr
+// into the instruction's return register.
+//
+// I8x16, I16x8 and I32x4 map directly to PABSB/PABSW/PABSD. There is no such instruction
+// selected for I64x2 here, so that case computes the negation and picks, per lane, between
+// the original and the negated value with BLENDVPD. Any other lane panics instead of
+// silently emitting the zero-valued opcode.
+func (m *machine) lowerVIabs(instr *ssa.Instruction) {
+	x, lane := instr.ArgWithLane()
+	rd := m.c.VRegOf(instr.Return())
+
+	if lane == ssa.VecLaneI64x2 {
+		_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
+
+		// BLENDVPD takes its selection mask implicitly from XMM0, hence the fixed register.
+		blendReg := xmm0VReg
+		m.insert(m.allocateInstr().asDefineUninitializedReg(blendReg))
+
+		tmp := m.copyToTmp(_xx.reg())
+		xx := m.copyToTmp(_xx.reg())
+
+		// Clear all bits on blendReg.
+		m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(blendReg), blendReg))
+		// Subtract xx from blendReg, meaning that blendReg holds the negated lanes.
+		m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubq, newOperandReg(xx), blendReg))
+		// Copy the negated value ^^ back into xx.
+		m.copyTo(blendReg, xx)
+
+		// With the negated value as the mask: where its sign bit is set (the original lane is
+		// positive) the original value in tmp is taken, otherwise the negated value in xx is kept.
+		m.insert(m.allocateInstr().asBlendvpd(newOperandReg(tmp), xx))
+
+		m.copyTo(xx, rd)
+	} else {
+		var vecOp sseOpcode
+		switch lane {
+		case ssa.VecLaneI8x16:
+			vecOp = sseOpcodePabsb
+		case ssa.VecLaneI16x8:
+			vecOp = sseOpcodePabsw
+		case ssa.VecLaneI32x4:
+			vecOp = sseOpcodePabsd
+		default:
+			// Without this, vecOp would stay at its zero value and a bogus instruction would be emitted.
+			panic("unsupported: " + lane.String())
+		}
+		rn := m.getOperand_Reg(m.c.ValueDefinition(x))
+
+		i := m.allocateInstr()
+		i.asXmmUnaryRmR(vecOp, rn, rd)
+		m.insert(i)
+	}
+}
+
+// lowerVIpopcnt lowers the per-byte population count of the vector argument of instr into
+// the instruction's return register.
+//
+// Each byte is split into its lower and higher 4 bits, both halves are looked up in a
+// 16-entry table of 4-bit population counts via PSHUFB, and the two counts are added.
+func (m *machine) lowerVIpopcnt(instr *ssa.Instruction) {
+	x := instr.Arg()
+	rn := m.getOperand_Reg(m.c.ValueDefinition(x))
+	rd := m.c.VRegOf(instr.Return())
+
+	// nibbleMask = [0x0f, ..., 0x0f]
+	nibbleMask := m.c.AllocateVReg(ssa.TypeV128)
+	m.lowerVconst(nibbleMask, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f)
+
+	// Given that we have:
+	//  rn = [b1, ..., b16] where bn = hn:ln and hn and ln are higher and lower 4-bits of bn,
+	// masking a copy of the input leaves only the lower 4 bits:
+	//  lowNibbles = [l1, ..., l16].
+	lowNibbles := m.copyToTmp(rn.reg())
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(nibbleMask), lowNibbles))
+
+	// Logically right shift another copy by 4 (as packed words) and mask it as well, so that
+	//  highNibbles = [h1, ..., h16].
+	highNibbles := m.copyToTmp(rn.reg())
+	m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlw, newOperandImm32(4), highNibbles))
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(nibbleMask), highNibbles))
+
+	// Materialize the population count table:
+	//  lowCounts = [0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04]
+	lowCounts := m.c.AllocateVReg(ssa.TypeV128)
+	m.lowerVconst(lowCounts, 0x03_02_02_01_02_01_01_00, 0x04_03_03_02_03_02_02_01)
+
+	// PSHUFB overwrites the table, so a second copy is needed for the higher halves.
+	highCounts := m.copyToTmp(lowCounts)
+
+	//  lowCounts = [popcnt(l1), ..., popcnt(l16)].
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(lowNibbles), lowCounts))
+
+	//  highCounts = [popcnt(h1), ..., popcnt(h16)].
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(highNibbles), highCounts))
+
+	// lowCounts + highCounts is the result.
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddb, newOperandReg(lowCounts), highCounts))
+
+	m.copyTo(highCounts, rd)
+}
+
+// lowerVImul lowers the lane-wise integer multiplication of the two vector arguments of
+// instr into the instruction's return register.
+//
+// I16x8 and I32x4 map to PMULLW and PMULLD. I64x2 is composed from three 32x32->64-bit
+// PMULUDQ multiplications. Any other lane panics.
+func (m *machine) lowerVImul(instr *ssa.Instruction) {
+	x, y, lane := instr.Arg2WithLane()
+	rd := m.c.VRegOf(instr.Return())
+
+	if lane != ssa.VecLaneI64x2 {
+		var vecOp sseOpcode
+		switch lane {
+		case ssa.VecLaneI16x8:
+			vecOp = sseOpcodePmullw
+		case ssa.VecLaneI32x4:
+			vecOp = sseOpcodePmulld
+		default:
+			panic("unsupported: " + lane.String())
+		}
+		m.lowerVbBinOp(vecOp, x, y, instr.Return())
+		return
+	}
+
+	rn := m.getOperand_Reg(m.c.ValueDefinition(x))
+	rm := m.getOperand_Reg(m.c.ValueDefinition(y))
+	// Assuming that we have
+	//  rn = [p1, p2] = [p1_lo, p1_hi, p2_lo, p2_hi]
+	//  rm = [q1, q2] = [q1_lo, q1_hi, q2_lo, q2_hi]
+	// where pN and qN are 64-bit (quad word) lanes, and the _lo/_hi parts are 32-bit (double word) lanes.
+
+	// crossA = rn >> 32 on each 64-bit lane = [p1_hi, 0, p2_hi, 0].
+	crossA := m.copyToTmp(rn.reg())
+	m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), crossA))
+
+	// "pmuludq rm,crossA" makes crossA = [p1_hi*q1_lo, p2_hi*q2_lo] where each lane is 64-bit.
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmuludq, rm, crossA))
+
+	// crossB = rm >> 32 on each 64-bit lane = [q1_hi, 0, q2_hi, 0].
+	crossB := m.copyToTmp(rm.reg())
+	m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), crossB))
+
+	// "pmuludq rn,crossB" makes crossB = [p1_lo*q1_hi, p2_lo*q2_hi] where each lane is 64-bit.
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmuludq, rn, crossB))
+
+	// Sum the two cross products and move the sum into the upper 32 bits:
+	//  crossA = [(p1_hi*q1_lo+p1_lo*q1_hi)<<32, (p2_hi*q2_lo+p2_lo*q2_hi)<<32]
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddq, newOperandReg(crossB), crossA))
+	m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsllq, newOperandImm32(32), crossA))
+
+	// "pmuludq rn,lowProd" on a copy of rm makes lowProd = [p1_lo*q1_lo, p2_lo*q2_lo] where each lane is 64-bit.
+	lowProd := m.copyToTmp(rm.reg())
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmuludq, rn, lowProd))
+
+	// Finally, the result is crossA + lowProd:
+	//  crossA = [(p1_hi*q1_lo+p1_lo*q1_hi)<<32+p1_lo*q1_lo, (p2_hi*q2_lo+p2_lo*q2_hi)<<32+p2_lo*q2_lo]
+	m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddq, newOperandReg(lowProd), crossA))
+
+	m.copyTo(crossA, rd)
+}
--- a/Show More
+++ b/Show More