Skip to content

Commit

Permalink
all: add support for reading Go assembly files
Browse files Browse the repository at this point in the history
This is a pretty big change that adds support for Go assembly files.
Go assembly files are different from standard assembly and can't be read
by tools like Clang. They also use a different calling convention. On
top of that, the tools that can read them (the gc toolchain) only output
custom (unstable) object files. In a previous attempt I tried to parse
those object files directly; in this attempt I instead read the output of
`go tool asm -S`, which is a more-or-less stable text format (that,
importantly, includes the raw machine code and all relocations).

As a result:

  * This implements syscall.seek on 386 and arm. The following works
    with this commit: `GOARCH=arm tinygo test os`.
  * This changes the math package, probably speeding up some algorithms
    (although I didn't benchmark them).
  * The crypto/aes package now works.
  * Undefined references to crypto/internal/boring/sig.StandardCrypto in
    Go 1.19 are fixed.

So far, this is only for Linux (on all supported architectures: 386,
amd64, arm, arm64). Adding support for macOS and Windows should be
feasible by implementing the appropriate file formats (MachO and COFF).
It should be relatively easy to also add support for js/wasm. I'm not so
sure about other systems where the GOARCH doesn't match the actual
architecture, like WASI and most/all baremetal systems. It might be
possible to get them to work by lifting ARM machine code to LLVM IR, but
that will be quite some work in itself and certainly something for the
longer term.
  • Loading branch information
aykevl committed Aug 30, 2022
1 parent a255c6e commit 52e7471
Show file tree
Hide file tree
Showing 15 changed files with 1,361 additions and 55 deletions.
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ endif
# compress/lzw fails windows go 1.18 wasi, https://github.com/tinygo-org/tinygo/issues/2762

# Additional standard library packages that pass tests on individual platforms
TEST_PACKAGES_LINUX := \
TEST_PACKAGES_DARWIN := \
archive/zip \
compress/flate \
compress/lzw \
Expand All @@ -346,7 +346,10 @@ TEST_PACKAGES_LINUX := \
testing/fstest \
text/template/parse

TEST_PACKAGES_DARWIN := $(TEST_PACKAGES_LINUX)
TEST_PACKAGES_LINUX := \
$(TEST_PACKAGES_LINUX) \
crypto/aes


TEST_PACKAGES_WINDOWS := \
compress/lzw
Expand Down
42 changes: 41 additions & 1 deletion builder/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"sort"
"strconv"
"strings"
"sync"

"github.com/gofrs/flock"
"github.com/tinygo-org/tinygo/compileopts"
Expand Down Expand Up @@ -86,6 +87,7 @@ type packageAction struct {
OptLevel int // LLVM optimization level (0-3)
SizeLevel int // LLVM optimization for size level (0-2)
UndefinedGlobals []string // globals that are left as external globals (no initializer)
GoAsmReferences map[string]string // symbols that are defined or referenced in Go assembly
}

// Build performs a single package to executable Go build. It takes in a package
Expand Down Expand Up @@ -227,6 +229,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
config.Options.GlobalValues["runtime"]["buildVersion"] = version
}

var goasmObjectFiles []*compileJob
var embedFileObjects []*compileJob
for _, pkg := range lprogram.Sorted() {
pkg := pkg // necessary to avoid a race condition
Expand Down Expand Up @@ -278,6 +281,39 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
embedFileObjects = append(embedFileObjects, job)
}

// References from Go code to assembly functions implemented in Go
// assembly. Example: {"math.archSqrt": "__GoABI0_math.archSqrt"}
goAsmReferences := map[string]string{}
var goAsmReferencesLock sync.Mutex
for _, filename := range pkg.SFiles {
parts := strings.Split(config.Triple(), "-")
if len(parts) < 3 || parts[2] != "linux" {
// Go assembly files are only supported on Linux so far.
continue
}
abspath := filepath.Join(pkg.Dir, filename)
job := &compileJob{
description: "compile Go assembly file " + abspath,
run: func(job *compileJob) error {
result, references, err := compileAsmFile(abspath, dir, pkg.Pkg.Path(), config)

// Add references (both defined and undefined) to the
// goAsmReferences map so that the compiler can create
// wrapper functions.
goAsmReferencesLock.Lock()
for internal, external := range references {
goAsmReferences[internal] = external
}
goAsmReferencesLock.Unlock()

job.result = result
return err
},
}
actionIDDependencies = append(actionIDDependencies, job)
goasmObjectFiles = append(goasmObjectFiles, job)
}

// Action ID jobs need to know the action ID of all the jobs the package
// imports.
var importedPackages []*compileJob
Expand Down Expand Up @@ -312,6 +348,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
OptLevel: optLevel,
SizeLevel: sizeLevel,
UndefinedGlobals: undefinedGlobals,
GoAsmReferences: goAsmReferences,
}
for filePath, hash := range pkg.FileHashes {
actionID.FileHashes[filePath] = hex.EncodeToString(hash)
Expand Down Expand Up @@ -351,7 +388,7 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil

// Compile AST to IR. The compiler.CompilePackage function will
// build the SSA as needed.
mod, errs := compiler.CompilePackage(pkg.ImportPath, pkg, program.Package(pkg.Pkg), machine, compilerConfig, config.DumpSSA())
mod, errs := compiler.CompilePackage(pkg.ImportPath, pkg, program.Package(pkg.Pkg), machine, compilerConfig, goAsmReferences, config.DumpSSA())
defer mod.Context().Dispose()
defer mod.Dispose()
if errs != nil {
Expand Down Expand Up @@ -694,6 +731,9 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil
ldflags = append(ldflags, lprogram.LDFlags...)
}

// Add ELF object files created from Go assembly files.
linkerDependencies = append(linkerDependencies, goasmObjectFiles...)

// Add libc dependencies, if they exist.
linkerDependencies = append(linkerDependencies, libcDependencies...)

Expand Down
281 changes: 281 additions & 0 deletions builder/goasm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
package builder

// This file parses Go-flavored assembly. Specifically, it runs Go assembly
// through `go tool asm -S` and parses the output.

import (
"bytes"
"debug/elf"
"fmt"
"go/scanner"
"go/token"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"

"github.com/tinygo-org/tinygo/compileopts"
"github.com/tinygo-org/tinygo/goenv"
"github.com/tinygo-org/tinygo/objfile"
)

// asmBytesRegexp matches the hex dump lines in the output of
// `go tool asm -S` that carry raw machine code, for example:
//
//	"\t0x0000 48 8b 44 24 08 ..."
//
// Every line that belongs to a symbol starts with a tab (lines without a
// leading tab start a new symbol), so the pattern is anchored on a tab followed
// by a four-digit hexadecimal offset and at least one two-digit byte value.
// Instruction listing lines such as "\t0x0000 00000 (file.s:10)\tTEXT ..." do
// not match because their second field is longer than two digits.
var asmBytesRegexp = regexp.MustCompile(`^\t0x[0-9a-f]{4} [0-9a-f]{2} `)

// goasmReloc is a single relocation parsed from a "rel" line in the output of
// `go tool asm -S`. Relocations are collected per symbol while its chunk is
// parsed and are only added to the output object file once the symbol itself
// has been added.
type goasmReloc struct {
// Offset of the relocation, as given by the first number of the "rel" line.
// It is passed unchanged as the offset argument when the relocation is added
// to the object file.
offset uint64
// Symbolic relocation type, translated from the numeric type on the "rel"
// line.
reloc objfile.Reloc
// Link name of the referenced symbol: the symbol name with the "__GoABI0_"
// prefix prepended.
linkName string
// Addend of the relocation as parsed from the "rel" line. For ARM call
// relocations it is replaced by -8.
addend int64
}

// compileAsmFile compiles the given Go assembly file to a standard (ELF)
// object file. It does so by running the file through `go tool asm -S` and
// parsing the textual listing, which includes the raw machine code and all
// relocations of every symbol.
//
// Parameters:
//   - path: the Go assembly file to compile.
//   - tmpdir: directory in which the intermediate Go object file and the
//     resulting object file are created.
//   - importPath: import path of the package the file belongs to. It is passed
//     to the assembler with -p and used to expand the `"".` symbol prefix.
//   - config: provides GOOS/GOARCH and the optional PrintCommands hook.
//
// Returns the path of the written object file, a map from symbol name to link
// name ("__GoABI0_" + name) and an error. The map contains every emitted
// symbol together with the symbols it references through relocations, minus
// the symbols that are marked static in this file.
//
// Symbols that use a TLS relocation are not supported yet and are left out of
// the object file (and of the returned map).
func compileAsmFile(path, tmpdir, importPath string, config *compileopts.Config) (string, map[string]string, error) {
	references := make(map[string]string)
	goroot := goenv.Get("GOROOT")

	// We need to know the Go version to be able to understand numeric
	// relocation types.
	_, goMinor, err := goenv.GetGorootVersion(goroot)
	if err != nil {
		return "", nil, err
	}

	// Create a temporary file to store the Go object file output.
	// We won't be using this file, but it has to be stored somewhere.
	goobjfile, err := ioutil.TempFile(tmpdir, "goasm-"+filepath.Base(path)+"-*.go.o")
	if err != nil {
		return "", nil, err
	}
	goobjfile.Close()

	// Compile the assembly file, and capture stdout.
	commandName := filepath.Join(goroot, "bin", "go")
	args := []string{"tool", "asm", "-S", "-p", importPath, "-o", goobjfile.Name(), "-I", filepath.Join(goroot, "pkg", "include"), path}
	cmd := exec.Command(commandName, args...)
	if config.Options.PrintCommands != nil {
		config.Options.PrintCommands(commandName, args...)
	}
	var stdout bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = os.Stderr
	// Start from the current environment: a non-nil Env replaces the whole
	// environment of the child process, so appending to the (nil) cmd.Env
	// would run the Go tool with nothing but GOOS and GOARCH set. Later
	// entries win, so GOOS/GOARCH override any inherited values.
	cmd.Env = append(os.Environ(), "GOOS="+config.GOOS(), "GOARCH="+config.GOARCH())
	err = cmd.Run()
	if err != nil {
		return "", nil, fmt.Errorf("could not invoke Go assembler: %w", err)
	}

	// Split the stdout text into symbol chunks. A line that doesn't start
	// with a tab is the header of a new symbol, all following tab-indented
	// lines belong to that symbol.
	var current []string
	var chunks [][]string
	for {
		line, readErr := stdout.ReadString('\n')
		if line != "" {
			// Also process a final line that lacks a trailing newline.
			if !strings.HasPrefix(line, "\t") {
				// Start a new chunk.
				if len(current) != 0 {
					chunks = append(chunks, current)
				}
				current = nil
			}
			current = append(current, line)
		}
		if readErr != nil {
			break
		}
	}
	if len(current) != 0 {
		// Add last chunk.
		chunks = append(chunks, current)
	}

	// Determine which output file format to use based on the architecture.
	var out objfile.ObjectFile
	switch config.GOARCH() {
	case "386":
		out = objfile.NewELF(elf.EM_386)
	case "amd64":
		out = objfile.NewELF(elf.EM_X86_64)
	case "arm":
		out = objfile.NewELF(elf.EM_ARM)
	case "arm64":
		out = objfile.NewELF(elf.EM_AARCH64)
	default:
		return "", nil, scanner.Error{
			Pos: token.Position{
				Filename: path,
			},
			Msg: fmt.Sprintf("unknown GOARCH while creating ELF file: %s", config.GOARCH()),
		}
	}

	// Parse each chunk (equivalent to a single symbol).
	localSymbols := make(map[string]struct{})
	for _, chunk := range chunks {
		header := chunk[0]
		lines := chunk[1:]
		headerFields := strings.Fields(header)
		if len(headerFields) < 2 {
			// A blank or truncated header would otherwise cause an index out
			// of range panic below.
			return "", nil, fmt.Errorf("cannot parse symbol header %#v in Go assembler output for %s", strings.TrimSpace(header), path)
		}
		symbolName := getSymbolName(headerFields[0], importPath)
		var section string
		switch headerFields[1] {
		case "STEXT":
			section = "text"
		case "SRODATA":
			section = "rodata"
		default:
			return "", nil, fmt.Errorf("unknown section type: %s", headerFields[1])
		}
		bind := objfile.LinkageODR
		for _, flag := range headerFields[2:] {
			if flag == "static" {
				bind = objfile.LinkageLocal
				localSymbols[symbolName] = struct{}{}
			}
		}
		chunkReferences := []string{symbolName}
		var buf []byte
		var parsedRelocs []goasmReloc
		canUseSymbol := true
		for _, line := range lines {
			switch {
			case asmBytesRegexp.MatchString(line):
				// The hex bytes live in columns 8 up to 55, the remainder of
				// the line is an ASCII rendering of the same bytes. Clamp the
				// end so that an unpadded line cannot cause a panic.
				hexEnd := 55
				if len(line) < hexEnd {
					hexEnd = len(line)
				}
				values := strings.Fields(line[8:hexEnd])
				for _, value := range values {
					n, err := strconv.ParseUint(value, 16, 8)
					if err != nil {
						return "", nil, scanner.Error{
							Pos: token.Position{
								Filename: path,
							},
							Msg: fmt.Sprintf("could not parse Go assembly: %v", err),
						}
					}
					buf = append(buf, uint8(n))
				}
			case strings.HasPrefix(line, "\trel "):
				var offset, size uint64
				var typ string
				var symaddend string
				if _, err := fmt.Sscanf(line, "\trel %d+%d t=%s %s", &offset, &size, &typ, &symaddend); err != nil {
					return "", nil, fmt.Errorf("cannot read relocation %s: %w", strings.TrimSpace(line), err)
				}
				if size == 0 {
					// This can happen for instructions like "CALL AX", possibly
					// as a way to signal there is a function pointer call.
					continue
				}
				index := strings.LastIndexByte(symaddend, '+')
				if index < 0 {
					return "", nil, fmt.Errorf("cannot find addend for relocation %s", strings.TrimSpace(line))
				}
				targetName := getSymbolName(symaddend[:index], importPath)
				chunkReferences = append(chunkReferences, targetName)
				reloc := getRelocType(typ, goMinor)
				if reloc == objfile.RelocNone {
					return "", nil, scanner.Error{
						Pos: token.Position{
							Filename: path,
						},
						Msg: fmt.Sprintf("unknown relocation type %s in relocation %#v", typ, strings.TrimSpace(line)),
					}
				}
				if reloc == objfile.RelocTLS_LE {
					// This relocation seems to be used mostly for goroutine
					// stack size checks. This is not yet supported, so don't
					// emit this symbol in the output object file.
					// Note that this break only leaves the switch: the
					// remaining lines of the chunk are still parsed.
					canUseSymbol = false
					break
				}

				// The addend is a hexadecimal number on ARM and a decimal
				// number everywhere else.
				addendText := symaddend[index+1:]
				var addend int64
				var parseErr error
				if config.GOARCH() == "arm" {
					addend, parseErr = strconv.ParseInt(addendText, 16, 64)
				} else {
					addend, parseErr = strconv.ParseInt(addendText, 10, 64)
				}
				if parseErr != nil {
					return "", nil, fmt.Errorf("cannot read addend for relocation %s: %w", strings.TrimSpace(line), parseErr)
				}
				if config.GOARCH() == "arm" && reloc == objfile.RelocCALL {
					// It appears that the instruction is encoded in the
					// addend.
					// That seems like a bad idea, so instead write the
					// instruction back into the machine code and use a
					// conventional addend of -8 (for standard 8-byte ARM PC
					// offset).
					if offset > uint64(len(buf)) || uint64(len(buf))-offset < 4 {
						return "", nil, fmt.Errorf("relocation %s lies outside the machine code of symbol %s", strings.TrimSpace(line), symbolName)
					}
					buf[offset+0] = byte(addend >> 0)
					buf[offset+1] = byte(addend >> 8)
					buf[offset+2] = byte(addend >> 16)
					buf[offset+3] = byte(addend >> 24)
					addend = -8
				}
				parsedRelocs = append(parsedRelocs, goasmReloc{
					offset:   offset,
					reloc:    reloc,
					linkName: "__GoABI0_" + targetName,
					addend:   addend,
				})
			}
		}

		// Only add the symbol when it is usable.
		if canUseSymbol {
			symbolIndex := out.AddSymbol("__GoABI0_"+symbolName, section, bind, buf)
			for _, reloc := range parsedRelocs {
				out.AddReloc(symbolIndex, reloc.offset, reloc.reloc, reloc.linkName, reloc.addend)
			}
			for _, name := range chunkReferences {
				references[name] = "__GoABI0_" + name
			}
		}
	}

	// Some symbols are defined as local in this assembly file but are still
	// referenced. They should not be returned as references.
	for name := range localSymbols {
		delete(references, name)
	}

	// Write output object file.
	objpath := strings.TrimSuffix(goobjfile.Name(), ".go.o") + ".o"
	err = os.WriteFile(objpath, out.Bytes(), 0o666)
	if err != nil {
		return "", nil, fmt.Errorf("failed to write object file for %s: %w", path, err)
	}
	return objpath, references, nil
}

// getSymbolName converts a symbol name as printed by `go tool asm` to the name
// used as linkname (without the __GoABI0_ prefix). Names that start with the
// `"".` placeholder refer to the package being assembled, so the placeholder
// is replaced by the given import path. All other names are returned as is.
func getSymbolName(name, importPath string) string {
	const localPrefix = `"".`
	if !strings.HasPrefix(name, localPrefix) {
		return name
	}
	return importPath + "." + name[len(localPrefix):]
}

// getRelocType translates the numeric relocation type t, as printed by
// `go tool asm -S`, to a symbolic relocation type. The numbering differs
// between Go versions, so the minor version of the Go toolchain has to be
// given as well. objfile.RelocNone is returned for an unknown Go version or an
// unknown relocation type.
func getRelocType(t string, goMinor int) objfile.Reloc {
	// See: https://github.com/golang/go/blob/master/src/cmd/internal/objabi/reloctype.go
	// When adding a new Go version, check whether the relocations changed.
	if goMinor != 18 && goMinor != 19 {
		return objfile.RelocNone
	}

	// Relocation numbers shared by Go 1.18 and Go 1.19.
	switch t {
	case "1", "3": // R_ADDR, R_ADDRARM64
		return objfile.RelocADDR
	case "7", "8", "9": // R_CALL, R_CALLARM, R_CALLARM64
		return objfile.RelocCALL
	case "14": // R_PCREL
		return objfile.RelocPCREL
	case "15": // R_TLS_LE
		return objfile.RelocTLS_LE
	}
	return objfile.RelocNone
}
Loading

0 comments on commit 52e7471

Please sign in to comment.