Plan 9 from Bell Labs’s /usr/web/sources/contrib/ericvh/go-plan9/src/cmd/cgo/gcc.go

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


// Copyright 2009 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Annotate Crefs in Prog with C types by parsing gcc debug output.
// Conversion of debug output to Go types.

package main

import (
	"bytes";
	"debug/dwarf";
	"debug/elf";
	"debug/macho";
	"fmt";
	"go/ast";
	"go/token";
	"os";
	"strconv";
	"strings";
)

// loadDebugInfo asks gcc about every C name referenced in p.Crefs and
// records the answers in the Crefs themselves.  It runs gcc twice: once
// to classify each name as a type, a value, or undeclared from gcc's
// diagnostics, and once to obtain DWARF type information for each name.
// Each Cref gets TypeName set, plus either FuncType (for a non-type name
// of function type used in a "call" context) or Type.  p.Typedef
// receives the typedefs accumulated during the conversion.
//
// Problems are reported through error and fatal.  Like the rest of
// this file, the code assumes that fatal does not return.
func (p *Prog) loadDebugInfo() {
	// Construct a slice of unique names from p.Crefs.
	// m maps each name to its index in names.
	m := make(map[string]int);
	for _, c := range p.Crefs {
		m[c.Name] = -1
	}
	names := make([]string, 0, len(m));
	for name, _ := range m {
		i := len(names);
		names = names[0 : i+1];
		names[i] = name;
		m[name] = i;
	}

	// Coerce gcc into telling us whether each name is
	// a type, a value, or undeclared.  We compile a function
	// containing the line:
	//	name;
	// If name is a type, gcc will print:
	//	x.c:2: warning: useless type name in empty declaration
	// If name is a value, gcc will print
	//	x.c:2: warning: statement with no effect
	// If name is undeclared, gcc will print
	//	x.c:2: error: 'name' undeclared (first use in this function)
	// A line number directive causes the line number to
	// correspond to the index in the names array.
	var b bytes.Buffer;
	b.WriteString(p.Preamble);
	b.WriteString("void f(void) {\n");
	b.WriteString("#line 0 \"cgo-test\"\n");
	for _, n := range names {
		b.WriteString(n);
		b.WriteString(";\n");
	}
	b.WriteString("}\n");

	// kind maps a name to "type", "value" or "error" (undeclared).
	kind := make(map[string]string);
	_, stderr := p.gccDebug(b.Bytes());
	if stderr == "" {
		fatal("gcc produced no output")
	}
	for _, line := range strings.Split(stderr, "\n", 0) {
		// Only diagnostics of the form "cgo-test:N: ..." are of interest.
		if len(line) < 9 || line[0:9] != "cgo-test:" {
			continue
		}
		line = line[9:];
		colon := strings.Index(line, ":");
		if colon < 0 {
			continue
		}
		i, err := strconv.Atoi(line[0:colon]);
		if err != nil {
			continue
		}
		what := "";
		switch {
		default:
			continue
		case strings.Index(line, ": useless type name in empty declaration") >= 0:
			what = "type"
		case strings.Index(line, ": statement with no effect") >= 0:
			what = "value"
		case strings.Index(line, "undeclared") >= 0:
			what = "error"
		}
		// A diagnostic whose line number does not correspond to an
		// entry in names cannot be about one of our names; skip it
		// rather than index out of range.
		if i < 0 || i >= len(names) {
			continue
		}
		if old, ok := kind[names[i]]; ok && old != what {
			error(noPos, "inconsistent gcc output about C.%s", names[i])
		}
		kind[names[i]] = what;
	}
	for _, n := range names {
		if _, ok := kind[n]; !ok {
			error(noPos, "could not determine kind of name for C.%s", n)
		}
	}

	if nerrors > 0 {
		fatal("failed to interpret gcc output:\n%s", stderr)
	}

	// Extract the types from the DWARF section of an object
	// from a well-formed C program.  Gcc only generates DWARF info
	// for symbols in the object file, so it is not enough to print the
	// preamble and hope the symbols we care about will be there.
	// Instead, emit
	//	typeof(names[i]) *__cgo__i;
	// for each entry in names and then dereference the type we
	// learn for __cgo__i.
	b.Reset();
	b.WriteString(p.Preamble);
	for i, n := range names {
		fmt.Fprintf(&b, "typeof(%s) *__cgo__%d;\n", n, i)
	}
	d, stderr := p.gccDebug(b.Bytes());
	if d == nil {
		fatal("gcc failed:\n%s\non input:\n%s", stderr, b.Bytes())
	}

	// Scan DWARF info for top-level TagVariable entries with AttrName __cgo__i.
	types := make([]dwarf.Type, len(names));
	r := d.Reader();
	for {
		e, err := r.Next();
		if err != nil {
			fatal("reading DWARF entry: %s", err)
		}
		if e == nil {
			break
		}
		if e.Tag == dwarf.TagVariable {
			name, _ := e.Val(dwarf.AttrName).(string);
			typOff, _ := e.Val(dwarf.AttrType).(dwarf.Offset);
			if name == "" || typOff == 0 {
				fatal("malformed DWARF TagVariable entry")
			}
			if strings.HasPrefix(name, "__cgo__") {
				typ, err := d.Type(typOff);
				if err != nil {
					fatal("loading DWARF type: %s", err)
				}
				t, ok := typ.(*dwarf.PtrType);
				if !ok || t == nil {
					fatal("internal error: %s has non-pointer type", name)
				}
				// The suffix must be an index we generated above; a
				// variable such as __cgo__99 declared in the preamble
				// must not be allowed to index past the end of types.
				i, err := strconv.Atoi(name[7:]);
				if err != nil || i < 0 || i >= len(types) {
					fatal("malformed __cgo__ name: %s", name)
				}
				types[i] = t.Type;
			}
		}

		// Only descend into compile units; everything of interest
		// is a direct child of one.
		if e.Tag != dwarf.TagCompileUnit {
			r.SkipChildren()
		}
	}

	// Record types and typedef information in Crefs.
	var conv typeConv;
	conv.Init(p.PtrSize);
	for _, c := range p.Crefs {
		i := m[c.Name];
		// Without this check a name that gcc accepted but emitted no
		// DWARF variable for would reach conv.Type as a nil type.
		if types[i] == nil {
			fatal("gcc produced no type information for C.%s", c.Name)
		}
		c.TypeName = kind[c.Name] == "type";
		f, fok := types[i].(*dwarf.FuncType);
		if c.Context == "call" && !c.TypeName && fok {
			c.FuncType = conv.FuncType(f)
		} else {
			c.Type = conv.Type(types[i])
		}
	}
	p.Typedef = conv.typedef;
}

// concat returns a newly allocated slice holding the elements of a
// followed by the elements of b.  Neither argument is modified.
func concat(a, b []string) []string {
	na := len(a);
	out := make([]string, na+len(b));
	for i := 0; i < len(out); i++ {
		if i < na {
			out[i] = a[i]
		} else {
			out[i] = b[i-na]
		}
	}
	return out;
}

// gccDebug feeds the C program in stdin to gcc, asking for an object
// file (_cgo_.o) that carries DWARF v2 debugging information.
// If run reports failure, gccDebug returns nil and the text gcc wrote
// to standard error.  Otherwise it returns the DWARF data read from the
// object file and an empty string.
func (p *Prog) gccDebug(stdin []byte) (*dwarf.Data, string) {
	// Select the word size gcc should compile for.
	var wordFlag string;
	switch p.PtrSize {
	case 8:
		wordFlag = "-m64"
	default:
		wordFlag = "-m32"
	}

	objFile := "_cgo_.o";
	fixed := []string{
		"gcc",
		wordFlag,
		"-Wall",	// many warnings
		"-Werror",	// warnings are errors
		"-o" + objFile,	// write object to objFile
		"-gdwarf-2",	// generate DWARF v2 debugging symbols
		"-c",	// do not link
		"-xc",	// input language is C
		"-",	// read input from standard input
	};
	argv := concat(fixed, p.GccOptions);
	_, errOutput, ok := run(stdin, argv);
	if !ok {
		return nil, string(errOutput)
	}

	// The object format depends on the host, so try ELF first
	// and fall back to Mach-O.
	var obj interface {
		DWARF() (*dwarf.Data, os.Error);
	}
	var err os.Error;
	obj, err = elf.Open(objFile);
	if err != nil {
		obj, err = macho.Open(objFile);
		if err != nil {
			fatal("cannot parse gcc output %s as ELF or Mach-O object", objFile)
		}
	}

	data, err := obj.DWARF();
	if err != nil {
		fatal("cannot load DWARF debug information from %s: %s", objFile, err)
	}
	return data, "";
}

// A typeConv is a translator from dwarf types to Go types
// with equivalent memory layout.
// Call Init before using any of the conversion methods.
type typeConv struct {
	// Cache of already-translated or in-progress types.
	// m is keyed by the DWARF type; typedef maps a generated Go type
	// name (such as _C_name or _Cstruct_tag) to the expression that
	// defines it.
	m	map[dwarf.Type]*Type;
	typedef	map[string]ast.Expr;

	// Predeclared types.
	// Each field holds the identifier node created for it by Init.
	byte					ast.Expr;	// denotes padding
	int8, int16, int32, int64		ast.Expr;
	uint8, uint16, uint32, uint64, uintptr	ast.Expr;
	float32, float64			ast.Expr;
	void					ast.Expr;
	unsafePointer				ast.Expr;
	string					ast.Expr;

	// Size of a pointer, as passed to Init; compared against
	// DWARF byte sizes and used as the upper bound on alignment.
	ptrSize	int64;

	// Counter used to invent tags for anonymous structs.
	tagGen	int;
}

// Init prepares c for use.  It records ptrSize as the pointer size,
// allocates empty cache and typedef maps, and creates the identifier
// nodes for the predeclared types.
func (c *typeConv) Init(ptrSize int64) {
	// Fresh, empty caches.
	c.m = make(map[dwarf.Type]*Type);
	c.typedef = make(map[string]ast.Expr);
	c.ptrSize = ptrSize;

	// Padding element.
	c.byte = c.Ident("byte");

	// Signed and unsigned integers.
	c.int8, c.int16 = c.Ident("int8"), c.Ident("int16");
	c.int32, c.int64 = c.Ident("int32"), c.Ident("int64");
	c.uint8, c.uint16 = c.Ident("uint8"), c.Ident("uint16");
	c.uint32, c.uint64 = c.Ident("uint32"), c.Ident("uint64");
	c.uintptr = c.Ident("uintptr");

	// Floating point.
	c.float32, c.float64 = c.Ident("float32"), c.Ident("float64");

	// Everything else.
	c.void = c.Ident("void");
	c.unsafePointer = c.Ident("unsafe.Pointer");
	c.string = c.Ident("string");
}

// base peels qualifiers and typedefs off dt, one layer at a time,
// and returns the first type that is neither.
func base(dt dwarf.Type) dwarf.Type {
	for peeling := true; peeling; {
		switch t := dt.(type) {
		case *dwarf.QualType:
			dt = t.Type
		case *dwarf.TypedefType:
			dt = t.Type
		default:
			peeling = false
		}
	}
	return dt;
}

// cnameMap translates the multi-word names DWARF uses for some basic
// C types into the single-word aliases used in package "C".
var cnameMap = map[string]string{
	// Signed types.
	"signed char": "schar",
	"short int": "short",
	"long int": "long",
	"long long int": "longlong",

	// Unsigned types.
	"short unsigned int": "ushort",
	"unsigned int": "uint",
	"long unsigned int": "ulong",
	"long long unsigned int": "ulonglong",
}

// Type returns a *Type with the same memory layout as
// dtype when used as the type of a variable or a struct field.
//
// Results are cached in c.m.  The cache entry is inserted before the
// conversion proper so that a recursive reference to dtype (for example
// through a pointer) finds the in-progress *Type; the cases that recurse
// therefore assign t.Go before calling c.Type again.  A cached entry
// whose Go expression is still nil is reported as a conversion loop.
//
// As a side effect, Type records definitions in c.typedef for the
// named Go types it introduces (basic C types, typedefs, structs,
// unions and classes).
func (c *typeConv) Type(dtype dwarf.Type) *Type {
	if t, ok := c.m[dtype]; ok {
		if t.Go == nil {
			fatal("type conversion loop at %s", dtype)
		}
		return t;
	}

	t := new(Type);
	t.Size = dtype.Size();
	// Align stays -1 unless one of the cases below sets it.
	t.Align = -1;
	t.C = dtype.Common().Name;
	// Publish the entry now so that recursive calls can find it.
	c.m[dtype] = t;
	if t.Size < 0 {
		// Unsized types are [0]byte
		t.Size = 0;
		t.Go = c.Opaque(0);
		if t.C == "" {
			t.C = "void"
		}
		return t;
	}

	switch dt := dtype.(type) {
	default:
		fatal("unexpected type: %s", dtype)

	case *dwarf.AddrType:
		// Addresses must be exactly pointer-sized; represent as uintptr.
		if t.Size != c.ptrSize {
			fatal("unexpected: %d-byte address type - %s", t.Size, dtype)
		}
		t.Go = c.uintptr;
		t.Align = t.Size;

	case *dwarf.ArrayType:
		if dt.StrideBitSize > 0 {
			// Cannot represent bit-sized elements in Go.
			t.Go = c.Opaque(t.Size);
			break;
		}
		// The element type is filled in after the recursive call.
		gt := &ast.ArrayType{
			Len: c.intExpr(dt.Count),
		};
		t.Go = gt;	// publish before recursive call
		sub := c.Type(dt.Type);
		t.Align = sub.Align;
		gt.Elt = sub.Go;
		t.C = fmt.Sprintf("typeof(%s[%d])", sub.C, dt.Count);

	case *dwarf.CharType:
		if t.Size != 1 {
			fatal("unexpected: %d-byte char type - %s", t.Size, dtype)
		}
		t.Go = c.int8;
		t.Align = 1;

	case *dwarf.EnumType:
		// Enums become the unsigned integer of the same size.
		switch t.Size {
		default:
			fatal("unexpected: %d-byte enum type - %s", t.Size, dtype)
		case 1:
			t.Go = c.uint8
		case 2:
			t.Go = c.uint16
		case 4:
			t.Go = c.uint32
		case 8:
			t.Go = c.uint64
		}
		// Alignment equals size, capped at the pointer size.
		if t.Align = t.Size; t.Align >= c.ptrSize {
			t.Align = c.ptrSize
		}
		t.C = "enum " + dt.EnumName;

	case *dwarf.FloatType:
		switch t.Size {
		default:
			fatal("unexpected: %d-byte float type - %s", t.Size, dtype)
		case 4:
			t.Go = c.float32
		case 8:
			t.Go = c.float64
		}
		// Alignment equals size, capped at the pointer size.
		if t.Align = t.Size; t.Align >= c.ptrSize {
			t.Align = c.ptrSize
		}

	case *dwarf.FuncType:
		// No attempt at translation: would enable calls
		// directly between worlds, but we need to moderate those.
		t.Go = c.uintptr;
		t.Align = c.ptrSize;

	case *dwarf.IntType:
		// Bit-sized integers are rejected.
		if dt.BitSize > 0 {
			fatal("unexpected: %d-bit int type - %s", dt.BitSize, dtype)
		}
		switch t.Size {
		default:
			fatal("unexpected: %d-byte int type - %s", t.Size, dtype)
		case 1:
			t.Go = c.int8
		case 2:
			t.Go = c.int16
		case 4:
			t.Go = c.int32
		case 8:
			t.Go = c.int64
		}
		// Alignment equals size, capped at the pointer size.
		if t.Align = t.Size; t.Align >= c.ptrSize {
			t.Align = c.ptrSize
		}

	case *dwarf.PtrType:
		t.Align = c.ptrSize;

		// Translate void* as unsafe.Pointer
		if _, ok := base(dt.Type).(*dwarf.VoidType); ok {
			t.Go = c.unsafePointer;
			t.C = "void*";
			break;
		}

		// The pointee is filled in after the recursive call, which
		// lets self-referential types resolve through the cache.
		gt := &ast.StarExpr{};
		t.Go = gt;	// publish before recursive call
		sub := c.Type(dt.Type);
		gt.X = sub.Go;
		t.C = sub.C + "*";

	case *dwarf.QualType:
		// Ignore qualifier.
		// The qualified type shares the *Type of the underlying type,
		// replacing the placeholder entry made above.
		t = c.Type(dt.Type);
		c.m[dtype] = t;
		return t;

	case *dwarf.StructType:
		// Convert to Go struct, being careful about alignment.
		// Have to give it a name to simulate C "struct foo" references.
		tag := dt.StructName;
		if tag == "" {
			// Anonymous: invent a unique tag.
			tag = "__" + strconv.Itoa(c.tagGen);
			c.tagGen++;
		} else if t.C == "" {
			t.C = dt.Kind + " " + tag
		}
		name := c.Ident("_C" + dt.Kind + "_" + tag);
		t.Go = name;	// publish before recursive calls
		switch dt.Kind {
		case "union", "class":
			// Not translated field by field: defined as opaque bytes.
			c.typedef[name.Value] = c.Opaque(t.Size);
			if t.C == "" {
				t.C = fmt.Sprintf("typeof(unsigned char[%d])", t.Size)
			}
		case "struct":
			g, csyntax, align := c.Struct(dt);
			if t.C == "" {
				t.C = csyntax
			}
			t.Align = align;
			c.typedef[name.Value] = g;
		}

	case *dwarf.TypedefType:
		// Record typedef for printing.
		if dt.Name == "_GoString_" {
			// Special C name for Go string type.
			// Knows string layout used by compilers: pointer plus length,
			// which rounds up to 2 pointers after alignment.
			t.Go = c.string;
			t.Size = c.ptrSize * 2;
			t.Align = c.ptrSize;
			break;
		}
		name := c.Ident("_C_" + dt.Name);
		t.Go = name;	// publish before recursive call
		sub := c.Type(dt.Type);
		t.Size = sub.Size;
		t.Align = sub.Align;
		// Keep an existing definition of the same name if there is one.
		if _, ok := c.typedef[name.Value]; !ok {
			c.typedef[name.Value] = sub.Go
		}

	case *dwarf.UcharType:
		if t.Size != 1 {
			fatal("unexpected: %d-byte uchar type - %s", t.Size, dtype)
		}
		t.Go = c.uint8;
		t.Align = 1;

	case *dwarf.UintType:
		// Bit-sized integers are rejected.
		if dt.BitSize > 0 {
			fatal("unexpected: %d-bit uint type - %s", dt.BitSize, dtype)
		}
		switch t.Size {
		default:
			fatal("unexpected: %d-byte uint type - %s", t.Size, dtype)
		case 1:
			t.Go = c.uint8
		case 2:
			t.Go = c.uint16
		case 4:
			t.Go = c.uint32
		case 8:
			t.Go = c.uint64
		}
		// Alignment equals size, capped at the pointer size.
		if t.Align = t.Size; t.Align >= c.ptrSize {
			t.Align = c.ptrSize
		}

	case *dwarf.VoidType:
		t.Go = c.void;
		t.C = "void";
	}

	// Named basic types are exposed under a _C_ alias: record the
	// alias in c.typedef as standing for the Go type chosen above,
	// and use the alias as the Go type from here on.
	switch dtype.(type) {
	case *dwarf.AddrType, *dwarf.CharType, *dwarf.IntType, *dwarf.FloatType, *dwarf.UcharType, *dwarf.UintType:
		s := dtype.Common().Name;
		if s != "" {
			// Prefer the short alias when the DWARF name has one.
			if ss, ok := cnameMap[s]; ok {
				s = ss
			}
			s = strings.Join(strings.Split(s, " ", 0), "");	// strip spaces
			name := c.Ident("_C_" + s);
			c.typedef[name.Value] = t.Go;
			t.Go = name;
		}
	}

	// Every converted type must have a C spelling by now.
	if t.C == "" {
		fatal("internal error: did not create C name for %s", dtype)
	}

	return t;
}

// FuncArg returns a Go type with the same memory layout as
// dtype when used as the type of a C function argument.
// It differs from Type in two cases: array parameters become
// pointers, and a typedef of a non-void pointer is replaced by
// the pointer type it names.
func (c *typeConv) FuncArg(dtype dwarf.Type) *Type {
	t := c.Type(dtype);

	// Arrays are passed implicitly as pointers in C.
	// In Go, we must be explicit.
	if _, isArray := dtype.(*dwarf.ArrayType); isArray {
		return &Type{
			Size: c.ptrSize,
			Align: c.ptrSize,
			Go: &ast.StarExpr{X: t.Go},
			C: t.C + "*",
		}
	}

	// C has much more relaxed rules than Go for
	// implicit type conversions.  When the parameter
	// is type T defined as *X, simulate a little of the
	// laxness of C by making the argument *X instead of T.
	if td, isTypedef := dtype.(*dwarf.TypedefType); isTypedef {
		ptr, isPtr := base(td.Type).(*dwarf.PtrType);
		if isPtr {
			// Unless the typedef happens to point to void* since
			// Go has special rules around using unsafe.Pointer.
			if _, isVoid := base(ptr.Type).(*dwarf.VoidType); !isVoid {
				return c.Type(ptr)
			}
		}
	}
	return t;
}

// FuncType returns the Go type analogous to dtype.
// There is no guarantee about matching memory layout.
// Parameters are converted with FuncArg and the result, if any,
// with Type; a void or missing return type yields no result.
func (c *typeConv) FuncType(dtype *dwarf.FuncType) *FuncType {
	nparam := len(dtype.ParamType);
	params := make([]*Type, nparam);
	paramFields := make([]*ast.Field, nparam);
	for i, pt := range dtype.ParamType {
		// gcc's DWARF generator outputs a single DotDotDotType parameter for
		// function pointers that specify no parameters (e.g. void
		// (*__cgo_0)()).  Treat this special case as void.  This case is
		// invalid according to ISO C anyway (i.e. void (*__cgo_1)(...) is not
		// legal).
		if _, variadic := pt.(*dwarf.DotDotDotType); variadic && i == 0 {
			params, paramFields = nil, nil;
			break;
		}
		arg := c.FuncArg(pt);
		params[i] = arg;
		paramFields[i] = &ast.Field{Type: arg.Go};
	}

	var result *Type;
	var resultFields []*ast.Field;
	if rt := dtype.ReturnType; rt != nil {
		if _, isVoid := rt.(*dwarf.VoidType); !isVoid {
			result = c.Type(rt);
			resultFields = []*ast.Field{&ast.Field{Type: result.Go}};
		}
	}

	goFunc := &ast.FuncType{
		Params: paramFields,
		Results: resultFields,
	};
	return &FuncType{
		Params: params,
		Result: result,
		Go: goFunc,
	};
}

// Ident returns a new identifier node whose Value is s.
func (c *typeConv) Ident(s string) *ast.Ident {
	id := new(ast.Ident);
	id.Value = s;
	return id;
}

// Opaque returns the expression for [n]byte, used to stand in
// for n bytes whose contents are not translated.
func (c *typeConv) Opaque(n int64) ast.Expr {
	arr := new(ast.ArrayType);
	arr.Len = c.intExpr(n);
	arr.Elt = c.byte;
	return arr;
}

// intExpr returns an integer literal node holding the decimal
// representation of n.
func (c *typeConv) intExpr(n int64) ast.Expr {
	digits := strconv.Itoa64(n);
	lit := new(ast.BasicLit);
	lit.Kind = token.INT;
	lit.Value = strings.Bytes(digits);
	return lit;
}

// pad appends a blank ("_") field of size opaque bytes to fld and
// returns the extended slice.  The slice is extended in place by
// reslicing, so fld must have room for one more element.
func (c *typeConv) pad(fld []*ast.Field, size int64) []*ast.Field {
	filler := &ast.Field{Names: []*ast.Ident{c.Ident("_")}, Type: c.Opaque(size)};
	last := len(fld);
	fld = fld[0 : last+1];
	fld[last] = filler;
	return fld;
}

// Struct converts the DWARF struct dt to a Go struct type.
// It returns the Go struct expression, a C spelling of the struct
// ("struct { ... }"), and the largest alignment among the fields.
// Explicit padding fields are inserted wherever the DWARF byte offsets
// leave a gap, and after the last field up to dt.ByteSize.
func (c *typeConv) Struct(dt *dwarf.StructType) (expr *ast.StructType, csyntax string, align int64) {
	// Mangle struct fields that happen to be named Go keywords into
	// _{keyword}.  goName maps C ident -> Go ident; only the Go ident is
	// ever mangled.  Any existing identifier that already has the same
	// name on the C-side will cause the Go-mangled version to be prefixed
	// with another _.  (e.g. in a struct with fields '_type' and 'type',
	// the latter would be rendered as '__type' in Go).
	goName := make(map[string]string);
	taken := make(map[string]bool);
	for _, f := range dt.Field {
		goName[f.Name] = f.Name;
		taken[f.Name] = true;
	}
	for cname, candidate := range goName {
		if !token.Lookup(strings.Bytes(candidate)).IsKeyword() {
			continue
		}

		// Avoid the keyword, then keep prefixing until the
		// name no longer collides with an existing field.
		candidate = "_" + candidate;
		for {
			if _, clash := taken[candidate]; !clash {
				break
			}
			candidate = "_" + candidate;
		}

		taken[candidate] = true;
		goName[cname] = candidate;
	}

	// Lay the fields out in order, tracking the current byte offset.
	fields := make([]*ast.Field, 0, 2*len(dt.Field)+1);	// enough for padding around every field
	var pos int64;
	csyntax = "struct { ";
	for _, f := range dt.Field {
		if gap := f.ByteOffset - pos; gap > 0 {
			fields = c.pad(fields, gap);
			pos = f.ByteOffset;
		}
		ft := c.Type(f.Type);
		k := len(fields);
		fields = fields[0 : k+1];
		fields[k] = &ast.Field{Names: []*ast.Ident{c.Ident(goName[f.Name])}, Type: ft.Go};

		pos += ft.Size;
		csyntax += ft.C + " " + f.Name + "; ";
		if align < ft.Align {
			align = ft.Align
		}
	}

	// Pad out to the declared size; overshooting it is an error.
	switch {
	case pos < dt.ByteSize:
		fields = c.pad(fields, dt.ByteSize-pos)
	case pos > dt.ByteSize:
		fatal("struct size calculation error")
	}

	csyntax += "}";
	expr = &ast.StructType{Fields: fields};
	return;
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to webmaster@9p.io.