diff --git a/ast.go b/ast.go index 21d208e8b..fa964eea9 100644 --- a/ast.go +++ b/ast.go @@ -129,16 +129,18 @@ func Parse(line string) interface{} { node = parseVarDecl(line) case "WhileStmt": node = parseWhileStmt(line) + case "NullStmt": + node = nil default: - panic("'" + line + "'") + panic("Unknown node type: '" + line + "'") } return node } func groupsFromRegex(rx, line string) map[string]string { - // We remove tabs and newlines from the regex. This is purely cosmetic - // as the regex input can be quite lone and its nice for the caller to + // We remove tabs and newlines from the regex. This is purely cosmetic, + // as the regex input can be quite long and it's nice for the caller to // be able to format it in a more readable way. fullRegexp := "(?P
[0-9a-fx]+) " + strings.Replace(strings.Replace(rx, "\n", "", -1), "\t", "", -1) diff --git a/ast_test.go b/ast_test.go index bd28b0114..4a408c541 100644 --- a/ast_test.go +++ b/ast_test.go @@ -365,6 +365,24 @@ var nodes = map[string]interface{}{ Referenced: true, Children: []interface{}{}, }, + `0x7f9bc9083d00 line:91:5 'unsigned short'`: &FieldDecl{ + Address: "0x7f9bc9083d00", + Position: "line:91:5, line:97:8", + Position2: "line:91:5", + Name: "", + Type: "unsigned short", + Referenced: false, + Children: []interface{}{}, + }, + `0x30363a0 __val 'int [2]'`: &FieldDecl{ + Address: "0x30363a0", + Position: "col:18, col:29", + Position2: "", + Name: "__val", + Type: "int [2]", + Referenced: false, + Children: []interface{}{}, + }, // FloatingLiteral `0x7febe106f5e8 'double' 1.230000e+00`: &FloatingLiteral{ diff --git a/common.go b/common.go index 1c1ffcbc0..1108e2a5d 100644 --- a/common.go +++ b/common.go @@ -11,6 +11,10 @@ func printLine(out *bytes.Buffer, line string, indent int) { } func renderExpression(node interface{}) []string { + if node == nil { + return []string{"", "unknown54"} + } + if n, ok := node.(ExpressionRenderer); ok { return n.Render() } diff --git a/darwin/common.go b/darwin/common.go index cb966cedf..f427120ac 100644 --- a/darwin/common.go +++ b/darwin/common.go @@ -14,11 +14,11 @@ type _RuneLocale struct { } var _DefaultRuneLocale _RuneLocale = _RuneLocale{ - __runetype: [256]uint32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, }, + __runetype: [256]uint32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}, } func __maskrune(_c C__darwin_ct_rune_t, _f uint32) uint32 { - return _DefaultRuneLocale.__runetype[_c & 0xff] & _f; + return _DefaultRuneLocale.__runetype[_c&0xff] & _f } func __tolower(c 
C__darwin_ct_rune_t) C__darwin_ct_rune_t { diff --git a/field_decl.go b/field_decl.go index 8eb83d3a7..d8e40e343 100644 --- a/field_decl.go +++ b/field_decl.go @@ -18,9 +18,9 @@ type FieldDecl struct { func parseFieldDecl(line string) *FieldDecl { groups := groupsFromRegex( `<(?P.*)> - (?P [^ ]+)? + (?P col:\d+| line:\d+:\d+)? (?P referenced)? - (?P\w+?) + (?P \w+?)? '(?P.+?)'`, line, ) @@ -29,7 +29,7 @@ func parseFieldDecl(line string) *FieldDecl { Address: groups["address"], Position: groups["position"], Position2: strings.TrimSpace(groups["position2"]), - Name: groups["name"], + Name: strings.TrimSpace(groups["name"]), Type: groups["type"], Referenced: len(groups["referenced"]) > 0, Children: []interface{}{}, @@ -38,7 +38,11 @@ func parseFieldDecl(line string) *FieldDecl { func (n *FieldDecl) Render() []string { fieldType := resolveType(n.Type) - name := strings.Replace(n.Name, "used", "", -1) + name := n.Name + + //if name == "" { + // return []string{"", "unknown71"} + //} // Go does not allow the name of a variable to be called "type". For the // moment I will rename this to avoid the error. @@ -46,11 +50,15 @@ func (n *FieldDecl) Render() []string { name = "type_" } + // It may have a default value. suffix := "" if len(n.Children) > 0 { suffix = fmt.Sprintf(" = %s", renderExpression(n.Children[0])[0]) } + // NULL is a macro that once rendered looks like "(0)" we have to be + // sensitive to catch this as Go would complain that 0 (int) is not + // compatible with the type we are setting it to. 
if suffix == " = (0)" { suffix = " = nil" } diff --git a/for_stmt.go b/for_stmt.go index 5a7ddfe84..e5b266481 100644 --- a/for_stmt.go +++ b/for_stmt.go @@ -27,13 +27,42 @@ func parseForStmt(line string) *ForStmt { func (n *ForStmt) RenderLine(out *bytes.Buffer, functionName string, indent int, returnType string) { children := n.Children - a := renderExpression(children[0])[0] - b := renderExpression(children[1])[0] - c := renderExpression(children[2])[0] + // There are always 5 children in a ForStmt, for example: + // + // for ( c = 0 ; c < n ; c++ ) { + // doSomething(); + // } + // + // 1. initExpression = BinaryStmt: c = 0 + // 2. Not sure what this is for, but it's always nil. There is a panic + // below in case we discover what it is used for (pun intended). + // 3. conditionalExpression = BinaryStmt: c < n + // 4. stepExpression = BinaryStmt: c++ + // 5. body = CompoundStmt: { CallExpr } - printLine(out, fmt.Sprintf("for %s; %s; %s {", a, b, c), indent) + if len(children) != 5 { + panic(fmt.Sprintf("Expected 5 children in ForStmt, got %#v", children)) + } + + // TODO: The second child of a ForStmt appears to always be null. + // Are there any cases where it is used? 
+ if children[1] != nil { + panic("non-nil child 1 in ForStmt") + } + + init := renderExpression(children[0])[0] + conditional := renderExpression(children[2])[0] + step := renderExpression(children[3])[0] + body := children[4] + + if init == "" && conditional == "" && step == "" { + printLine(out, "for {", indent) + } else { + printLine(out, fmt.Sprintf("for %s; %s; %s {", + init, conditional, step), indent) + } - Render(out, children[3], functionName, indent+1, returnType) + Render(out, body, functionName, indent+1, returnType) printLine(out, "}", indent) } diff --git a/if_stmt.go b/if_stmt.go index 230531a6c..9a986038d 100644 --- a/if_stmt.go +++ b/if_stmt.go @@ -27,14 +27,55 @@ func parseIfStmt(line string) *IfStmt { func (n *IfStmt) RenderLine(out *bytes.Buffer, functionName string, indent int, returnType string) { children := n.Children - e := renderExpression(children[0]) - printLine(out, fmt.Sprintf("if %s {", cast(e[0], e[1], "bool")), indent) + // There are always 4 or 5 children in an IfStmt. For example: + // + // if (i == 0) { + // return 0; + // } else { + // return 1; + // } + // + // 1. Not sure what this is for. This gets removed. + // 2. Not sure what this is for. + // 3. conditional = BinaryOperator: i == 0 + // 4. body = CompoundStmt: { return 0; } + // 5. elseBody = CompoundStmt: { return 1; } + // + // elseBody will be nil if there is no else clause. - Render(out, children[1], functionName, indent+1, returnType) + // On Linux I have seen only 4 children for an IfStmt with the same + // definitions above, but missing the first argument. Since we don't + // know what the first argument is for anyway we will just remove it on + // Mac if necessary. + if len(children) == 5 && children[0] != nil { + panic("non-nil child 0 in ForStmt") + } + if len(children) == 5 { + children = children[1:] + } + + // From here on there must be 4 children. 
+ if len(children) != 4 { + panic(fmt.Sprintf("Expected 4 children in IfStmt, got %#v", children)) + } + + // Maybe we will discover what the nil value is? + if children[0] != nil { + panic("non-nil child 0 in ForStmt") + } + + conditional := renderExpression(children[1]) + + // The condition in Go must always be a bool. + boolCondition := cast(conditional[0], conditional[1], "bool") + + printLine(out, fmt.Sprintf("if %s {", boolCondition), indent) + + Render(out, children[2], functionName, indent+1, returnType) - if len(children) > 2 { + if children[3] != nil { printLine(out, "} else {", indent) - Render(out, children[2], functionName, indent+1, returnType) + Render(out, children[3], functionName, indent+1, returnType) } printLine(out, "}", indent) diff --git a/main.go b/main.go index 62c12db17..5a4e5f72b 100644 --- a/main.go +++ b/main.go @@ -2,6 +2,7 @@ package main import ( "bytes" + "flag" "fmt" "io/ioutil" "os" @@ -11,6 +12,10 @@ import ( "strings" ) +var ( + printAst = flag.Bool("print-ast", false, "Print AST before translated Go code.") +) + func readAST(data []byte) []string { uncolored := regexp.MustCompile(`\x1b\[[\d;]+m`).ReplaceAll(data, []byte{}) return strings.Split(string(uncolored), "\n") @@ -23,15 +28,14 @@ func convertLinesToNodes(lines []string) []interface{} { continue } - // This will need to be handled more gracefully... I'm not even - // sure what this means? - if strings.Index(line, "<<>>") >= 0 { - continue - } + // It is tempting to discard null AST nodes, but these may + // have semantic importance: for example, they represent omitted + // for-loop conditions, as in for(;;). 
+ line = strings.Replace(line, "<<>>", "NullStmt", 1) indentAndType := regexp.MustCompile("^([|\\- `]*)(\\w+)").FindStringSubmatch(line) if len(indentAndType) == 0 { - panic(fmt.Sprintf("Can not understand line '%s'", line)) + panic(fmt.Sprintf("Cannot understand line '%s'", line)) } offset := len(indentAndType[1]) @@ -125,7 +129,7 @@ func Start(args []string) string { } // 1. Compile it first (checking for errors) - cFilePath := args[1] + cFilePath := args[0] // 2. Preprocess pp, err := exec.Command("clang", "-E", cFilePath).Output() @@ -140,6 +144,12 @@ func Start(args []string) string { Check(err) lines := readAST(ast_pp) + if *printAst { + for _, l := range lines { + fmt.Println(l) + } + fmt.Println() + } nodes := convertLinesToNodes(lines) tree := buildTree(nodes, 0) @@ -168,5 +178,16 @@ func Start(args []string) string { } func main() { - fmt.Print(Start(os.Args)) + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: %s \n", os.Args[0]) + flag.PrintDefaults() + } + flag.Parse() + + if flag.NArg() < 1 { + flag.Usage() + os.Exit(1) + } + + fmt.Print(Start(flag.Args())) } diff --git a/main_test.go b/main_test.go index d810f0aed..433d536aa 100644 --- a/main_test.go +++ b/main_test.go @@ -17,6 +17,6 @@ func TestIntegrationScripts(t *testing.T) { } for _, file := range files { - Start([]string{"", file}) + Start([]string{file}) } } diff --git a/noarch/functions.go b/noarch/functions.go index f3080a450..7066b794e 100644 --- a/noarch/functions.go +++ b/noarch/functions.go @@ -4,41 +4,41 @@ package noarch type __builtin_va_list int64 func BoolToInt(x bool) int { - if x { - return 1 - } + if x { + return 1 + } - return 0 + return 0 } func __bool_to_uint32(x bool) int { - if x { - return 1 - } + if x { + return 1 + } - return 0 + return 0 } func __not_uint32(x uint32) uint32 { - if x == 0 { - return 1 - } + if x == 0 { + return 1 + } - return 0 + return 0 } func NotInt(x int) int { - if x == 0 { - return 1 - } + if x == 0 { + return 1 + } - return 0 + return 0 } 
-func Ternary(a bool, b, c func () interface{}) interface{} { - if a { - return b() - } +func Ternary(a bool, b, c func() interface{}) interface{} { + if a { + return b() + } - return c() + return c() } diff --git a/tests/misc/for.c b/tests/misc/for.c new file mode 100644 index 000000000..a4e53faeb --- /dev/null +++ b/tests/misc/for.c @@ -0,0 +1,28 @@ +#include + +int main() +{ + int i = 0; + + // Missing init + for (; i < 10; i++) + printf("%d\n", i); + + // CompoundStmt + for (i = 0; i < 10; i++) { + printf("%d\n", i); + } + + // Not CompoundStmt + for (i = 0; i < 10; i++) + printf("%d\n", i); + + // Infinite loop + int j = 0; + for (;;) { + printf("infinite loop\n"); + j++; + if (j > 10) + break; + } +} diff --git a/tests/misc/if.c b/tests/misc/if.c index f9be0147d..dc32f437b 100644 --- a/tests/misc/if.c +++ b/tests/misc/if.c @@ -3,9 +3,16 @@ int main() { int x = 1; - + + // Without else if ( x == 1 ) printf("x is equal to one.\n"); + + // With else + if ( x != 1 ) + printf("x is not equal to one.\n"); + else + printf("x is equal to one.\n"); return 0; } diff --git a/while_stmt.go b/while_stmt.go index f9befaac6..86e5a676b 100644 --- a/while_stmt.go +++ b/while_stmt.go @@ -25,7 +25,9 @@ func parseWhileStmt(line string) *WhileStmt { } func (n *WhileStmt) RenderLine(out *bytes.Buffer, functionName string, indent int, returnType string) { - children := n.Children + // TODO: The first child of a WhileStmt appears to always be null. + // Are there any cases where it is used? + children := n.Children[1:] e := renderExpression(children[0]) printLine(out, fmt.Sprintf("for %s {", cast(e[0], e[1], "bool")), indent)