Review notes (text ahead of the first "diff --git" header is not part of any hunk):
- parseLetStatement / parseReturnStatement: the skip loops now also stop at
  token.EOF, so input with no ';' can no longer keep them spinning. token.EOF
  is assumed to exist in pkg/token; it is not visible in this patch.
- PrefixExpression.String: guards a nil Right, which parsePrefixExpression
  stores when parseExpression returns nil.
- parseExpression: dropped the stray blank line before the closing brace.
- Added doc comments and two regression tests; the affected hunk headers were
  recounted.
- Whitespace inside lines is reconstructed in gofmt style (apply with
  --ignore-whitespace if a context line does not match), and the index hashes
  are carried over from the original patch, so they no longer describe the
  post-image blobs.

diff --git a/pkg/ast/ast.go b/pkg/ast/ast.go
index 98ca5d0..f60bd7b 100644
--- a/pkg/ast/ast.go
+++ b/pkg/ast/ast.go
@@ -1,9 +1,14 @@
 package ast
 
-import "code.jmug.me/jmug/interpreter-in-go/pkg/token"
+import (
+	"bytes"
+
+	"code.jmug.me/jmug/interpreter-in-go/pkg/token"
+)
 
 type Node interface {
 	TokenLiteral() string
+	String() string
 }
 
 type Statement interface {
@@ -27,6 +32,15 @@ func (p *Program) TokenLiteral() string {
 	return ""
 }
 
+// String returns the concatenation of every statement's String output.
+func (p *Program) String() string {
+	var out bytes.Buffer
+	for _, stmt := range p.Statements {
+		out.WriteString(stmt.String())
+	}
+	return out.String()
+}
+
 type LetStatement struct {
 	Token token.Token // TODO: This is a little redundant, figure out if I can get rid of it.
 	Name  *Identifier
@@ -37,6 +51,17 @@ func (ls *LetStatement) statementNode() {}
 func (ls *LetStatement) TokenLiteral() string {
 	return ls.Token.Literal
 }
+// String renders "<token> <name> = <value>;", leaving the value out when it is nil.
+func (ls *LetStatement) String() string {
+	var out bytes.Buffer
+	out.WriteString(ls.TokenLiteral() + " ")
+	out.WriteString(ls.Name.String() + " = ")
+	if ls.Value != nil {
+		out.WriteString(ls.Value.String())
+	}
+	out.WriteString(";")
+	return out.String()
+}
 
 type ReturnStatement struct {
 	Token       token.Token // TODO: This is a little redundant, figure out if I can get rid of it.
@@ -47,7 +72,42 @@ func (rs *ReturnStatement) statementNode() {}
 func (rs *ReturnStatement) TokenLiteral() string {
 	return rs.Token.Literal
 }
+// String renders "<token>;" or "<token> <value>;" when ReturnValue is set.
+func (rs *ReturnStatement) String() string {
+	var out bytes.Buffer
+	out.WriteString(rs.TokenLiteral())
+	if rs.ReturnValue != nil {
+		out.WriteString(" " + rs.ReturnValue.String())
+	}
+	out.WriteString(";")
+	return out.String()
+}
 
+// ExpressionStatement is a simple wrapper of an expression in a statement.
+// This is common in scripting languages and allows you to have a source line
+// that is solely an expression, think of the Python REPL and how you can
+// type `1 + 1` and get a result.
+type ExpressionStatement struct {
+	Token      token.Token // The first token in the expression.
+	Expression Expression
+}
+
+func (es *ExpressionStatement) statementNode() {}
+func (es *ExpressionStatement) TokenLiteral() string {
+	return es.Token.Literal
+}
+// String returns the wrapped expression's String output, or "" when it is nil.
+func (es *ExpressionStatement) String() string {
+	if es.Expression != nil {
+		return es.Expression.String()
+	}
+	return ""
+}
+
+// Identifier is treated as an expression because in certain
+// circumstances they can return values (think `let some = other` where `other`
+// is actually an expression returning a value) and this makes them easier to
+// handle (according to the author).
 type Identifier struct {
 	Token token.Token
 	Value string
@@ -57,3 +117,44 @@ func (i *Identifier) expressionNode() {}
 func (i *Identifier) TokenLiteral() string {
 	return i.Token.Literal
 }
+// String returns the identifier's Value.
+func (i *Identifier) String() string {
+	return i.Value
+}
+
+// IntegerLiteral is an expression node holding a parsed int64 value.
+type IntegerLiteral struct {
+	Token token.Token
+	Value int64
+}
+
+func (il *IntegerLiteral) expressionNode() {}
+func (il *IntegerLiteral) TokenLiteral() string {
+	return il.Token.Literal
+}
+// String returns the literal text stored in the token.
+func (il *IntegerLiteral) String() string {
+	return il.Token.Literal
+}
+
+// PrefixExpression is a prefix operator (e.g. `!` or `-`) applied to Right.
+type PrefixExpression struct {
+	Token    token.Token
+	Operator string
+	Right    Expression
+}
+
+func (pe *PrefixExpression) expressionNode() {}
+func (pe *PrefixExpression) TokenLiteral() string {
+	return pe.Token.Literal
+}
+// String renders the expression as "(<operator><right>)". A nil Right is
+// rendered as "(<operator>)" instead of panicking.
+func (pe *PrefixExpression) String() string {
+	// The parser leaves Right nil when the operand fails to parse, so guard
+	// it the same way the statement String methods guard their expressions.
+	if pe.Right == nil {
+		return "(" + pe.Operator + ")"
+	}
+	return "(" + pe.Operator + pe.Right.String() + ")"
+}
diff --git a/pkg/ast/ast_test.go b/pkg/ast/ast_test.go
new file mode 100644
index 0000000..3912b37
--- /dev/null
+++ b/pkg/ast/ast_test.go
@@ -0,0 +1,42 @@
+package ast
+
+import (
+	"testing"
+
+	"code.jmug.me/jmug/interpreter-in-go/pkg/token"
+)
+
+// TestString checks Program.String against a hand-built let statement.
+func TestString(t *testing.T) {
+	program := &Program{
+		Statements: []Statement{
+			&LetStatement{
+				Token: token.Token{Type: token.LET, Literal: "let"},
+				Name: &Identifier{
+					Token: token.Token{Type: token.IDENT, Literal: "myVar"},
+					Value: "myVar",
+				},
+				Value: &Identifier{
+					Token: token.Token{Type: token.IDENT, Literal: "anotherVar"},
+					Value: "anotherVar",
+				},
+			},
+		},
+	}
+
+	if program.String() != "let myVar = anotherVar;" {
+		t.Errorf("program.String() wrong. got=%q", program.String())
+	}
+}
+
+// TestPrefixExpressionStringNilRight checks that String does not panic when
+// the operand is missing.
+func TestPrefixExpressionStringNilRight(t *testing.T) {
+	pe := &PrefixExpression{
+		Token:    token.Token{Type: token.BANG, Literal: "!"},
+		Operator: "!",
+	}
+	if pe.String() != "(!)" {
+		t.Errorf("pe.String() wrong. got=%q", pe.String())
+	}
+}
diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go
index 4292fd8..a96b23a 100644
--- a/pkg/parser/parser.go
+++ b/pkg/parser/parser.go
@@ -2,24 +2,38 @@ package parser
 
 import (
 	"fmt"
+	"strconv"
 
 	"code.jmug.me/jmug/interpreter-in-go/pkg/ast"
 	"code.jmug.me/jmug/interpreter-in-go/pkg/lexer"
 	"code.jmug.me/jmug/interpreter-in-go/pkg/token"
 )
 
+type (
+	prefixParseFn func() ast.Expression
+	infixParseFn  func(ast.Expression) ast.Expression
+)
+
 type Parser struct {
-	l         *lexer.Lexer
-	errors    []string
-	curToken  token.Token
-	peekToken token.Token
+	l              *lexer.Lexer
+	errors         []string
+	curToken       token.Token
+	peekToken      token.Token
+	prefixParseFns map[token.TokenType]prefixParseFn
+	infixParseFns  map[token.TokenType]infixParseFn
 }
 
 func New(l *lexer.Lexer) *Parser {
 	p := &Parser{
-		l:      l,
-		errors: []string{},
+		l:              l,
+		errors:         []string{},
+		prefixParseFns: map[token.TokenType]prefixParseFn{},
+		infixParseFns:  map[token.TokenType]infixParseFn{},
 	}
+	p.registerPrefix(token.IDENT, p.parseIdentifier)
+	p.registerPrefix(token.INT, p.parseIntegerLiteral)
+	p.registerPrefix(token.MINUS, p.parsePrefixExpression)
+	p.registerPrefix(token.BANG, p.parsePrefixExpression)
 	// TODO: figure out why this can't be done from `parseProgram`
 	p.nextToken()
 	p.nextToken()
@@ -34,6 +48,9 @@ func (p *Parser) ParseProgram() *ast.Program {
 		if stmt != nil {
 			program.Statements = append(program.Statements, stmt)
 		}
+		// NOTE: For now, this is not only eating the semicolon, it is also
+		// eating every and all tokens until parse statement finds something
+		// it deems valid.
 		p.nextToken()
 	}
 	return program
@@ -46,7 +63,7 @@ func (p *Parser) parseStatement() ast.Statement {
 	case token.RETURN:
 		return p.parseReturnStatement()
 	}
-	return nil
+	return p.parseExpressionStatement()
 }
 
 func (p *Parser) parseLetStatement() ast.Statement {
@@ -59,7 +76,8 @@ func (p *Parser) parseLetStatement() ast.Statement {
 		return nil
 	}
 	// TODO: Skipping until we find the semicolon to avoid parsing the expression.
-	if !p.curTokenIs(token.SEMICOLON) {
+	// Stop at EOF as well; input with no ';' would never end this loop otherwise.
+	for !p.curTokenIs(token.SEMICOLON) && !p.curTokenIs(token.EOF) {
 		p.nextToken()
 	}
 	return stmt
@@ -69,12 +87,69 @@ func (p *Parser) parseReturnStatement() ast.Statement {
 	stmt := &ast.ReturnStatement{Token: p.curToken}
 	p.nextToken()
 	// TODO: Skipping until we find the semicolon to avoid parsing the expression.
-	if !p.curTokenIs(token.SEMICOLON) {
+	// Stop at EOF as well; input with no ';' would never end this loop otherwise.
+	for !p.curTokenIs(token.SEMICOLON) && !p.curTokenIs(token.EOF) {
 		p.nextToken()
 	}
 	return stmt
 }
 
+// parseExpressionStatement wraps a bare expression in a statement node.
+func (p *Parser) parseExpressionStatement() ast.Statement {
+	stmt := &ast.ExpressionStatement{Token: p.curToken}
+	stmt.Expression = p.parseExpression(LOWEST)
+	// The semicolon is optional for expression statements so they're easier
+	// to type on the REPL. NOTE: It is weird that the last token parsed by
+	// parseExpression does not get consumed.
+	if p.peekTokenIs(token.SEMICOLON) {
+		p.nextToken()
+	}
+	return stmt
+}
+
+// parseExpression calls the prefix parse function registered for the current
+// token type; it records an error and returns nil when none is registered.
+func (p *Parser) parseExpression(precedence int) ast.Expression {
+	// TODO: Could this be replaced with an `ok` check?
+	prefix := p.prefixParseFns[p.curToken.Type]
+	if prefix == nil {
+		p.noPrefixParseFnError(p.curToken.Type)
+		return nil
+	}
+	leftExpr := prefix()
+	return leftExpr
+}
+
+// parseIdentifier builds an Identifier node from the current token.
+func (p *Parser) parseIdentifier() ast.Expression {
+	return &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
+}
+
+// parseIntegerLiteral converts the current token's literal to an int64. On
+// failure it records a parser error and returns nil.
+func (p *Parser) parseIntegerLiteral() ast.Expression {
+	exp := &ast.IntegerLiteral{Token: p.curToken}
+	literal, err := strconv.ParseInt(p.curToken.Literal, 0, 64)
+	if err != nil {
+		p.errors = append(p.errors, fmt.Sprintf("could not parse %q as an integer", p.curToken.Literal))
+		return nil
+	}
+	exp.Value = literal
+	return exp
+}
+
+// parsePrefixExpression parses an operator token followed by its operand.
+// Right stays nil when the operand cannot be parsed.
+func (p *Parser) parsePrefixExpression() ast.Expression {
+	exp := &ast.PrefixExpression{
+		Token:    p.curToken,
+		Operator: p.curToken.Literal,
+	}
+	p.nextToken()
+	exp.Right = p.parseExpression(PREFIX)
+	return exp
+}
+
 func (p *Parser) curTokenIs(typ token.TokenType) bool {
 	return p.curToken.Type == typ
 }
@@ -114,3 +189,21 @@ func (p *Parser) peekError(typ token.TokenType) {
 		),
 	)
 }
+
+// noPrefixParseFnError records that no prefix parse function exists for t.
+func (p *Parser) noPrefixParseFnError(t token.TokenType) {
+	p.errors = append(
+		p.errors,
+		fmt.Sprintf("no prefix parse function found for %q", t),
+	)
+}
+
+// registerPrefix stores fn as the prefix parse function for typ.
+func (p *Parser) registerPrefix(typ token.TokenType, fn prefixParseFn) {
+	p.prefixParseFns[typ] = fn
+}
+
+// registerInfix stores fn as the infix parse function for typ.
+func (p *Parser) registerInfix(typ token.TokenType, fn infixParseFn) {
+	p.infixParseFns[typ] = fn
+}
diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go
index eb30679..2e6fbdf 100644
--- a/pkg/parser/parser_test.go
+++ b/pkg/parser/parser_test.go
@@ -1,6 +1,7 @@
 package parser
 
 import (
+	"fmt"
 	"testing"
 
 	"code.jmug.me/jmug/interpreter-in-go/pkg/ast"
@@ -98,6 +99,142 @@ return 993322;
 	}
 }
 
+func TestIdentifierExpressions(t *testing.T) {
+	input := "foobar;"
+	l := lexer.New(input)
+	p := New(l)
+	program := p.ParseProgram()
+	checkParserErrors(t, p)
+	if len(program.Statements) != 1 {
+		t.Fatalf("program has not enough statements. got=%d",
+			len(program.Statements))
+	}
+	stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
+	if !ok {
+		t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
+			program.Statements[0])
+	}
+
+	ident, ok := stmt.Expression.(*ast.Identifier)
+	if !ok {
+		t.Fatalf("exp not *ast.Identifier. got=%T", stmt.Expression)
+	}
+	if ident.Value != "foobar" {
+		t.Errorf("ident.Value not %s. got=%s", "foobar", ident.Value)
+	}
+	if ident.TokenLiteral() != "foobar" {
+		t.Errorf("ident.TokenLiteral not %s. got=%s", "foobar",
+			ident.TokenLiteral())
+	}
+}
+
+func TestIntegerLiteralExpression(t *testing.T) {
+	input := "5;"
+
+	l := lexer.New(input)
+	p := New(l)
+	program := p.ParseProgram()
+	checkParserErrors(t, p)
+
+	if len(program.Statements) != 1 {
+		t.Fatalf("program has not enough statements. got=%d",
+			len(program.Statements))
+	}
+	stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
+	if !ok {
+		t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
+			program.Statements[0])
+	}
+
+	literal, ok := stmt.Expression.(*ast.IntegerLiteral)
+	if !ok {
+		t.Fatalf("exp not *ast.IntegerLiteral. got=%T", stmt.Expression)
+	}
+	if literal.Value != 5 {
+		t.Errorf("literal.Value not %d. got=%d", 5, literal.Value)
+	}
+	if literal.TokenLiteral() != "5" {
+		t.Errorf("literal.TokenLiteral not %s. got=%s", "5",
+			literal.TokenLiteral())
+	}
+}
+
+func TestParsingPrefixExpressions(t *testing.T) {
+	prefixTests := []struct {
+		input        string
+		operator     string
+		integerValue int64
+	}{
+		{"!5;", "!", 5},
+		{"-15;", "-", 15},
+	}
+
+	for _, tt := range prefixTests {
+		l := lexer.New(tt.input)
+		p := New(l)
+		program := p.ParseProgram()
+		checkParserErrors(t, p)
+
+		if len(program.Statements) != 1 {
+			t.Fatalf("program.Statements does not contain %d statements. got=%d\n",
+				1, len(program.Statements))
+		}
+
+		stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
+		if !ok {
+			t.Fatalf("program.Statements[0] is not ast.ExpressionStatement. got=%T",
+				program.Statements[0])
+		}
+
+		exp, ok := stmt.Expression.(*ast.PrefixExpression)
+		if !ok {
+			t.Fatalf("stmt is not ast.PrefixExpression. got=%T", stmt.Expression)
+		}
+		if exp.Operator != tt.operator {
+			t.Fatalf("exp.Operator is not '%s'. got=%s",
+				tt.operator, exp.Operator)
+		}
+		if !testIntegerLiteral(t, exp.Right, tt.integerValue) {
+			return
+		}
+	}
+}
+
+func testIntegerLiteral(t *testing.T, il ast.Expression, value int64) bool {
+	integ, ok := il.(*ast.IntegerLiteral)
+	if !ok {
+		t.Errorf("il not *ast.IntegerLiteral. got=%T", il)
+		return false
+	}
+
+	if integ.Value != value {
+		t.Errorf("integ.Value not %d. got=%d", value, integ.Value)
+		return false
+	}
+
+	if integ.TokenLiteral() != fmt.Sprintf("%d", value) {
+		t.Errorf("integ.TokenLiteral not %d. got=%s", value,
+			integ.TokenLiteral())
+		return false
+	}
+
+	return true
+}
+
+// TestSkippedExpressionWithoutSemicolon guards against the parser spinning
+// forever when a let/return statement is not terminated by a semicolon.
+func TestSkippedExpressionWithoutSemicolon(t *testing.T) {
+	for _, input := range []string{"let x = 5", "return 5"} {
+		p := New(lexer.New(input))
+		program := p.ParseProgram()
+		checkParserErrors(t, p)
+		if len(program.Statements) != 1 {
+			t.Fatalf("input %q: expected 1 statement. got=%d",
+				input, len(program.Statements))
+		}
+	}
+}
+
 func checkParserErrors(t *testing.T, p *Parser) {
 	errors := p.Errors()
 	if len(errors) == 0 {
diff --git a/pkg/parser/precedence.go b/pkg/parser/precedence.go
new file mode 100644
index 0000000..a9e935f
--- /dev/null
+++ b/pkg/parser/precedence.go
@@ -0,0 +1,14 @@
+package parser
+
+// Precedence levels in increasing order; the blank identifier consumes the
+// zero value so LOWEST starts at 1.
+const (
+	_ int = iota
+	LOWEST
+	EQUALS      // ==
+	LESSGREATER // > or <
+	SUM         // +
+	PRODUCT     // *
+	PREFIX      // -X or !X
+	CALL        // myFunction(X)
+)