From 577fad2da680bf2416761e1fc5edb2a01c72622b Mon Sep 17 00:00:00 2001
From: jmug
Date: Thu, 2 Jan 2025 19:46:45 -0800
Subject: [PATCH] Parsing statements (skipping expressions).

Signed-off-by: jmug
---
 pkg/ast/ast.go            |  59 +++++++++++++++++++
 pkg/parser/parser.go      | 116 ++++++++++++++++++++++++++++++++++++++
 pkg/parser/parser_test.go | 112 ++++++++++++++++++++++++++++++++++++
 3 files changed, 287 insertions(+)
 create mode 100644 pkg/ast/ast.go
 create mode 100644 pkg/parser/parser.go
 create mode 100644 pkg/parser/parser_test.go

diff --git a/pkg/ast/ast.go b/pkg/ast/ast.go
new file mode 100644
index 0000000..98ca5d0
--- /dev/null
+++ b/pkg/ast/ast.go
@@ -0,0 +1,59 @@
+package ast
+
+import "code.jmug.me/jmug/interpreter-in-go/pkg/token"
+
+type Node interface {
+	TokenLiteral() string
+}
+
+type Statement interface {
+	Node
+	statementNode()
+}
+
+type Expression interface {
+	Node
+	expressionNode()
+}
+
+type Program struct {
+	Statements []Statement
+}
+
+func (p *Program) TokenLiteral() string {
+	if len(p.Statements) > 0 {
+		return p.Statements[0].TokenLiteral()
+	}
+	return ""
+}
+
+type LetStatement struct {
+	Token token.Token // TODO: This is a little redundant, figure out if I can get rid of it.
+	Name  *Identifier
+	Value Expression
+}
+
+func (ls *LetStatement) statementNode() {}
+func (ls *LetStatement) TokenLiteral() string {
+	return ls.Token.Literal
+}
+
+type ReturnStatement struct {
+	Token       token.Token // TODO: This is a little redundant, figure out if I can get rid of it.
+	ReturnValue Expression
+}
+
+func (rs *ReturnStatement) statementNode() {}
+func (rs *ReturnStatement) TokenLiteral() string {
+	return rs.Token.Literal
+}
+
+type Identifier struct {
+	Token token.Token
+	Value string
+}
+
+func (i *Identifier) expressionNode() {}
+func (i *Identifier) TokenLiteral() string {
+	return i.Token.Literal
+}
diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go
new file mode 100644
index 0000000..4292fd8
--- /dev/null
+++ b/pkg/parser/parser.go
@@ -0,0 +1,116 @@
+package parser
+
+import (
+	"fmt"
+
+	"code.jmug.me/jmug/interpreter-in-go/pkg/ast"
+	"code.jmug.me/jmug/interpreter-in-go/pkg/lexer"
+	"code.jmug.me/jmug/interpreter-in-go/pkg/token"
+)
+
+type Parser struct {
+	l         *lexer.Lexer
+	errors    []string
+	curToken  token.Token
+	peekToken token.Token
+}
+
+func New(l *lexer.Lexer) *Parser {
+	p := &Parser{
+		l:      l,
+		errors: []string{},
+	}
+	// TODO: figure out why this can't be done from `ParseProgram`
+	p.nextToken()
+	p.nextToken()
+	return p
+}
+
+func (p *Parser) ParseProgram() *ast.Program {
+	program := &ast.Program{}
+	program.Statements = []ast.Statement{}
+	for !p.curTokenIs(token.EOF) {
+		stmt := p.parseStatement()
+		if stmt != nil {
+			program.Statements = append(program.Statements, stmt)
+		}
+		p.nextToken()
+	}
+	return program
+}
+
+func (p *Parser) parseStatement() ast.Statement {
+	switch p.curToken.Type {
+	case token.LET:
+		return p.parseLetStatement()
+	case token.RETURN:
+		return p.parseReturnStatement()
+	}
+	return nil
+}
+
+func (p *Parser) parseLetStatement() ast.Statement {
+	stmt := &ast.LetStatement{Token: p.curToken}
+	if !p.expectPeek(token.IDENT) {
+		return nil
+	}
+	stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
+	if !p.expectPeek(token.ASSIGN) {
+		return nil
+	}
+	// TODO: Skipping until we find the semicolon to avoid parsing the expression.
+	for !p.curTokenIs(token.SEMICOLON) {
+		p.nextToken()
+	}
+	return stmt
+}
+
+func (p *Parser) parseReturnStatement() ast.Statement {
+	stmt := &ast.ReturnStatement{Token: p.curToken}
+	p.nextToken()
+	// TODO: Skipping until we find the semicolon to avoid parsing the expression.
+	for !p.curTokenIs(token.SEMICOLON) {
+		p.nextToken()
+	}
+	return stmt
+}
+
+func (p *Parser) curTokenIs(typ token.TokenType) bool {
+	return p.curToken.Type == typ
+}
+
+func (p *Parser) peekTokenIs(typ token.TokenType) bool {
+	return p.peekToken.Type == typ
+}
+
+// NOTE: I'll leave the name as-is to avoid deviating from the book (maybe a
+// rename at the end?), but I think `nextTokenIfPeek` would be a much better
+// name for this.
+func (p *Parser) expectPeek(typ token.TokenType) bool {
+	if p.peekTokenIs(typ) {
+		p.nextToken()
+		return true
+	}
+	p.peekError(typ)
+	return false
+}
+
+func (p *Parser) nextToken() {
+	p.curToken = p.peekToken
+	p.peekToken = p.l.NextToken()
+}
+
+func (p *Parser) Errors() []string {
+	return p.errors
+}
+
+func (p *Parser) peekError(typ token.TokenType) {
+	p.errors = append(
+		p.errors,
+		fmt.Sprintf(
+			"expected next token to be %q, got %q instead",
+			typ,
+			p.peekToken.Type,
+		),
+	)
+}
diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go
new file mode 100644
index 0000000..eb30679
--- /dev/null
+++ b/pkg/parser/parser_test.go
@@ -0,0 +1,112 @@
+package parser
+
+import (
+	"testing"
+
+	"code.jmug.me/jmug/interpreter-in-go/pkg/ast"
+	"code.jmug.me/jmug/interpreter-in-go/pkg/lexer"
+)
+
+func TestLetStatements(t *testing.T) {
+	input := `
+let x = 5;
+let y = 10;
+let foobar = 838383;
+	`
+	l := lexer.New(input)
+	p := New(l)
+
+	program := p.ParseProgram()
+	checkParserErrors(t, p)
+	if program == nil {
+		t.Fatalf("ParseProgram() returned nil")
+	}
+	if len(program.Statements) != 3 {
+		t.Fatalf("program.Statements does not contain 3 statements. got=%d",
+			len(program.Statements))
+	}
+
+	tests := []struct {
+		expectedIdentifier string
+	}{
+		{"x"},
+		{"y"},
+		{"foobar"},
+	}
+
+	for i, tt := range tests {
+		stmt := program.Statements[i]
+		if !testLetStatement(t, stmt, tt.expectedIdentifier) {
+			return
+		}
+	}
+}
+
+func testLetStatement(t *testing.T, s ast.Statement, name string) bool {
+	if s.TokenLiteral() != "let" {
+		t.Errorf("s.TokenLiteral not 'let'. got=%q", s.TokenLiteral())
+		return false
+	}
+
+	letStmt, ok := s.(*ast.LetStatement)
+	if !ok {
+		t.Errorf("s not *ast.LetStatement. got=%T", s)
+		return false
+	}
+
+	if letStmt.Name.Value != name {
+		t.Errorf("letStmt.Name.Value not '%s'. got=%s", name, letStmt.Name.Value)
+		return false
+	}
+
+	if letStmt.Name.TokenLiteral() != name {
+		t.Errorf("letStmt.Name.TokenLiteral() not '%s'. got=%s",
+			name, letStmt.Name.TokenLiteral())
+		return false
+	}
+
+	return true
+}
+
+func TestReturnStatements(t *testing.T) {
+	input := `
+return 5;
+return 10;
+return 993322;
+`
+	l := lexer.New(input)
+	p := New(l)
+
+	program := p.ParseProgram()
+	checkParserErrors(t, p)
+
+	if len(program.Statements) != 3 {
+		t.Fatalf("program.Statements does not contain 3 statements. got=%d",
+			len(program.Statements))
+	}
+
+	for _, stmt := range program.Statements {
+		returnStmt, ok := stmt.(*ast.ReturnStatement)
+		if !ok {
+			t.Errorf("stmt not *ast.ReturnStatement. got=%T", stmt)
+			continue
+		}
+		if returnStmt.TokenLiteral() != "return" {
+			t.Errorf("returnStmt.TokenLiteral not 'return', got %q",
+				returnStmt.TokenLiteral())
+		}
+	}
+}
+
+func checkParserErrors(t *testing.T, p *Parser) {
+	errors := p.Errors()
+	if len(errors) == 0 {
+		return
+	}
+
+	t.Errorf("parser has %d errors", len(errors))
+	for _, msg := range errors {
+		t.Errorf("parser error: %q", msg)
+	}
+	t.FailNow()
+}
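
For context only, not part of the patch: a minimal sketch of how the parser added
here might be driven end to end, using only what this commit and the earlier lexer
commit provide (lexer.New, parser.New, ParseProgram, Errors, TokenLiteral). The
standalone main package and the hard-coded input are illustrative assumptions,
not code from this series.

package main

import (
	"fmt"

	"code.jmug.me/jmug/interpreter-in-go/pkg/lexer"
	"code.jmug.me/jmug/interpreter-in-go/pkg/parser"
)

func main() {
	// Any mix of let/return statements works at this stage; the expressions
	// after `=` and `return` are still skipped up to the semicolon.
	input := "let answer = 42; return answer;"

	l := lexer.New(input)
	p := parser.New(l)
	program := p.ParseProgram()

	// Surface parse errors the same way checkParserErrors does in the tests.
	for _, msg := range p.Errors() {
		fmt.Println("parser error:", msg)
	}

	// Each parsed statement currently only carries its leading token,
	// so this prints "let" and then "return".
	for _, stmt := range program.Statements {
		fmt.Println(stmt.TokenLiteral())
	}
}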