Parsing statements (skipping expressions).

Signed-off-by: jmug <u.g.a.mariano@gmail.com>
This commit is contained in:
Mariano Uvalle 2025-01-02 19:46:45 -08:00
parent 04dfd62600
commit 577fad2da6
3 changed files with 287 additions and 0 deletions

116
pkg/parser/parser.go Normal file
View file

@ -0,0 +1,116 @@
package parser
import (
"fmt"
"code.jmug.me/jmug/interpreter-in-go/pkg/ast"
"code.jmug.me/jmug/interpreter-in-go/pkg/lexer"
"code.jmug.me/jmug/interpreter-in-go/pkg/token"
)
// Parser turns the token stream produced by a lexer into an *ast.Program.
// It keeps a one-token lookahead (peekToken) next to the token currently
// being examined (curToken).
type Parser struct {
	// l is the lexer tokens are pulled from.
	l *lexer.Lexer
	// errors collects human-readable messages recorded while parsing.
	errors []string

	// curToken is the token under examination.
	curToken token.Token
	// peekToken is the token that follows curToken.
	peekToken token.Token
}
// New builds a Parser that reads from l and primes it so that both curToken
// and peekToken are populated before the first statement is parsed.
func New(l *lexer.Lexer) *Parser {
	p := &Parser{l: l, errors: []string{}}

	// TODO: figure out why this can't be done from `parseProgram`
	// Two advances are needed: the first fills peekToken, the second moves it
	// into curToken and fetches the next lookahead.
	for i := 0; i < 2; i++ {
		p.nextToken()
	}
	return p
}
// ParseProgram consumes tokens until EOF and returns the program made of
// every statement that could be parsed. Tokens that do not start a known
// statement are skipped. The returned program is never nil and its
// Statements slice is always non-nil.
func (p *Parser) ParseProgram() *ast.Program {
	program := &ast.Program{Statements: []ast.Statement{}}

	// The post statement advances past whatever token the statement parser
	// stopped on (or past a token that did not start a statement).
	for ; !p.curTokenIs(token.EOF); p.nextToken() {
		if stmt := p.parseStatement(); stmt != nil {
			program.Statements = append(program.Statements, stmt)
		}
	}
	return program
}
// parseStatement dispatches on the type of curToken to the matching
// statement parser. It returns nil when curToken does not begin a statement
// this parser knows about.
func (p *Parser) parseStatement() ast.Statement {
	switch p.curToken.Type {
	case token.RETURN:
		return p.parseReturnStatement()
	case token.LET:
		return p.parseLetStatement()
	default:
		return nil
	}
}
// parseLetStatement parses `let <identifier> = <expression>;` starting with
// curToken on the `let` keyword.
//
// It returns nil (after expectPeek has recorded an error) when the keyword
// is not followed by an identifier and then an assignment token. On success
// the returned *ast.LetStatement has Token and Name set; the expression is
// not parsed yet, so its tokens are discarded.
func (p *Parser) parseLetStatement() ast.Statement {
	stmt := &ast.LetStatement{Token: p.curToken}

	if !p.expectPeek(token.IDENT) {
		return nil
	}
	stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}

	if !p.expectPeek(token.ASSIGN) {
		return nil
	}

	// TODO: Skipping until we find the semicolon to avoid parsing the expression.
	// This must be a loop: advancing only once leaves the parser in the middle
	// of a multi-token expression, and a `let`/`return` token inside it (e.g.
	// in a function literal body) would then be parsed as a new top-level
	// statement. EOF also ends the skip so an unterminated statement cannot
	// loop forever.
	for !p.curTokenIs(token.SEMICOLON) && !p.curTokenIs(token.EOF) {
		p.nextToken()
	}
	return stmt
}
// parseReturnStatement parses `return <expression>;` starting with curToken
// on the `return` keyword. The returned *ast.ReturnStatement only has Token
// set; the expression is not parsed yet, so its tokens are discarded.
func (p *Parser) parseReturnStatement() ast.Statement {
	stmt := &ast.ReturnStatement{Token: p.curToken}

	// Step over the `return` keyword itself.
	p.nextToken()

	// TODO: Skipping until we find the semicolon to avoid parsing the expression.
	// This must be a loop: advancing only once leaves the parser in the middle
	// of a multi-token expression, and a `let`/`return` token inside it would
	// then be parsed as a new top-level statement. EOF also ends the skip so
	// an unterminated statement cannot loop forever.
	for !p.curTokenIs(token.SEMICOLON) && !p.curTokenIs(token.EOF) {
		p.nextToken()
	}
	return stmt
}
// curTokenIs reports whether the current token has type typ.
func (p *Parser) curTokenIs(typ token.TokenType) bool {
	current := p.curToken.Type
	return current == typ
}
// peekTokenIs reports whether the lookahead token has type typ.
func (p *Parser) peekTokenIs(typ token.TokenType) bool {
	upcoming := p.peekToken.Type
	return upcoming == typ
}
// expectPeek advances to the next token only when the lookahead token has
// type typ, and reports whether it did so. On a mismatch the parser does not
// move and an error describing the unexpected token is recorded.
//
// NOTE: I'll leave the name as-is to avoid deviating from the book (maybe a
// rename at the end?), but I think `nextTokenIfPeek` would be a much better
// name for this.
func (p *Parser) expectPeek(typ token.TokenType) bool {
	if !p.peekTokenIs(typ) {
		p.peekError(typ)
		return false
	}
	p.nextToken()
	return true
}
// nextToken slides the two-token window forward: the lookahead token becomes
// the current token and a fresh lookahead is read from the lexer.
func (p *Parser) nextToken() {
	fetched := p.l.NextToken()
	p.curToken, p.peekToken = p.peekToken, fetched
}
// Errors returns the messages recorded so far while parsing. The returned
// slice is the parser's own slice, not a copy.
func (p *Parser) Errors() []string {
	return p.errors
}
// peekError records an error saying that the lookahead token was expected to
// have type typ but has a different type.
func (p *Parser) peekError(typ token.TokenType) {
	msg := fmt.Sprintf("expected next token to be %q, got %q instead", typ, p.peekToken.Type)
	p.errors = append(p.errors, msg)
}

112
pkg/parser/parser_test.go Normal file
View file

@ -0,0 +1,112 @@
package parser
import (
"testing"
"code.jmug.me/jmug/interpreter-in-go/pkg/ast"
"code.jmug.me/jmug/interpreter-in-go/pkg/lexer"
)
func TestLetStatements(t *testing.T) {
input := `
let x = 5;
let y = 10;
let foobar = 838383;
`
l := lexer.New(input)
p := New(l)
program := p.ParseProgram()
checkParserErrors(t, p)
if program == nil {
t.Fatalf("ParseProgram() returned nil")
}
if len(program.Statements) != 3 {
t.Fatalf("program.Statements does not contain 3 statements. got=%d",
len(program.Statements))
}
tests := []struct {
expectedIdentifier string
}{
{"x"},
{"y"},
{"foobar"},
}
for i, tt := range tests {
stmt := program.Statements[i]
if !testLetStatement(t, stmt, tt.expectedIdentifier) {
return
}
}
}
// testLetStatement checks that s is an *ast.LetStatement whose token literal
// is "let" and whose identifier (both Value and TokenLiteral) equals name.
// It reports the first mismatch through t.Errorf and returns false; it
// returns true when every check passes.
func testLetStatement(t *testing.T, s ast.Statement, name string) bool {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	if s.TokenLiteral() != "let" {
		t.Errorf("s.TokenLiteral not 'let'. got=%q", s.TokenLiteral())
		return false
	}

	letStmt, ok := s.(*ast.LetStatement)
	if !ok {
		t.Errorf("s not *ast.LetStatement. got=%T", s)
		return false
	}

	if letStmt.Name.Value != name {
		t.Errorf("letStmt.Name.Value not '%s'. got=%s", name, letStmt.Name.Value)
		return false
	}

	if letStmt.Name.TokenLiteral() != name {
		t.Errorf("letStmt.Name.TokenLiteral() not '%s'. got=%s",
			name, letStmt.Name.TokenLiteral())
		return false
	}

	return true
}
func TestReturnStatements(t *testing.T) {
input := `
return 5;
return 10;
return 993322;
`
l := lexer.New(input)
p := New(l)
program := p.ParseProgram()
checkParserErrors(t, p)
if len(program.Statements) != 3 {
t.Fatalf("program.Statements does not contain 3 statements. got=%d",
len(program.Statements))
}
for _, stmt := range program.Statements {
returnStmt, ok := stmt.(*ast.ReturnStatement)
if !ok {
t.Errorf("stmt not *ast.ReturnStatement. got=%T", stmt)
continue
}
if returnStmt.TokenLiteral() != "return" {
t.Errorf("returnStmt.TokenLiteral not 'return', got %q",
returnStmt.TokenLiteral())
}
}
}
// checkParserErrors fails the test immediately when p has recorded any
// parsing errors, after logging how many there are and each message. It
// does nothing when there are none.
func checkParserErrors(t *testing.T, p *Parser) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	errs := p.Errors()
	if len(errs) == 0 {
		return
	}

	t.Errorf("parser has %d errors", len(errs))
	for _, msg := range errs {
		t.Errorf("parser error: %q", msg)
	}
	t.FailNow()
}