Parsing identifiers, integer literals and prefix expressions (all as expression statements)

Signed-off-by: jmug <u.g.a.mariano@gmail.com>
This commit is contained in:
Mariano Uvalle 2025-01-03 19:38:03 -08:00
parent 577fad2da6
commit f286a88039
5 changed files with 341 additions and 10 deletions

View file

@ -2,24 +2,38 @@ package parser
import (
"fmt"
"strconv"
"code.jmug.me/jmug/interpreter-in-go/pkg/ast"
"code.jmug.me/jmug/interpreter-in-go/pkg/lexer"
"code.jmug.me/jmug/interpreter-in-go/pkg/token"
)
// prefixParseFn parses an expression that starts at the parser's current
// token (an identifier, a literal, or a prefix operator) and returns the
// resulting node.
type prefixParseFn func() ast.Expression

// infixParseFn receives an already-parsed expression and returns a new
// expression built from it.
// NOTE(review): presumably the argument is the left-hand operand of a binary
// operator; nothing visible here calls an infixParseFn yet, so confirm.
type infixParseFn func(ast.Expression) ast.Expression
// Parser holds the state used while turning lexer output into AST nodes.
//
// Each field is declared exactly once: a struct may not repeat a field name,
// so the duplicated l/errors/curToken/peekToken declarations were collapsed.
type Parser struct {
	// l is the lexer the parser was constructed with.
	l *lexer.Lexer
	// errors accumulates human-readable messages for every problem found
	// while parsing; parsing continues after an error is recorded.
	errors []string

	// curToken is the token the parse functions are currently looking at.
	curToken token.Token
	// peekToken is inspected to decide whether to advance.
	// NOTE(review): presumably the token immediately after curToken; the
	// code that fills it is outside this view.
	peekToken token.Token

	// prefixParseFns maps a token type to the function that parses an
	// expression beginning with that token.
	prefixParseFns map[token.TokenType]prefixParseFn
	// infixParseFns maps a token type to its infix parse function.
	infixParseFns map[token.TokenType]infixParseFn
}
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
l: l,
errors: []string{},
prefixParseFns: map[token.TokenType]prefixParseFn{},
infixParseFns: map[token.TokenType]infixParseFn{},
}
p.registerPrefix(token.IDENT, p.parseIdentifier)
p.registerPrefix(token.INT, p.parseIntegerLiteral)
p.registerPrefix(token.MINUS, p.parsePrefixExpression)
p.registerPrefix(token.BANG, p.parsePrefixExpression)
// TODO: figure out why this can't be done from `parseProgram`
p.nextToken()
p.nextToken()
@ -34,6 +48,9 @@ func (p *Parser) ParseProgram() *ast.Program {
if stmt != nil {
program.Statements = append(program.Statements, stmt)
}
// NOTE: For now, this does more than eat the semicolon: it also eats
// each and every token until parseStatement finds something it deems
// valid.
p.nextToken()
}
return program
@ -46,7 +63,7 @@ func (p *Parser) parseStatement() ast.Statement {
case token.RETURN:
return p.parseReturnStatement()
}
return nil
return p.parseExpressionStatement()
}
func (p *Parser) parseLetStatement() ast.Statement {
@ -59,7 +76,7 @@ func (p *Parser) parseLetStatement() ast.Statement {
return nil
}
// TODO: Skipping until we find the semicolon to avoid parsing the expression.
if !p.curTokenIs(token.SEMICOLON) {
for !p.curTokenIs(token.SEMICOLON) {
p.nextToken()
}
return stmt
@ -69,12 +86,61 @@ func (p *Parser) parseReturnStatement() ast.Statement {
stmt := &ast.ReturnStatement{Token: p.curToken}
p.nextToken()
// TODO: Skipping until we find the semicolon to avoid parsing the expression.
if !p.curTokenIs(token.SEMICOLON) {
for !p.curTokenIs(token.SEMICOLON) {
p.nextToken()
}
return stmt
}
// parseExpressionStatement parses a statement that consists of a single
// expression. The statement's Token is the token the parser was on when the
// statement started.
func (p *Parser) parseExpressionStatement() ast.Statement {
	// Capture the starting token up front: parsing the expression may advance
	// the parser (prefix expressions do).
	first := p.curToken
	expr := p.parseExpression(LOWEST)

	// The semicolon is optional for expression statements so they're easier
	// to type on the REPL. parseExpression leaves curToken on the last token
	// of the expression, so a terminating semicolon, if any, is the peek token.
	if p.peekTokenIs(token.SEMICOLON) {
		p.nextToken()
	}

	return &ast.ExpressionStatement{Token: first, Expression: expr}
}
// parseExpression dispatches to the prefix parse function registered for the
// current token's type and returns whatever that function produces. If no
// function is registered, an error is recorded and nil is returned.
//
// The precedence argument is accepted but not consulted by this
// implementation.
func (p *Parser) parseExpression(precedence int) ast.Expression {
	// Indexing a map with a missing key yields the zero value, which for a
	// func type is nil, so the nil check covers "nothing registered".
	// TODO: Could this be replaced with an `ok` check?
	if parse := p.prefixParseFns[p.curToken.Type]; parse != nil {
		return parse()
	}

	p.noPrefixParseFnError(p.curToken.Type)
	return nil
}
// parseIdentifier builds an identifier node from the current token, using the
// token's literal text as the identifier's value. It does not advance the
// parser.
func (p *Parser) parseIdentifier() ast.Expression {
	tok := p.curToken
	return &ast.Identifier{Token: tok, Value: tok.Literal}
}
// parseIntegerLiteral converts the current token's literal text into a 64-bit
// integer node. If the text is not a valid integer, an error is recorded and
// nil is returned. It does not advance the parser.
func (p *Parser) parseIntegerLiteral() ast.Expression {
	tok := p.curToken

	// Base 0 lets strconv infer the base from the literal's prefix.
	value, err := strconv.ParseInt(tok.Literal, 0, 64)
	if err != nil {
		msg := fmt.Sprintf("could not parse %q as an integer", tok.Literal)
		p.errors = append(p.errors, msg)
		return nil
	}

	return &ast.IntegerLiteral{Token: tok, Value: value}
}
// parsePrefixExpression parses an operator followed by its operand. The
// current token supplies both the node's Token and its Operator text; the
// operand is whatever parseExpression yields for the tokens that follow
// (possibly nil if they cannot be parsed).
func (p *Parser) parsePrefixExpression() ast.Expression {
	op := p.curToken

	// Step past the operator so the operand is parsed from the next token.
	p.nextToken()
	operand := p.parseExpression(PREFIX)

	return &ast.PrefixExpression{
		Token:    op,
		Operator: op.Literal,
		Right:    operand,
	}
}
// curTokenIs reports whether the current token has type typ.
func (p *Parser) curTokenIs(typ token.TokenType) bool {
	return typ == p.curToken.Type
}
@ -114,3 +180,18 @@ func (p *Parser) peekError(typ token.TokenType) {
),
)
}
// noPrefixParseFnError records that no prefix parse function is registered
// for token type t.
func (p *Parser) noPrefixParseFnError(t token.TokenType) {
	msg := fmt.Sprintf("no prefix parse function found for %q", t)
	p.errors = append(p.errors, msg)
}
// registerPrefix associates fn with token type typ as its prefix parse
// function, replacing any function previously registered for that type.
func (p *Parser) registerPrefix(typ token.TokenType, fn prefixParseFn) {
	p.prefixParseFns[typ] = fn
}
// registerInfix associates fn with token type typ as its infix parse
// function, replacing any function previously registered for that type.
func (p *Parser) registerInfix(typ token.TokenType, fn infixParseFn) {
	p.infixParseFns[typ] = fn
}