// Package parser turns the token stream produced by the lexer into the AST
// defined in the ast package, using Pratt-style (top-down operator
// precedence) parsing.
package parser

import (
	"fmt"
	"strconv"

	"code.jmug.me/jmug/interpreter-in-go/pkg/ast"
	"code.jmug.me/jmug/interpreter-in-go/pkg/lexer"
	"code.jmug.me/jmug/interpreter-in-go/pkg/token"
)
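
// Pratt-parser hooks. A prefixParseFn is invoked when its token type appears
// in prefix position (identifiers, literals, and prefix operators such as !
// and -); an infixParseFn is invoked when its token type appears between two
// operands (e.g. + in `a + b`) and receives the already-parsed left-hand side.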
type (
	prefixParseFn func() ast.Expression
	infixParseFn  func(ast.Expression) ast.Expression
)

type Parser struct {
	l      *lexer.Lexer
	errors []string

	curToken  token.Token
	peekToken token.Token

	prefixParseFns map[token.TokenType]prefixParseFn
	infixParseFns  map[token.TokenType]infixParseFn
}

func New(l *lexer.Lexer) *Parser {
	p := &Parser{
		l:              l,
		errors:         []string{},
		prefixParseFns: map[token.TokenType]prefixParseFn{},
		infixParseFns:  map[token.TokenType]infixParseFn{},
	}

	// Prefix registrations
	p.registerPrefix(token.IDENT, p.parseIdentifier)
	p.registerPrefix(token.INT, p.parseIntegerLiteral)
	p.registerPrefix(token.MINUS, p.parsePrefixExpression)
	p.registerPrefix(token.BANG, p.parsePrefixExpression)
	p.registerPrefix(token.TRUE, p.parseBoolean)
	p.registerPrefix(token.FALSE, p.parseBoolean)
	p.registerPrefix(token.LPAREN, p.parseGroupedExpression)
	p.registerPrefix(token.IF, p.parseIfExpression)
	p.registerPrefix(token.FUNCTION, p.parseFunctionLiteral)

	// Infix registrations
	p.registerInfix(token.PLUS, p.parseInfixExpression)
	p.registerInfix(token.MINUS, p.parseInfixExpression)
	p.registerInfix(token.ASTERISK, p.parseInfixExpression)
	p.registerInfix(token.SLASH, p.parseInfixExpression)
	p.registerInfix(token.GT, p.parseInfixExpression)
	p.registerInfix(token.LT, p.parseInfixExpression)
	p.registerInfix(token.EQ, p.parseInfixExpression)
	p.registerInfix(token.NOT_EQ, p.parseInfixExpression)

	// Read two tokens so that curToken and peekToken are both populated
	// before parsing starts.
	// TODO: figure out why this can't be done from `parseProgram`
	p.nextToken()
	p.nextToken()

	return p
}
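
// A minimal sketch of how a client package typically drives this parser
// (assuming, as in the book this project follows, that lexer.New takes the
// source text):
//
//	l := lexer.New("let answer = 6 * 7;")
//	p := parser.New(l)
//	program := p.ParseProgram()
//	if len(p.Errors()) != 0 {
//		// report the parse errors and bail
//	}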

func (p *Parser) ParseProgram() *ast.Program {
	program := &ast.Program{}
	program.Statements = []ast.Statement{}

	for !p.curTokenIs(token.EOF) {
		stmt := p.parseStatement()
		if stmt != nil {
			program.Statements = append(program.Statements, stmt)
		}

		// NOTE: For now, this is not only eating the semicolon; it is also
		// eating any and all tokens until parseStatement finds something it
		// deems valid.
		p.nextToken()
	}

	return program
}

func (p *Parser) parseStatement() ast.Statement {
	switch p.curToken.Type {
	case token.LET:
		return p.parseLetStatement()
	case token.RETURN:
		return p.parseReturnStatement()
	}

	return p.parseExpressionStatement()
}

func (p *Parser) parseBlockStatement() *ast.BlockStatement {
	block := &ast.BlockStatement{Token: p.curToken}
	block.Statements = []ast.Statement{}

	p.nextToken()

	for !p.curTokenIs(token.RBRACE) && !p.curTokenIs(token.EOF) {
		stmt := p.parseStatement()
		if stmt != nil {
			block.Statements = append(block.Statements, stmt)
		}

		// Advance past the statement's final token (usually the semicolon).
		p.nextToken()
	}

	return block
}

func (p *Parser) parseLetStatement() ast.Statement {
	stmt := &ast.LetStatement{Token: p.curToken}

	if !p.nextTokenIfPeekIs(token.IDENT) {
		return nil
	}

	stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}

	if !p.nextTokenIfPeekIs(token.ASSIGN) {
		return nil
	}

	// TODO: Skipping until we find the semicolon (or EOF) to avoid parsing
	// the expression.
	for !p.curTokenIs(token.SEMICOLON) && !p.curTokenIs(token.EOF) {
		p.nextToken()
	}

	return stmt
}

func (p *Parser) parseReturnStatement() ast.Statement {
	stmt := &ast.ReturnStatement{Token: p.curToken}

	p.nextToken()

	// TODO: Skipping until we find the semicolon (or EOF) to avoid parsing
	// the expression.
	for !p.curTokenIs(token.SEMICOLON) && !p.curTokenIs(token.EOF) {
		p.nextToken()
	}

	return stmt
}

func (p *Parser) parseExpressionStatement() ast.Statement {
	stmt := &ast.ExpressionStatement{Token: p.curToken}

	stmt.Expression = p.parseExpression(LOWEST)

	// The semicolon is optional for expression statements so they're easier
	// to type in the REPL. NOTE: parseExpression leaves the last token it
	// parsed as curToken instead of consuming past it, which is why we check
	// peekToken here.
	if p.peekTokenIs(token.SEMICOLON) {
		p.nextToken()
	}

	return stmt
}
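
// parseExpression below leans on operator-precedence plumbing that lives
// outside this file: the LOWEST and PREFIX constants and the peekPrecedence /
// curPrecedence helpers. A rough sketch of what that plumbing is assumed to
// look like, mirroring the book's layout (every name other than LOWEST and
// PREFIX is illustrative):
//
//	const (
//		LOWEST      = iota + 1
//		EQUALS      // ==
//		LESSGREATER // > or <
//		SUM         // +
//		PRODUCT     // *
//		PREFIX      // -x or !x
//		CALL        // f(x)
//	)
//
// with peekPrecedence/curPrecedence looking the respective token's type up in
// a map[token.TokenType]int and falling back to LOWEST. Under such a table,
// `1 + 2 * 3` parses as `(1 + (2 * 3))`: while the right-hand side of `+` is
// being parsed, the loop notices that `*` binds tighter (PRODUCT > SUM) and
// keeps folding.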
func (p *Parser) parseExpression(precedence int) ast.Expression {
	// TODO: Could this be replaced with an `ok` check?
	prefix := p.prefixParseFns[p.curToken.Type]
	if prefix == nil {
		p.noPrefixParseFnError(p.curToken.Type)
		return nil
	}

	curExpr := prefix()

	for !p.peekTokenIs(token.SEMICOLON) && precedence < p.peekPrecedence() {
		infix := p.infixParseFns[p.peekToken.Type]
		if infix == nil {
			return curExpr
		}

		p.nextToken()

		curExpr = infix(curExpr)
	}

	return curExpr
}

func (p *Parser) parseIdentifier() ast.Expression {
	return &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
}

func (p *Parser) parseIntegerLiteral() ast.Expression {
	exp := &ast.IntegerLiteral{Token: p.curToken}

	literal, err := strconv.ParseInt(p.curToken.Literal, 0, 64)
	if err != nil {
		p.errors = append(p.errors, fmt.Sprintf("could not parse %q as an integer", p.curToken.Literal))
		return nil
	}

	exp.Value = literal

	return exp
}

func (p *Parser) parseBoolean() ast.Expression {
	return &ast.Boolean{Token: p.curToken, Value: p.curTokenIs(token.TRUE)}
}

func (p *Parser) parsePrefixExpression() ast.Expression {
	exp := &ast.PrefixExpression{
		Token:    p.curToken,
		Operator: p.curToken.Literal,
	}

	p.nextToken()

	exp.Right = p.parseExpression(PREFIX)

	return exp
}
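
// parseInfixExpression is called with the already-parsed left operand and
// with curToken sitting on the operator. The operator's precedence is
// captured before advancing; because parseExpression's loop only continues
// while the next operator binds strictly tighter, operators of equal
// precedence group to the left (`a - b - c` parses as `((a - b) - c)`).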
func (p *Parser) parseInfixExpression(left ast.Expression) ast.Expression {
	exp := &ast.InfixExpression{
		Token:    p.curToken,
		Operator: p.curToken.Literal,
		Left:     left,
	}

	precedence := p.curPrecedence()

	p.nextToken()

	exp.Right = p.parseExpression(precedence)

	return exp
}

func (p *Parser) parseGroupedExpression() ast.Expression {
	p.nextToken()

	exp := p.parseExpression(LOWEST)

	if !p.nextTokenIfPeekIs(token.RPAREN) {
		// nextTokenIfPeekIs has already recorded a peekError for us.
		return nil
	}

	return exp
}

func (p *Parser) parseIfExpression() ast.Expression {
	exp := &ast.IfExpression{Token: p.curToken}

	// nextTokenIfPeekIs records a peekError on failure, here and below.
	if !p.nextTokenIfPeekIs(token.LPAREN) {
		return nil
	}

	p.nextToken()
	exp.Condition = p.parseExpression(LOWEST)

	if !p.nextTokenIfPeekIs(token.RPAREN) {
		return nil
	}

	if !p.nextTokenIfPeekIs(token.LBRACE) {
		return nil
	}

	exp.Consequence = p.parseBlockStatement()

	if p.peekTokenIs(token.ELSE) {
		p.nextToken()

		if !p.nextTokenIfPeekIs(token.LBRACE) {
			return nil
		}

		exp.Alternative = p.parseBlockStatement()
	}

	// We don't consume the RBRACE because it acts as our "end of statement"
	// token, and it gets consumed by ParseProgram.
	return exp
}

func (p *Parser) parseFunctionLiteral() ast.Expression {
	fn := &ast.FunctionLiteral{Token: p.curToken}

	if !p.nextTokenIfPeekIs(token.LPAREN) {
		// nextTokenIfPeekIs has already recorded a peekError for us.
		return nil
	}

	fn.Parameters = p.parseFunctionParameters()

	if !p.nextTokenIfPeekIs(token.LBRACE) {
		return nil
	}

	fn.Body = p.parseBlockStatement()

	return fn
}
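
// parseFunctionParameters is entered with curToken on the opening LPAREN and,
// on success, returns with curToken on the closing RPAREN. For `fn(x, y)` it
// steps LPAREN -> x -> COMMA -> y -> RPAREN, collecting the identifiers x and y.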
func (p *Parser) parseFunctionParameters() []*ast.Identifier {
	params := []*ast.Identifier{}

	if p.peekTokenIs(token.RPAREN) {
		p.nextToken()
		return params
	}

	// Consume the LPAREN
	p.nextToken()

	params = append(params, &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal})

	for p.peekTokenIs(token.COMMA) {
		// Consume the previous identifier.
		p.nextToken()
		// Consume the comma.
		p.nextToken()

		params = append(params, &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal})
	}

	if !p.nextTokenIfPeekIs(token.RPAREN) {
		// nextTokenIfPeekIs has already recorded a peekError for us.
		return nil
	}

	return params
}

func (p *Parser) curTokenIs(typ token.TokenType) bool {
	return p.curToken.Type == typ
}

func (p *Parser) peekTokenIs(typ token.TokenType) bool {
	return p.peekToken.Type == typ
}

// NOTE: This is the book's expectPeek under a more descriptive name: advance
// to the next token only when the peeked token has the expected type,
// recording a peekError otherwise.
func (p *Parser) nextTokenIfPeekIs(typ token.TokenType) bool {
	if p.peekTokenIs(typ) {
		p.nextToken()
		return true
	}

	p.peekError(typ)
	return false
}

func (p *Parser) nextToken() {
	p.curToken = p.peekToken
	p.peekToken = p.l.NextToken()
}

func (p *Parser) Errors() []string {
	return p.errors
}

func (p *Parser) peekError(typ token.TokenType) {
	p.errors = append(
		p.errors,
		fmt.Sprintf(
			"expected next token to be %q, got %q instead",
			typ,
			p.peekToken.Type,
		),
	)
}

func (p *Parser) noPrefixParseFnError(t token.TokenType) {
	p.errors = append(
		p.errors,
		fmt.Sprintf("no prefix parse function found for %q", t),
	)
}

func (p *Parser) registerPrefix(typ token.TokenType, fn prefixParseFn) {
	p.prefixParseFns[typ] = fn
}

func (p *Parser) registerInfix(typ token.TokenType, fn infixParseFn) {
	p.infixParseFns[typ] = fn
}