// Package parser turns the token stream produced by the lexer into an AST.
//
// Expressions are parsed with a precedence-driven (Pratt-style) scheme: a
// token type may have a prefix parse function and/or an infix parse function
// registered for it, and parseExpression combines them according to the
// precedence of the upcoming operator.
package parser

import (
	"fmt"
	"strconv"

	"code.jmug.me/jmug/compiler-in-go/pkg/ast"
	"code.jmug.me/jmug/compiler-in-go/pkg/lexer"
	"code.jmug.me/jmug/compiler-in-go/pkg/token"
)

type (
	// prefixParseFn parses an expression that begins at the current token.
	prefixParseFn func() ast.Expression
	// infixParseFn parses an expression whose operator is the current token.
	// It receives the already parsed left-hand operand.
	infixParseFn func(ast.Expression) ast.Expression
)

// Parser consumes tokens from a lexer and builds an *ast.Program.
//
// Parse problems are not returned from the parse methods; they are collected
// as messages that can be retrieved with Errors.
type Parser struct {
	l      *lexer.Lexer
	errors []string

	// curToken is the token being parsed; peekToken is the one after it.
	curToken  token.Token
	peekToken token.Token

	// Parse functions keyed by the token type that triggers them.
	prefixParseFns map[token.TokenType]prefixParseFn
	infixParseFns  map[token.TokenType]infixParseFn
}

// New returns a Parser that reads tokens from l. The first two tokens are read
// eagerly so that both curToken and peekToken are populated on return.
func New(l *lexer.Lexer) *Parser {
	p := &Parser{
		l:              l,
		errors:         []string{},
		prefixParseFns: map[token.TokenType]prefixParseFn{},
		infixParseFns:  map[token.TokenType]infixParseFn{},
	}

	// Prefix registrations: tokens that can start an expression.
	p.registerPrefix(token.IDENT, p.parseIdentifier)
	p.registerPrefix(token.INT, p.parseIntegerLiteral)
	p.registerPrefix(token.MINUS, p.parsePrefixExpression)
	p.registerPrefix(token.BANG, p.parsePrefixExpression)
	p.registerPrefix(token.TRUE, p.parseBoolean)
	p.registerPrefix(token.FALSE, p.parseBoolean)
	p.registerPrefix(token.LPAREN, p.parseGroupedExpression)
	p.registerPrefix(token.IF, p.parseIfExpression)
	p.registerPrefix(token.FUNCTION, p.parseFunctionLiteral)
	p.registerPrefix(token.STRING, p.parseStringLiteral)
	p.registerPrefix(token.LBRACKET, p.parseArrayLiteral)
	p.registerPrefix(token.LBRACE, p.parseHashLiteral)

	// Infix registrations: tokens that can continue an expression.
	p.registerInfix(token.PLUS, p.parseInfixExpression)
	p.registerInfix(token.MINUS, p.parseInfixExpression)
	p.registerInfix(token.ASTERISK, p.parseInfixExpression)
	p.registerInfix(token.SLASH, p.parseInfixExpression)
	p.registerInfix(token.GT, p.parseInfixExpression)
	p.registerInfix(token.LT, p.parseInfixExpression)
	p.registerInfix(token.EQ, p.parseInfixExpression)
	p.registerInfix(token.NOT_EQ, p.parseInfixExpression)
	p.registerInfix(token.LPAREN, p.parseCallExpression)
	p.registerInfix(token.LBRACKET, p.parseIndexExpression)

	// Prime curToken and peekToken.
	// TODO: figure out why this can't be done from `ParseProgram`.
	p.nextToken()
	p.nextToken()
	return p
}

// ParseProgram parses statements until the EOF token and returns them wrapped
// in an *ast.Program. Statements that fail to parse (nil) are left out; the
// corresponding messages are available through Errors.
func (p *Parser) ParseProgram() *ast.Program {
	program := &ast.Program{}
	program.Statements = []ast.Statement{}
	for !p.curTokenIs(token.EOF) {
		if stmt := p.parseStatement(); stmt != nil {
			program.Statements = append(program.Statements, stmt)
		}
		// Every parse function leaves curToken on the last token it handled
		// (typically the semicolon), so step past it here. NOTE: after a
		// failed statement this same call is what skips tokens, one per
		// iteration, until parseStatement finds something it accepts.
		p.nextToken()
	}
	return program
}

// parseStatement dispatches on the current token: LET and RETURN have
// dedicated parsers, anything else is parsed as an expression statement.
func (p *Parser) parseStatement() ast.Statement {
	switch p.curToken.Type {
	case token.LET:
		return p.parseLetStatement()
	case token.RETURN:
		return p.parseReturnStatement()
	default:
		return p.parseExpressionStatement()
	}
}

// parseBlockStatement parses the statements of a block. It expects curToken to
// be the opening LBRACE and returns with curToken on the closing RBRACE.
//
// NOTE: the loop also stops at EOF, and no error is recorded in that case, so
// an unterminated block is currently accepted silently.
func (p *Parser) parseBlockStatement() *ast.BlockStatement {
	block := &ast.BlockStatement{Token: p.curToken}
	block.Statements = []ast.Statement{}
	// Step past the LBRACE.
	p.nextToken()
	for !p.curTokenIs(token.RBRACE) && !p.curTokenIs(token.EOF) {
		if stmt := p.parseStatement(); stmt != nil {
			block.Statements = append(block.Statements, stmt)
		}
		// Step past the last token of the statement.
		p.nextToken()
	}
	return block
}

// parseLetStatement parses `LET IDENT ASSIGN <expression> [SEMICOLON]`.
// It returns nil (after an error has been recorded by nextTokenIfPeekIs) when
// the identifier or the assign token is missing.
func (p *Parser) parseLetStatement() ast.Statement {
	stmt := &ast.LetStatement{Token: p.curToken}
	if !p.nextTokenIfPeekIs(token.IDENT) {
		return nil
	}
	stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
	if !p.nextTokenIfPeekIs(token.ASSIGN) {
		return nil
	}
	// Step past the assign token onto the first token of the value.
	p.nextToken()
	stmt.Value = p.parseExpression(LOWEST)
	if p.peekTokenIs(token.SEMICOLON) {
		p.nextToken()
	}
	return stmt
}

// parseReturnStatement parses `RETURN <expression> [SEMICOLON]`.
func (p *Parser) parseReturnStatement() ast.Statement {
	stmt := &ast.ReturnStatement{Token: p.curToken}
	// Step past the RETURN token onto the first token of the value.
	p.nextToken()
	stmt.ReturnValue = p.parseExpression(LOWEST)
	if p.peekTokenIs(token.SEMICOLON) {
		p.nextToken()
	}
	return stmt
}

// parseExpressionStatement parses a bare expression used as a statement.
func (p *Parser) parseExpressionStatement() ast.Statement {
	stmt := &ast.ExpressionStatement{Token: p.curToken}
	stmt.Expression = p.parseExpression(LOWEST)
	// The semicolon is optional for expression statements so they're easier
	// to type on the REPL. parseExpression leaves curToken on the last token
	// of the expression; the caller's loop is what advances past it.
	if p.peekTokenIs(token.SEMICOLON) {
		p.nextToken()
	}
	return stmt
}

// parseExpression parses the expression starting at curToken. It keeps folding
// infix operators into the result for as long as the upcoming operator binds
// tighter than precedence. It returns nil, after recording an error, when no
// prefix parse function is registered for the current token type.
func (p *Parser) parseExpression(precedence int) ast.Expression {
	prefix, ok := p.prefixParseFns[p.curToken.Type]
	if !ok {
		p.noPrefixParseFnError(p.curToken.Type)
		return nil
	}
	curExpr := prefix()
	for !p.peekTokenIs(token.SEMICOLON) && precedence < p.peekPrecedence() {
		infix, ok := p.infixParseFns[p.peekToken.Type]
		if !ok {
			return curExpr
		}
		// Move onto the operator; the infix function takes it from there.
		p.nextToken()
		curExpr = infix(curExpr)
	}
	return curExpr
}

// parseIdentifier builds an identifier node from the current token.
func (p *Parser) parseIdentifier() ast.Expression {
	return &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
}

// parseIntegerLiteral converts the current token's literal to an int64. Base 0
// lets strconv infer the base from the literal's prefix. A literal that does
// not convert records an error and yields nil.
func (p *Parser) parseIntegerLiteral() ast.Expression {
	exp := &ast.IntegerLiteral{Token: p.curToken}
	literal, err := strconv.ParseInt(p.curToken.Literal, 0, 64)
	if err != nil {
		p.errors = append(p.errors, fmt.Sprintf("could not parse %q as an integer", p.curToken.Literal))
		return nil
	}
	exp.Value = literal
	return exp
}

// parseBoolean builds a boolean node; its value is true exactly when the
// current token is TRUE.
func (p *Parser) parseBoolean() ast.Expression {
	return &ast.Boolean{Token: p.curToken, Value: p.curTokenIs(token.TRUE)}
}

// parsePrefixExpression parses `<operator><operand>`, with curToken on the
// operator. The operand is parsed with PREFIX precedence.
func (p *Parser) parsePrefixExpression() ast.Expression {
	exp := &ast.PrefixExpression{
		Token:    p.curToken,
		Operator: p.curToken.Literal,
	}
	p.nextToken()
	exp.Right = p.parseExpression(PREFIX)
	return exp
}

// parseInfixExpression parses `<left> <operator> <right>`, with curToken on
// the operator. The right operand is parsed with the operator's own
// precedence, so operators of equal precedence group to the left.
func (p *Parser) parseInfixExpression(left ast.Expression) ast.Expression {
	exp := &ast.InfixExpression{
		Token:    p.curToken,
		Operator: p.curToken.Literal,
		Left:     left,
	}
	// Read the precedence before advancing: it belongs to the operator.
	precedence := p.curPrecedence()
	p.nextToken()
	exp.Right = p.parseExpression(precedence)
	return exp
}

// parseGroupedExpression parses `LPAREN <expression> RPAREN` and returns the
// inner expression; no dedicated node is created for the parentheses.
func (p *Parser) parseGroupedExpression() ast.Expression {
	p.nextToken()
	exp := p.parseExpression(LOWEST)
	if !p.nextTokenIfPeekIs(token.RPAREN) {
		// nextTokenIfPeekIs has already recorded the error.
		return nil
	}
	return exp
}

// parseIfExpression parses
// `IF LPAREN <condition> RPAREN LBRACE <block> RBRACE [ELSE LBRACE <block> RBRACE]`.
// Whenever an expected token is missing, nextTokenIfPeekIs records the error
// and nil is returned.
func (p *Parser) parseIfExpression() ast.Expression {
	exp := &ast.IfExpression{Token: p.curToken}
	if !p.nextTokenIfPeekIs(token.LPAREN) {
		return nil
	}
	p.nextToken()
	exp.Condition = p.parseExpression(LOWEST)
	if !p.nextTokenIfPeekIs(token.RPAREN) {
		return nil
	}
	if !p.nextTokenIfPeekIs(token.LBRACE) {
		return nil
	}
	exp.Consequence = p.parseBlockStatement()
	if p.peekTokenIs(token.ELSE) {
		p.nextToken()
		if !p.nextTokenIfPeekIs(token.LBRACE) {
			return nil
		}
		exp.Alternative = p.parseBlockStatement()
	}
	// We don't consume the RBRACE because it acts as our "end of statement"
	// token, and it's consumed by ParseProgram.
	return exp
}

// parseFunctionLiteral parses `FUNCTION LPAREN <parameters> RPAREN LBRACE <block> RBRACE`.
// Whenever an expected token is missing, nextTokenIfPeekIs records the error
// and nil is returned.
func (p *Parser) parseFunctionLiteral() ast.Expression {
	fn := &ast.FunctionLiteral{Token: p.curToken}
	if !p.nextTokenIfPeekIs(token.LPAREN) {
		return nil
	}
	fn.Parameters = p.parseFunctionParameters()
	if !p.nextTokenIfPeekIs(token.LBRACE) {
		return nil
	}
	fn.Body = p.parseBlockStatement()
	return fn
}

// parseFunctionParameters parses a comma-separated list of identifiers. It
// expects curToken to be the opening LPAREN and, on success, returns with
// curToken on the closing RPAREN.
//
// A successful parse always yields a non-nil slice (empty for `()`); nil means
// the list was malformed and an error has been recorded. Every parameter must
// be an IDENT token, so input such as `fn(1, 2)` is reported instead of being
// turned into identifiers named "1" and "2".
func (p *Parser) parseFunctionParameters() []*ast.Identifier {
	params := []*ast.Identifier{}
	if p.peekTokenIs(token.RPAREN) {
		p.nextToken()
		return params
	}
	// Move from the LPAREN onto the first parameter.
	if !p.nextTokenIfPeekIs(token.IDENT) {
		return nil
	}
	params = append(params, &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal})
	for p.peekTokenIs(token.COMMA) {
		// Move from the previous identifier onto the comma.
		p.nextToken()
		// Move from the comma onto the next parameter.
		if !p.nextTokenIfPeekIs(token.IDENT) {
			return nil
		}
		params = append(params, &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal})
	}
	if !p.nextTokenIfPeekIs(token.RPAREN) {
		return nil
	}
	return params
}

// parseCallExpression parses the argument list of a call. curToken is the
// LPAREN and function is the already parsed callee.
func (p *Parser) parseCallExpression(function ast.Expression) ast.Expression {
	call := &ast.CallExpression{Token: p.curToken, Function: function}
	call.Arguments = p.parseExpressionList(token.RPAREN)
	return call
}

// parseExpressionList parses comma-separated expressions up to the end token.
// It expects curToken to be the opening delimiter and, on success, returns
// with curToken on end.
//
// A successful parse always yields a non-nil slice (empty when the list has no
// elements); nil means end was not found where expected, in which case
// nextTokenIfPeekIs has recorded the error.
func (p *Parser) parseExpressionList(end token.TokenType) []ast.Expression {
	args := []ast.Expression{}
	if p.peekTokenIs(end) {
		p.nextToken()
		return args
	}
	// Move from the opening delimiter onto the first expression.
	p.nextToken()
	args = append(args, p.parseExpression(LOWEST))
	for p.peekTokenIs(token.COMMA) {
		// Move from the last token of the previous expression onto the comma.
		p.nextToken()
		// Move from the comma onto the next expression.
		p.nextToken()
		args = append(args, p.parseExpression(LOWEST))
	}
	if !p.nextTokenIfPeekIs(end) {
		return nil
	}
	return args
}

// parseStringLiteral builds a string node from the current token.
func (p *Parser) parseStringLiteral() ast.Expression {
	return &ast.StringLiteral{Token: p.curToken, Value: p.curToken.Literal}
}

// parseArrayLiteral parses `LBRACKET <expressions> RBRACKET`, with curToken on
// the LBRACKET.
func (p *Parser) parseArrayLiteral() ast.Expression {
	array := &ast.ArrayLiteral{Token: p.curToken}
	array.Elements = p.parseExpressionList(token.RBRACKET)
	return array
}

// parseIndexExpression parses `<left> LBRACKET <index> RBRACKET`, with
// curToken on the LBRACKET. It returns nil when the RBRACKET is missing.
func (p *Parser) parseIndexExpression(left ast.Expression) ast.Expression {
	ie := &ast.IndexExpression{Token: p.curToken, Left: left}
	p.nextToken()
	ie.Index = p.parseExpression(LOWEST)
	if !p.nextTokenIfPeekIs(token.RBRACKET) {
		return nil
	}
	return ie
}

// parseHashLiteral parses `LBRACE [<key> COLON <value> {COMMA <key> COLON <value>}] RBRACE`,
// with curToken on the LBRACE. A comma directly before the RBRACE is accepted.
// It returns nil when a COLON or a separator is missing; exactly one error is
// recorded for each such problem.
func (p *Parser) parseHashLiteral() ast.Expression {
	hash := &ast.HashLiteral{
		Token: p.curToken,
		Pairs: map[ast.Expression]ast.Expression{},
	}
	for !p.peekTokenIs(token.RBRACE) {
		// Move onto the first token of the key.
		p.nextToken()
		k := p.parseExpression(LOWEST)
		if !p.nextTokenIfPeekIs(token.COLON) {
			return nil
		}
		// Move from the colon onto the first token of the value.
		p.nextToken()
		v := p.parseExpression(LOWEST)
		hash.Pairs[k] = v
		// A pair is followed either by the closing brace or by a comma.
		// Bail out right away otherwise: falling through to an RBRACE check
		// would only report the same problem a second time.
		if !p.peekTokenIs(token.RBRACE) && !p.nextTokenIfPeekIs(token.COMMA) {
			return nil
		}
	}
	// The loop only exits with the RBRACE in peek position; move onto it.
	p.nextToken()
	return hash
}

// curTokenIs reports whether the current token has type typ.
func (p *Parser) curTokenIs(typ token.TokenType) bool {
	return p.curToken.Type == typ
}

// peekTokenIs reports whether the upcoming token has type typ.
func (p *Parser) peekTokenIs(typ token.TokenType) bool {
	return p.peekToken.Type == typ
}

// nextTokenIfPeekIs advances to the next token and returns true when the
// upcoming token has type typ. Otherwise it records an "expected next token"
// error, leaves the token position untouched and returns false.
//
// NOTE(review): an earlier note here talked about keeping the name used by the
// book this parser follows versus renaming to something like
// `nextTokenIfPeek`. The current name suggests the rename already happened;
// confirm and drop this note.
func (p *Parser) nextTokenIfPeekIs(typ token.TokenType) bool {
	if !p.peekTokenIs(typ) {
		p.peekError(typ)
		return false
	}
	p.nextToken()
	return true
}

// nextToken shifts peekToken into curToken and reads a new peekToken from the
// lexer.
func (p *Parser) nextToken() {
	p.curToken = p.peekToken
	p.peekToken = p.l.NextToken()
}

// Errors returns the messages recorded so far. The returned slice is the
// parser's own, not a copy.
func (p *Parser) Errors() []string {
	return p.errors
}

// peekError records that the upcoming token was expected to have type typ.
func (p *Parser) peekError(typ token.TokenType) {
	msg := fmt.Sprintf(
		"expected next token to be %q, got %q instead",
		typ, p.peekToken.Type,
	)
	p.errors = append(p.errors, msg)
}

// noPrefixParseFnError records that no expression can start with a token of
// type t.
func (p *Parser) noPrefixParseFnError(t token.TokenType) {
	msg := fmt.Sprintf("no prefix parse function found for %q", t)
	p.errors = append(p.errors, msg)
}

// registerPrefix associates fn with typ for tokens in prefix position.
func (p *Parser) registerPrefix(typ token.TokenType, fn prefixParseFn) {
	p.prefixParseFns[typ] = fn
}

// registerInfix associates fn with typ for tokens in infix position.
func (p *Parser) registerInfix(typ token.TokenType, fn infixParseFn) {
	p.infixParseFns[typ] = fn
}