diff --git a/README.md b/README.md index c803962..20eda18 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ # crafting-interpreters -Code for the book Crafting Interpreters by Robert Nystrom +Code for the book "Crafting Interpreters" by Robert Nystrom -Porting the Java code for the first version fo the interpreter into Go. +Porting the Java code for the first version of the interpreter into Go. diff --git a/golox/cmd/golox/main.go b/golox/cmd/golox/main.go index 0014a11..246275f 100644 --- a/golox/cmd/golox/main.go +++ b/golox/cmd/golox/main.go @@ -19,6 +19,12 @@ func main() { if errors.Is(err, runner.ErrInvalidScriptFile) { fmt.Println(err) os.Exit(1) + } else if errors.Is(err, runner.ErrScriptNotRunnable) { + fmt.Println(err) + os.Exit(65) + } else if err != nil { + fmt.Printf("Unexpected error: %v\n", err) + os.Exit(1) } default: runner.RunPrompt() diff --git a/golox/internal/errors/errors.go b/golox/internal/errors/errors.go index cf6a9f0..244b9dd 100644 --- a/golox/internal/errors/errors.go +++ b/golox/internal/errors/errors.go @@ -2,7 +2,7 @@ package errors import "fmt" -var hadError = false +var HadError = false func EmitError(line int, message string) { report(line, "", message) @@ -15,9 +15,5 @@ func report(line int, where, message string) { where, message, ) - hadError = true -} - -func HadError() bool { - return hadError + HadError = true } diff --git a/golox/internal/runner/runner.go b/golox/internal/runner/runner.go index 9ee498a..17bbea2 100644 --- a/golox/internal/runner/runner.go +++ b/golox/internal/runner/runner.go @@ -5,9 +5,13 @@ import ( "errors" "fmt" "os" + + lerrors "github.com/AYM1607/crafting-interpreters/golox/internal/errors" + "github.com/AYM1607/crafting-interpreters/golox/internal/scanner" ) var ErrInvalidScriptFile = errors.New("could not read script file") +var ErrScriptNotRunnable = errors.New("could not run script") func RunPrompt() { s := bufio.NewScanner(os.Stdin) @@ -15,7 +19,9 @@ func RunPrompt() { for s.Scan() { line := 
s.Text() Run(line) - // TODO: resed hadError wherever it is set. + // TODO: Understand the implications of this. The book implies that it's + // to allow the users to keep issuing commands even if they make a mistake. + lerrors.HadError = false fmt.Print("> ") } } @@ -26,12 +32,14 @@ func RunFile(path string) error { return errors.Join(ErrInvalidScriptFile, err) } Run(string(fBytes)) - // TODO: check hadError and exit with a 65 code if so. + if lerrors.HadError { + return ErrScriptNotRunnable + } return nil } func Run(source string) { - s := NewScanner(source) + s := scanner.NewScanner(source) tokens := s.ScanTokens() for _, t := range tokens { diff --git a/golox/internal/runner/scanner.go b/golox/internal/runner/scanner.go deleted file mode 100644 index 4d114a7..0000000 --- a/golox/internal/runner/scanner.go +++ /dev/null @@ -1,80 +0,0 @@ -package runner - -type Scanner struct { - source string - - // State. - tokens []Token - start int - current int - line int -} - -func NewScanner(source string) *Scanner { - return &Scanner{ - source: source, - tokens: []Token{}, - - start: 0, - current: 0, - line: 1, - } -} - -func (s *Scanner) ScanTokens() []Token { - for !s.isAtEnd() { - s.start = s.current - s.scanToken() - } - - s.tokens = append(s.tokens, NewToken(EOF, "", nil, s.line)) - return s.tokens -} - -func (s *Scanner) scanToken() { - c := s.advance() - switch c { - case '(': - s.addToken(LPAREN) - case ')': - s.addToken(RPAREN) - case '{': - s.addToken(LBRACE) - case '}': - s.addToken(RBRACE) - case ',': - s.addToken(COMMA) - case '.': - s.addToken(DOT) - case '-': - s.addToken(MINUS) - case '+': - s.addToken(PLUS) - case ';': - s.addToken(SEMI) - case '*': - s.addToken(STAR) - } -} - -func (s *Scanner) advance() byte { - idx := s.current - s.current += 1 - return s.source[idx] -} - -func (s *Scanner) addToken(typ TokenType) { - s.addTokenWithLiteral(typ, nil) -} - -func (s *Scanner) addTokenWithLiteral(typ TokenType, literal interface{}) { - lexme := 
s.source[s.start:s.current] - s.tokens = append( - s.tokens, - NewToken(typ, lexme, literal, s.line), - ) -} - -func (s *Scanner) isAtEnd() bool { - return s.current >= len(s.source) -} diff --git a/golox/internal/scanner/scanner.go b/golox/internal/scanner/scanner.go new file mode 100644 index 0000000..93e7415 --- /dev/null +++ b/golox/internal/scanner/scanner.go @@ -0,0 +1,271 @@ +package scanner + +import ( + "strconv" + + lerrors "github.com/AYM1607/crafting-interpreters/golox/internal/errors" + "github.com/AYM1607/crafting-interpreters/golox/internal/types" +) + +type Scanner struct { + source string + + // State. + tokens []types.Token + start int + current int + line int +} + +func NewScanner(source string) *Scanner { + return &Scanner{ + source: source, + tokens: []types.Token{}, + + start: 0, + current: 0, + line: 1, + } +} + +func (s *Scanner) ScanTokens() []types.Token { + for !s.isAtEnd() { + s.start = s.current + s.scanToken() + } + + s.tokens = append(s.tokens, types.NewToken(types.EOF, "", nil, s.line)) + return s.tokens +} + +func (s *Scanner) scanToken() { + c := s.advance() + switch c { + case '(': + s.addToken(types.LPAREN) + case ')': + s.addToken(types.RPAREN) + case '{': + s.addToken(types.LBRACE) + case '}': + s.addToken(types.RBRACE) + case ',': + s.addToken(types.COMMA) + case '.': + s.addToken(types.DOT) + case '-': + s.addToken(types.MINUS) + case '+': + s.addToken(types.PLUS) + case ';': + s.addToken(types.SEMI) + case '*': + s.addToken(types.STAR) + case '!': + tok := types.BANG + if s.match('=') { + tok = types.BANG_EQUAL + } + s.addToken(tok) + case '=': + tok := types.EQUAL + if s.match('=') { + tok = types.EQUAL_EQUAL + } + s.addToken(tok) + case '<': + tok := types.LT + if s.match('=') { + tok = types.LTE + } + s.addToken(tok) + case '>': + tok := types.GT + if s.match('=') { + tok = types.GTE + } + s.addToken(tok) + case '/': + if s.match('/') { + // Consume all characters in a line comment. 
+ for s.peek() != '\n' && !s.isAtEnd() { + s.advance() + } + } else if s.match('*') { + s.scanInlineComment() + } else { + s.addToken(types.SLASH) + } + case '"': + s.scanString() + // Ignore whitespace. + case ' ': + case '\t': + case '\r': + // Handle new lines. + case '\n': + s.line += 1 + default: + // NOTE: adding this here to avoid listing all digits in a case. + if isDigit(c) { + s.scanNumber() + return + } + if isIdentAlpha(c) { + s.scanIdentifier() + return + } + lerrors.EmitError(s.line, "Unexpected character.") + } +} + +// advance consumes a single character from the source. +func (s *Scanner) advance() byte { + idx := s.current + s.current += 1 + return s.source[idx] +} + +// match returns true if the given byte is equal to the next one in source, +// it consumes the character if so. +func (s *Scanner) match(c byte) bool { + if s.isAtEnd() { + return false + } + if s.source[s.current] != c { + return false + } + + // Next character in the source matches. + s.current += 1 + return true +} + +func (s *Scanner) peek() byte { + if s.isAtEnd() { + return 0 + } + return s.source[s.current] +} + +func (s *Scanner) peekNex() byte { + idx := s.current + 1 + if idx >= len(s.source) { + return 0 + } + return s.source[idx] +} + +func (s *Scanner) scanString() { + for s.peek() != '"' && !s.isAtEnd() { + // Lox allows multi-line strings. + if s.peek() == '\n' { + s.line += 1 + } + s.advance() + } + + if s.isAtEnd() { + lerrors.EmitError(s.line, "Unterminated string.") + return + } + + // Consume the closing " + s.advance() + + // Trim enclosing quotes + val := s.source[s.start+1 : s.current-1] + s.addTokenWithLiteral(types.STRING, val) +} + +func (s *Scanner) scanNumber() { + // Consume all digits preceding a dot (if any) + for isDigit(s.peek()) { + s.advance() + } + + // Look for a decimal part. + // Only literals in the form 123 and 123.123 are allowed. + if s.peek() == '.' && isDigit(s.peekNex()) { + // Only consume the dot if we're sure the format is valid. 
+ s.advance() + + // Consume the rest of the digits. + for isDigit(s.peek()) { + s.advance() + } + } + // NOTE: Ignoring error because we're sure the string follows the float + // format. This should probably still report it but will leave as-is + // for now. + val, _ := strconv.ParseFloat( + s.source[s.start:s.current], + 64, + ) + s.addTokenWithLiteral( + types.NUMBER, + val, + ) +} + +func (s *Scanner) scanIdentifier() { + for isIdentAlphaNumeric(s.peek()) { + s.advance() + } + l := s.source[s.start:s.current] + typ := types.IDENT + if kTyp, ok := types.KeywordTypes[l]; ok { + typ = kTyp + } + s.addToken(typ) +} + +func (s *Scanner) scanInlineComment() { + depth := 1 + closed := false + for !s.isAtEnd() && depth >= 1 { + p := s.peek() + pn := s.peekNex() + switch { + case p == '\n': + s.line += 1 + case p == '/' && pn == '*': + // Consume the extra character. + s.advance() + depth += 1 + case p == '*' && pn == '/': + // Consume the extra character. + s.advance() + depth -= 1 + if depth == 0 { + closed = true + } + } + // Always consume at least one character. + s.advance() + } + + // Only report an error if the last nested (could just be one) comment + // did not close. + if s.isAtEnd() && !closed { + lerrors.EmitError(s.line, "Unterminated comment.") + } +} + +// addToken produces a single token without a literal value. +func (s *Scanner) addToken(typ types.TokenType) { + s.addTokenWithLiteral(typ, nil) +} + +// addTokenWithLiteral produces a single token with the given literal value. 
+func (s *Scanner) addTokenWithLiteral(typ types.TokenType, literal interface{}) { + lexme := s.source[s.start:s.current] + s.tokens = append( + s.tokens, + types.NewToken(typ, lexme, literal, s.line), + ) +} + +func (s *Scanner) isAtEnd() bool { + return s.current >= len(s.source) +} diff --git a/golox/internal/scanner/util.go b/golox/internal/scanner/util.go new file mode 100644 index 0000000..b9bad58 --- /dev/null +++ b/golox/internal/scanner/util.go @@ -0,0 +1,15 @@ +package scanner + +func isIdentAlphaNumeric(c byte) bool { + return isIdentAlpha(c) || isDigit(c) +} + +func isIdentAlpha(c byte) bool { + return (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + c == '_' +} + +func isDigit(c byte) bool { + return c >= '0' && c <= '9' +} diff --git a/golox/internal/types/const.go b/golox/internal/types/const.go new file mode 100644 index 0000000..4339d50 --- /dev/null +++ b/golox/internal/types/const.go @@ -0,0 +1,20 @@ +package types + +var KeywordTypes = map[string]TokenType{ + "and": AND, + "class": CLASS, + "else": ELSE, + "false": FALSE, + "for": FOR, + "fun": FUN, + "if": IF, + "nil": NIL, + "or": OR, + "print": PRINT, + "return": RETURN, + "super": SUPER, + "this": THIS, + "true": TRUE, + "var": VAR, + "while": WHILE, +} diff --git a/golox/internal/runner/token.go b/golox/internal/types/token.go similarity index 96% rename from golox/internal/runner/token.go rename to golox/internal/types/token.go index eb37604..e04d59a 100644 --- a/golox/internal/runner/token.go +++ b/golox/internal/types/token.go @@ -1,4 +1,4 @@ -package runner +package types import "fmt" diff --git a/golox/internal/runner/token_type.go b/golox/internal/types/token_type.go similarity index 98% rename from golox/internal/runner/token_type.go rename to golox/internal/types/token_type.go index 2f4950f..d2010a7 100644 --- a/golox/internal/runner/token_type.go +++ b/golox/internal/types/token_type.go @@ -1,4 +1,4 @@ -package runner +package types type TokenType string diff --git 
a/golox/test-invalid.lox b/golox/test-invalid.lox new file mode 100644 index 0000000..a4f056a --- /dev/null +++ b/golox/test-invalid.lox @@ -0,0 +1 @@ +{,{.(;)-}}*@ diff --git a/golox/test.lox b/golox/test.lox index da977f1..a6662bd 100644 --- a/golox/test.lox +++ b/golox/test.lox @@ -1 +1,4 @@ -{,{.(;)-}}* +// this is a comment +(( )){} // grouping stuff +!*+-/=<> <= == // operators +"some string literal"