Compare commits

...

10 commits

12 changed files with 334 additions and 94 deletions

View file

@@ -1,4 +1,4 @@
# crafting-interpreters
Code for the book Crafting Interpreters by Robert Nystrom
Code for the book "Crafting Interpreters" by Robert Nystrom
Porting the Java code for the first version fo the interpreter into Go.
Porting the Java code for the first version of the interpreter into Go.

View file

@@ -19,6 +19,12 @@ func main() {
if errors.Is(err, runner.ErrInvalidScriptFile) {
fmt.Println(err)
os.Exit(1)
} else if errors.Is(err, runner.ErrScriptNotRunnable) {
fmt.Println(err)
os.Exit(65)
} else if err != nil {
fmt.Printf("Unexpected error: %v\n", err)
os.Exit(1)
}
default:
runner.RunPrompt()
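For context, a rough sketch of how this hunk likely sits inside main follows; the switch arm that calls runner.RunFile lies outside the diff context, so its exact shape (the len(os.Args) check and the runner import path) is an assumption.

package main

import (
	"errors"
	"fmt"
	"os"

	"github.com/AYM1607/crafting-interpreters/golox/internal/runner" // assumed path, mirroring the other internal packages
)

// Sketch only; the lines outside the hunk are assumed, not shown in the diff.
func main() {
	switch len(os.Args) {
	case 2:
		err := runner.RunFile(os.Args[1])
		if errors.Is(err, runner.ErrInvalidScriptFile) {
			fmt.Println(err)
			os.Exit(1)
		} else if errors.Is(err, runner.ErrScriptNotRunnable) {
			fmt.Println(err)
			os.Exit(65) // 65 is EX_DATAERR, the exit code the book uses for errors in the source.
		} else if err != nil {
			fmt.Printf("Unexpected error: %v\n", err)
			os.Exit(1)
		}
	default:
		runner.RunPrompt()
	}
}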

View file

@@ -2,7 +2,7 @@ package errors
import "fmt"
var hadError = false
var HadError = false
func EmitError(line int, message string) {
report(line, "", message)
@@ -15,9 +15,5 @@ func report(line int, where, message string) {
where,
message,
)
hadError = true
}
func HadError() bool {
return hadError
HadError = true
}
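Putting the two hunks together, the errors package after this change reads roughly as follows; the print call inside report falls between the hunks, so its format string here is an assumption based on the book's error output.

package errors

import "fmt"

// HadError is now an exported package-level flag instead of being hidden
// behind a HadError() accessor, so callers like the runner can both read
// and reset it directly.
var HadError = false

func EmitError(line int, message string) {
	report(line, "", message)
}

func report(line int, where, message string) {
	// Assumed format, matching the book's "[line N] ErrorWHERE: MESSAGE" output.
	fmt.Printf(
		"[line %d] Error%s: %s\n",
		line,
		where,
		message,
	)
	HadError = true
}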

View file

@@ -5,9 +5,13 @@ import (
"errors"
"fmt"
"os"
lerrors "github.com/AYM1607/crafting-interpreters/golox/internal/errors"
"github.com/AYM1607/crafting-interpreters/golox/internal/scanner"
)
var ErrInvalidScriptFile = errors.New("could not read script file")
var ErrScriptNotRunnable = errors.New("could not run script")
func RunPrompt() {
s := bufio.NewScanner(os.Stdin)
@@ -15,7 +19,9 @@ func RunPrompt() {
for s.Scan() {
line := s.Text()
Run(line)
// TODO: resed hadError wherever it is set.
// TODO: Understand the implications of this. The book implies that it's
// to allow the users to keep issuing commands even if they make a mistake.
lerrors.HadError = false
fmt.Print("> ")
}
}
@@ -26,12 +32,14 @@ func RunFile(path string) error {
return errors.Join(ErrInvalidScriptFile, err)
}
Run(string(fBytes))
// TODO: check hadError and exit with a 65 code if so.
if lerrors.HadError {
return ErrScriptNotRunnable
}
return nil
}
func Run(source string) {
s := NewScanner(source)
s := scanner.NewScanner(source)
tokens := s.ScanTokens()
for _, t := range tokens {
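The loop body is cut off by the diff context; presumably Run still just prints each token at this stage. A sketch, reusing the fmt and scanner imports already shown above:

func Run(source string) {
	s := scanner.NewScanner(source)
	tokens := s.ScanTokens()
	for _, t := range tokens {
		// Assumed body: at this point in the book the runner only prints tokens.
		fmt.Println(t)
	}
}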

View file

@@ -1,80 +0,0 @@
package runner
type Scanner struct {
source string
// State.
tokens []Token
start int
current int
line int
}
func NewScanner(source string) *Scanner {
return &Scanner{
source: source,
tokens: []Token{},
start: 0,
current: 0,
line: 1,
}
}
func (s *Scanner) ScanTokens() []Token {
for !s.isAtEnd() {
s.start = s.current
s.scanToken()
}
s.tokens = append(s.tokens, NewToken(EOF, "", nil, s.line))
return s.tokens
}
func (s *Scanner) scanToken() {
c := s.advance()
switch c {
case '(':
s.addToken(LPAREN)
case ')':
s.addToken(RPAREN)
case '{':
s.addToken(LBRACE)
case '}':
s.addToken(RBRACE)
case ',':
s.addToken(COMMA)
case '.':
s.addToken(DOT)
case '-':
s.addToken(MINUS)
case '+':
s.addToken(PLUS)
case ';':
s.addToken(SEMI)
case '*':
s.addToken(STAR)
}
}
func (s *Scanner) advance() byte {
idx := s.current
s.current += 1
return s.source[idx]
}
func (s *Scanner) addToken(typ TokenType) {
s.addTokenWithLiteral(typ, nil)
}
func (s *Scanner) addTokenWithLiteral(typ TokenType, literal interface{}) {
lexme := s.source[s.start:s.current]
s.tokens = append(
s.tokens,
NewToken(typ, lexme, literal, s.line),
)
}
func (s *Scanner) isAtEnd() bool {
return s.current >= len(s.source)
}

View file

@@ -0,0 +1,271 @@
package scanner
import (
"strconv"
lerrors "github.com/AYM1607/crafting-interpreters/golox/internal/errors"
"github.com/AYM1607/crafting-interpreters/golox/internal/types"
)
type Scanner struct {
source string
// State.
tokens []types.Token
start int
current int
line int
}
func NewScanner(source string) *Scanner {
return &Scanner{
source: source,
tokens: []types.Token{},
start: 0,
current: 0,
line: 1,
}
}
func (s *Scanner) ScanTokens() []types.Token {
for !s.isAtEnd() {
s.start = s.current
s.scanToken()
}
s.tokens = append(s.tokens, types.NewToken(types.EOF, "", nil, s.line))
return s.tokens
}
func (s *Scanner) scanToken() {
c := s.advance()
switch c {
case '(':
s.addToken(types.LPAREN)
case ')':
s.addToken(types.RPAREN)
case '{':
s.addToken(types.LBRACE)
case '}':
s.addToken(types.RBRACE)
case ',':
s.addToken(types.COMMA)
case '.':
s.addToken(types.DOT)
case '-':
s.addToken(types.MINUS)
case '+':
s.addToken(types.PLUS)
case ';':
s.addToken(types.SEMI)
case '*':
s.addToken(types.STAR)
case '!':
tok := types.BANG
if s.match('=') {
tok = types.BANG_EQUAL
}
s.addToken(tok)
case '=':
tok := types.EQUAL
if s.match('=') {
tok = types.EQUAL_EQUAL
}
s.addToken(tok)
case '<':
tok := types.LT
if s.match('=') {
tok = types.LTE
}
s.addToken(tok)
case '>':
tok := types.GT
if s.match('=') {
tok = types.GTE
}
s.addToken(tok)
case '/':
if s.match('/') {
// Consume all characters in a line comment.
for s.peek() != '\n' && !s.isAtEnd() {
s.advance()
}
} else if s.match('*') {
s.scanInlineComment()
} else {
s.addToken(types.SLASH)
}
case '"':
s.scanString()
// Ignore whitespace.
case ' ':
case '\t':
case '\r':
// Handle new lines.
case '\n':
s.line += 1
default:
// NOTE: adding this here to avoid listing all digits in a case.
if isDigit(c) {
s.scanNumber()
return
}
if isIdentAlpha(c) {
s.scanIdentifier()
return
}
lerrors.EmitError(s.line, "Unexpected character.")
}
}
// advance consumes a single character from the source.
func (s *Scanner) advance() byte {
idx := s.current
s.current += 1
return s.source[idx]
}
// match returns true if the given byte is equal to the next one in source
// and consumes the character if so.
func (s *Scanner) match(c byte) bool {
if s.isAtEnd() {
return false
}
if s.source[s.current] != c {
return false
}
// Next character in the source matches.
s.current += 1
return true
}
// peek returns the next character without consuming it, or 0 at the end of the source.
func (s *Scanner) peek() byte {
if s.isAtEnd() {
return 0
}
return s.source[s.current]
}
// peekNext returns the character after the next one without consuming anything,
// or 0 if that position is past the end of the source.
func (s *Scanner) peekNext() byte {
idx := s.current + 1
if idx >= len(s.source) {
return 0
}
return s.source[idx]
}
func (s *Scanner) scanString() {
for s.peek() != '"' && !s.isAtEnd() {
// Lox allows multi-line strings.
if s.peek() == '\n' {
s.line += 1
}
s.advance()
}
if s.isAtEnd() {
lerrors.EmitError(s.line, "Unterminated string.")
return
}
// Consume the closing "
s.advance()
// Trim enclosing quotes
val := s.source[s.start+1 : s.current-1]
s.addTokenWithLiteral(types.STRING, val)
}
func (s *Scanner) scanNumber() {
// Consume all digits preceding a dot (if any)
for isDigit(s.peek()) {
s.advance()
}
// Look for a decimal part.
// Only literals in the form 123 and 123.123 are allowed.
if s.peek() == '.' && isDigit(s.peekNext()) {
// Only consume the dot if we're sure the format is valid.
s.advance()
// Consume the rest of the digits.
for isDigit(s.peek()) {
s.advance()
}
}
// NOTE: Ignoring error because we're sure the string follows the float
// format. This should probably still report it but will leave as-is
// for now.
val, _ := strconv.ParseFloat(
s.source[s.start:s.current],
64,
)
s.addTokenWithLiteral(
types.NUMBER,
val,
)
}
func (s *Scanner) scanIdentifier() {
for isIdentAlphaNumeric(s.peek()) {
s.advance()
}
l := s.source[s.start:s.current]
typ := types.IDENT
if kTyp, ok := types.KeywordTypes[l]; ok {
typ = kTyp
}
s.addToken(typ)
}
// scanInlineComment consumes a /* ... */ block comment, allowing nested comments.
func (s *Scanner) scanInlineComment() {
depth := 1
closed := false
for !s.isAtEnd() && depth >= 1 {
p := s.peek()
pn := s.peekNext()
switch {
case p == '\n':
s.line += 1
case p == '/' && pn == '*':
// Consume the extra character.
s.advance()
depth += 1
case p == '*' && pn == '/':
// Consume the extra character.
s.advance()
depth -= 1
if depth == 0 {
closed = true
}
}
// Always consume at least one character.
s.advance()
}
// Only report an error if the last (possibly only) nested comment did not close.
if s.isAtEnd() && !closed {
lerrors.EmitError(s.line, "Unterminated comment.")
}
}
// addToken produces a single token without a literal value.
func (s *Scanner) addToken(typ types.TokenType) {
s.addTokenWithLiteral(typ, nil)
}
// addTokenWithLiteral produces a single token with the given literal value.
func (s *Scanner) addTokenWithLiteral(typ types.TokenType, literal interface{}) {
lexeme := s.source[s.start:s.current]
s.tokens = append(
s.tokens,
types.NewToken(typ, lexeme, literal, s.line),
)
}
func (s *Scanner) isAtEnd() bool {
return s.current >= len(s.source)
}
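A quick, illustrative way to exercise the new scanner, including the nested block-comment handling in scanInlineComment. This snippet is not part of the change; it assumes a main package placed somewhere under the golox module so the internal import is allowed, and the Println output depends on how types.Token formats itself.

package main

import (
	"fmt"

	"github.com/AYM1607/crafting-interpreters/golox/internal/scanner"
)

func main() {
	src := `/* outer /* nested */ still a comment */
!*+-/=<> <= == // operators
"some string literal"`
	s := scanner.NewScanner(src)
	for _, t := range s.ScanTokens() {
		fmt.Println(t)
	}
}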

View file

@@ -0,0 +1,15 @@
package scanner
func isIdentAlphaNumeric(c byte) bool {
return isIdentAlpha(c) || isDigit(c)
}
func isIdentAlpha(c byte) bool {
return (c >= 'a' && c <= 'z') ||
(c >= 'A' && c <= 'Z') ||
c == '_'
}
func isDigit(c byte) bool {
return c >= '0' && c <= '9'
}

View file

@@ -0,0 +1,20 @@
package types
var KeywordTypes = map[string]TokenType{
"and": AND,
"class": CLASS,
"else": ELSE,
"false": FALSE,
"for": FOR,
"fun": FUN,
"if": IF,
"nil": NIL,
"or": OR,
"print": PRINT,
"return": RETURN,
"super": SUPER,
"this": THIS,
"true": TRUE,
"var": VAR,
"while": WHILE,
}
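scanIdentifier consumes the whole identifier run before consulting this map, so a lexeme only becomes a keyword token on an exact match: "or" is a keyword, while "orchid" stays an identifier. A hypothetical, standalone illustration of that lookup (again assuming a main package inside the golox module):

package main

import (
	"fmt"

	"github.com/AYM1607/crafting-interpreters/golox/internal/types"
)

func main() {
	for _, lexeme := range []string{"or", "orchid"} {
		if typ, ok := types.KeywordTypes[lexeme]; ok {
			fmt.Printf("%s -> keyword token %v\n", lexeme, typ)
		} else {
			fmt.Printf("%s -> identifier\n", lexeme)
		}
	}
}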

View file

@@ -1,4 +1,4 @@
package runner
package types
import "fmt"

View file

@@ -1,4 +1,4 @@
package runner
package types
type TokenType string

golox/test-invalid.lox Normal file
View file

@@ -0,0 +1 @@
{,{.(;)-}}*@
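The stray @ is not a valid Lox character, so scanning this fixture hits the "Unexpected character." path and sets HadError, which presumably lets RunFile exercise the new ErrScriptNotRunnable / exit-65 branch.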

View file

@@ -1 +1,4 @@
{,{.(;)-}}*
// this is a comment
(( )){} // grouping stuff
!*+-/=<> <= == // operators
"some string literal"