From 725e4dc5bf7853ac9634dc5c612f4a907b244c09 Mon Sep 17 00:00:00 2001
From: Tommy Parnell
Date: Sat, 30 Dec 2017 16:14:51 -0500
Subject: [PATCH] init

---
 Readme.md             |  15 +++++
 ast/ast.go            |  46 ++++++++++++++
 lexer/lexer.go        | 137 ++++++++++++++++++++++++++++++++++++++++++
 lexer/lexer_test.go   | 126 ++++++++++++++++++++++++++++++++++++++
 main.go               |  21 +++++++
 parser/parser.go      |  91 ++++++++++++++++++++++++++++
 parser/parser_test.go |  66 ++++++++++++++++++++
 repl/repl.go          |  30 +++++++++
 token/token.go        |  66 ++++++++++++++++++++
 9 files changed, 598 insertions(+)
 create mode 100644 Readme.md
 create mode 100644 ast/ast.go
 create mode 100644 lexer/lexer.go
 create mode 100644 lexer/lexer_test.go
 create mode 100644 main.go
 create mode 100644 parser/parser.go
 create mode 100644 parser/parser_test.go
 create mode 100644 repl/repl.go
 create mode 100644 token/token.go

diff --git a/Readme.md b/Readme.md
new file mode 100644
index 0000000..6af820f
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,15 @@
+These are my source files for working through Thorsten Ball's book [Writing An Interpreter In Go](https://interpreterbook.com/). The ultimate goal of this source code is to interpret and execute code written in a language called Monkey.
+
+An example of Monkey is below. Monkey is a C-like language with closures, first-class functions, and variable bindings.
+
+```monkey
+
+let five = 5;
+let ten = 10;
+
+let add = fn(x, y) {
+	x + y;
+};
+return add(5,5);
+
+```
\ No newline at end of file
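For orientation before the individual files: the packages in this patch form a straight pipeline. `token` defines the vocabulary, `lexer` turns source text into a stream of tokens, and `parser` consumes that stream to build an `ast.Program`. The sketch below is illustrative only, not part of the patch; it assumes the `monkey/...` import paths used throughout the diff.

```go
package main

import (
	"fmt"

	"monkey/lexer"
	"monkey/parser"
)

func main() {
	src := "let five = 5;"

	l := lexer.New(src)         // characters -> tokens
	p := parser.New(l)          // tokens -> AST
	program := p.ParseProgram() // one ast.Statement per top-level statement

	// At this stage of the book only let statements are parsed;
	// each one records the name it binds.
	for _, stmt := range program.Statements {
		fmt.Printf("%T %q\n", stmt, stmt.TokenLiteral())
	}
}
```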
diff --git a/ast/ast.go b/ast/ast.go
new file mode 100644
index 0000000..799322b
--- /dev/null
+++ b/ast/ast.go
@@ -0,0 +1,46 @@
+package ast
+
+import "monkey/token"
+
+type LetStatement struct {
+	Token token.Token // the token.LET token
+	Name  *Identifier
+	Value Expression
+}
+
+func (ls *LetStatement) statementNode()       {}
+func (ls *LetStatement) TokenLiteral() string { return ls.Token.Literal }
+
+type Identifier struct {
+	Token token.Token // the token.IDENT token
+	Value string
+}
+
+func (i *Identifier) expressionNode()      {}
+func (i *Identifier) TokenLiteral() string { return i.Token.Literal }
+
+type Node interface {
+	TokenLiteral() string
+}
+
+type Statement interface {
+	Node
+	statementNode()
+}
+
+type Expression interface {
+	Node
+	expressionNode()
+}
+
+type Program struct {
+	Statements []Statement
+}
+
+func (p *Program) TokenLiteral() string {
+	if len(p.Statements) > 0 {
+		return p.Statements[0].TokenLiteral()
+	} else {
+		return ""
+	}
+}
diff --git a/lexer/lexer.go b/lexer/lexer.go
new file mode 100644
index 0000000..bb62c8c
--- /dev/null
+++ b/lexer/lexer.go
@@ -0,0 +1,137 @@
+package lexer
+
+import "monkey/token"
+
+type Lexer struct {
+	input        string
+	position     int  // current position in input (points to current char)
+	readPosition int  // current reading position in input (after current char)
+	ch           byte // current char under examination
+}
+
+func New(input string) *Lexer {
+	l := &Lexer{input: input}
+	l.readChar()
+	return l
+}
+
+func (l *Lexer) NextToken() token.Token {
+	var tok token.Token
+
+	l.skipWhitespace()
+
+	switch l.ch {
+	case '=':
+		if l.peekChar() == '=' {
+			ch := l.ch
+			l.readChar()
+			literal := string(ch) + string(l.ch)
+			tok = token.Token{Type: token.EQ, Literal: literal}
+		} else {
+			tok = newToken(token.ASSIGN, l.ch)
+		}
+	case '+':
+		tok = newToken(token.PLUS, l.ch)
+	case '-':
+		tok = newToken(token.MINUS, l.ch)
+	case '!':
+		if l.peekChar() == '=' {
+			ch := l.ch
+			l.readChar()
+			literal := string(ch) + string(l.ch)
+			tok = token.Token{Type: token.NOT_EQ, Literal: literal}
+		} else {
+			tok = newToken(token.BANG, l.ch)
+		}
+	case '/':
+		tok = newToken(token.SLASH, l.ch)
+	case '*':
+		tok = newToken(token.ASTERISK, l.ch)
+	case '<':
+		tok = newToken(token.LT, l.ch)
+	case '>':
+		tok = newToken(token.GT, l.ch)
+	case ';':
+		tok = newToken(token.SEMICOLON, l.ch)
+	case ',':
+		tok = newToken(token.COMMA, l.ch)
+	case '{':
+		tok = newToken(token.LBRACE, l.ch)
+	case '}':
+		tok = newToken(token.RBRACE, l.ch)
+	case '(':
+		tok = newToken(token.LPAREN, l.ch)
+	case ')':
+		tok = newToken(token.RPAREN, l.ch)
+	case 0:
+		tok.Literal = ""
+		tok.Type = token.EOF
+	default:
+		if isLetter(l.ch) {
+			tok.Literal = l.readIdentifier()
+			tok.Type = token.LookupIdent(tok.Literal)
+			return tok
+		} else if isDigit(l.ch) {
+			tok.Type = token.INT
+			tok.Literal = l.readNumber()
+			return tok
+		} else {
+			tok = newToken(token.ILLEGAL, l.ch)
+		}
+	}
+
+	l.readChar()
+	return tok
+}
+
+func (l *Lexer) skipWhitespace() {
+	for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
+		l.readChar()
+	}
+}
+
+func (l *Lexer) readChar() {
+	if l.readPosition >= len(l.input) {
+		l.ch = 0
+	} else {
+		l.ch = l.input[l.readPosition]
+	}
+	l.position = l.readPosition
+	l.readPosition += 1
+}
+
+func (l *Lexer) peekChar() byte {
+	if l.readPosition >= len(l.input) {
+		return 0
+	} else {
+		return l.input[l.readPosition]
+	}
+}
+
+func (l *Lexer) readIdentifier() string {
+	position := l.position
+	for isLetter(l.ch) {
+		l.readChar()
+	}
+	return l.input[position:l.position]
+}
+
+func (l *Lexer) readNumber() string {
+	position := l.position
+	for isDigit(l.ch) {
+		l.readChar()
+	}
+	return l.input[position:l.position]
+}
+
+func isLetter(ch byte) bool {
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
+}
+
+func isDigit(ch byte) bool {
+	return '0' <= ch && ch <= '9'
+}
+
+func newToken(tokenType token.TokenType, ch byte) token.Token {
+	return token.Token{Type: tokenType, Literal: string(ch)}
+}
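The only lookahead in the lexer is in the `'='` and `'!'` cases: `peekChar` inspects the next byte without consuming it, which is how `==` and `!=` become single two-character tokens while a lone `=` or `!` falls through to `ASSIGN` or `BANG`. A quick illustrative driver (again not part of the patch):

```go
package main

import (
	"fmt"

	"monkey/lexer"
	"monkey/token"
)

func main() {
	// peekChar lets the lexer emit EQ for "==" but ASSIGN for "=".
	l := lexer.New("x == 5; x = 5;")
	for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
		fmt.Printf("%-10v %q\n", tok.Type, tok.Literal)
	}
}
```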
{token.INT, "5"}, + {token.SEMICOLON, ";"}, + {token.IF, "if"}, + {token.LPAREN, "("}, + {token.INT, "5"}, + {token.LT, "<"}, + {token.INT, "10"}, + {token.RPAREN, ")"}, + {token.LBRACE, "{"}, + {token.RETURN, "return"}, + {token.TRUE, "true"}, + {token.SEMICOLON, ";"}, + {token.RBRACE, "}"}, + {token.ELSE, "else"}, + {token.LBRACE, "{"}, + {token.RETURN, "return"}, + {token.FALSE, "false"}, + {token.SEMICOLON, ";"}, + {token.RBRACE, "}"}, + {token.INT, "10"}, + {token.EQ, "=="}, + {token.INT, "10"}, + {token.SEMICOLON, ";"}, + {token.INT, "10"}, + {token.NOT_EQ, "!="}, + {token.INT, "9"}, + {token.SEMICOLON, ";"}, + {token.EOF, ""}, + } + + l := New(input) + + for i, tt := range tests { + tok := l.NextToken() + + if tok.Type != tt.expectedType { + t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q", + i, tt.expectedType, tok.Type) + } + + if tok.Literal != tt.expectedLiteral { + t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q", + i, tt.expectedLiteral, tok.Literal) + } + } +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..941ae23 --- /dev/null +++ b/main.go @@ -0,0 +1,21 @@ +// main.go + +package main + +import ( + "fmt" + "monkey/repl" + "os" + "os/user" +) + +func main() { + user, err := user.Current() + if err != nil { + panic(err) + } + fmt.Printf("Hello %s! This is the Monkey programming language!\n", + user.Username) + fmt.Printf("Feel free to type in commands\n") + repl.Start(os.Stdin, os.Stdout) +} diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..df11aa2 --- /dev/null +++ b/parser/parser.go @@ -0,0 +1,91 @@ +package parser + +import ( + "monkey/ast" + "monkey/lexer" + "monkey/token" +) + +type Parser struct { + l *lexer.Lexer + + curToken token.Token + peekToken token.Token +} + +func New(l *lexer.Lexer) *Parser { + p := &Parser{l: l} + + // Read two tokens, so curToken and peekToken are both set + p.nextToken() + p.nextToken() + + return p +} + +func (p *Parser) nextToken() { + p.curToken = p.peekToken + p.peekToken = p.l.NextToken() +} + +func (p *Parser) ParseProgram() *ast.Program { + program := &ast.Program{} + program.Statements = []ast.Statement{} + + for p.curToken.Type != token.EOF { + stmt := p.parseStatement() + if stmt != nil { + program.Statements = append(program.Statements, stmt) + } + p.nextToken() + } + + return program +} +func (p *Parser) parseStatement() ast.Statement { + switch p.curToken.Type { + case token.LET: + return p.parseLetStatement() + default: + return nil + } +} + +func (p *Parser) parseLetStatement() *ast.LetStatement { + stmt := &ast.LetStatement{Token: p.curToken} + + if !p.expectPeek(token.IDENT) { + return nil + } + + stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal} + + if !p.expectPeek(token.ASSIGN) { + return nil + } + + // TODO: We're skipping the expressions until we + // encounter a semicolon + for !p.curTokenIs(token.SEMICOLON) { + p.nextToken() + } + + return stmt +} + +func (p *Parser) curTokenIs(t token.TokenType) bool { + return p.curToken.Type == t +} + +func (p *Parser) peekTokenIs(t token.TokenType) bool { + return p.peekToken.Type == t +} + +func (p *Parser) expectPeek(t token.TokenType) bool { + if p.peekTokenIs(t) { + p.nextToken() + return true + } else { + return false + } +} diff --git a/parser/parser_test.go b/parser/parser_test.go new file mode 100644 index 0000000..cc494ff --- /dev/null +++ b/parser/parser_test.go @@ -0,0 +1,66 @@ +package parser + +import ( + "monkey/ast" + "monkey/lexer" + "testing" +) + +func 
diff --git a/parser/parser_test.go b/parser/parser_test.go
new file mode 100644
index 0000000..cc494ff
--- /dev/null
+++ b/parser/parser_test.go
@@ -0,0 +1,66 @@
+package parser
+
+import (
+	"monkey/ast"
+	"monkey/lexer"
+	"testing"
+)
+
+func TestLetStatements(t *testing.T) {
+	input := `
+let x = 5;
+let y = 10;
+let foobar = 838383;
+`
+	l := lexer.New(input)
+	p := New(l)
+
+	program := p.ParseProgram()
+	if program == nil {
+		t.Fatalf("ParseProgram() returned nil")
+	}
+	if len(program.Statements) != 3 {
+		t.Fatalf("program.Statements does not contain 3 statements. got=%d",
+			len(program.Statements))
+	}
+
+	tests := []struct {
+		expectedIdentifier string
+	}{
+		{"x"},
+		{"y"},
+		{"foobar"},
+	}
+
+	for i, tt := range tests {
+		stmt := program.Statements[i]
+		if !testLetStatement(t, stmt, tt.expectedIdentifier) {
+			return
+		}
+	}
+}
+
+func testLetStatement(t *testing.T, s ast.Statement, name string) bool {
+	if s.TokenLiteral() != "let" {
+		t.Errorf("s.TokenLiteral not 'let'. got=%q", s.TokenLiteral())
+		return false
+	}
+
+	letStmt, ok := s.(*ast.LetStatement)
+	if !ok {
+		t.Errorf("s not *ast.LetStatement. got=%T", s)
+		return false
+	}
+
+	if letStmt.Name.Value != name {
+		t.Errorf("letStmt.Name.Value not '%s'. got=%s", name, letStmt.Name.Value)
+		return false
+	}
+
+	if letStmt.Name.TokenLiteral() != name {
+		t.Errorf("s.Name not '%s'. got=%s", name, letStmt.Name)
+		return false
+	}
+
+	return true
+}
diff --git a/repl/repl.go b/repl/repl.go
new file mode 100644
index 0000000..337be46
--- /dev/null
+++ b/repl/repl.go
@@ -0,0 +1,30 @@
+package repl
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"monkey/lexer"
+	"monkey/token"
+)
+
+const PROMPT = ">> "
+
+func Start(in io.Reader, out io.Writer) {
+	scanner := bufio.NewScanner(in)
+
+	for {
+		fmt.Printf(PROMPT)
+		scanned := scanner.Scan()
+		if !scanned {
+			return
+		}
+
+		line := scanner.Text()
+		l := lexer.New(line)
+
+		for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
+			fmt.Printf("%+v\n", tok)
+		}
+	}
+}
diff --git a/token/token.go b/token/token.go
new file mode 100644
index 0000000..12158fa
--- /dev/null
+++ b/token/token.go
@@ -0,0 +1,66 @@
+package token
+
+type TokenType string
+
+const (
+	ILLEGAL = "ILLEGAL"
+	EOF     = "EOF"
+
+	// Identifiers + literals
+	IDENT = "IDENT" // add, foobar, x, y, ...
+	INT   = "INT"   // 1343456
+
+	// Operators
+	ASSIGN   = "="
+	PLUS     = "+"
+	MINUS    = "-"
+	BANG     = "!"
+	ASTERISK = "*"
+	SLASH    = "/"
+
+	LT = "<"
+	GT = ">"
+
+	EQ     = "=="
+	NOT_EQ = "!="
+
+	// Delimiters
+	COMMA     = ","
+	SEMICOLON = ";"
+
+	LPAREN = "("
+	RPAREN = ")"
+	LBRACE = "{"
+	RBRACE = "}"
+
+	// Keywords
+	FUNCTION = "FUNCTION"
+	LET      = "LET"
+	TRUE     = "TRUE"
+	FALSE    = "FALSE"
+	IF       = "IF"
+	ELSE     = "ELSE"
+	RETURN   = "RETURN"
+)
+
+type Token struct {
+	Type    TokenType
+	Literal string
+}
+
+var keywords = map[string]TokenType{
+	"fn":     FUNCTION,
+	"let":    LET,
+	"true":   TRUE,
+	"false":  FALSE,
+	"if":     IF,
+	"else":   ELSE,
+	"return": RETURN,
+}
+
+func LookupIdent(ident string) TokenType {
+	if tok, ok := keywords[ident]; ok {
+		return tok
+	}
+	return IDENT
+}
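Keyword recognition, finally, is a plain map probe: the lexer's `readIdentifier` scans a whole word and `LookupIdent` either finds it in `keywords` or defaults to `IDENT`. A last illustrative sketch (not part of the patch):

```go
package main

import (
	"fmt"

	"monkey/token"
)

func main() {
	for _, word := range []string{"let", "fn", "return", "foobar"} {
		// Keywords map to their own TokenType; everything else is IDENT.
		fmt.Printf("%-7s -> %v\n", word, token.LookupIdent(word))
	}
}
```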