目标
Stringint64
// calc.go
func calc(input string) int64 {
}
calc
// calc_test.go
func TestFinal(t *testing.T) {
tests := []struct{
input string
expected int64
}{
{"5", 5},
{"10", 10},
{"-5", -5},
{"-10", -10},
{"5 + 5 + 5 + 5 - 10", 10},
{"2 * 2 * 2 * 2 * 2", 32},
{"-50 + 100 + -50", 0},
{"5 * 2 + 10", 20},
{"5 + 2 * 10", 25},
{"20 + 2 * -10", 0},
{"50 / 2 * 2 + 10", 60},
{"2 * (5 + 10)", 30},
{"3 * 3 * 3 + 10", 37},
{"3 * (3 * 3) + 10", 37},
{"(5 + 10 * 2 + 15 / 3) * 2 + -10", 50},
}
for _, tt := range tests{
res := Calc(tt.input)
if res != tt.expected{
t.Errorf("Wrong answer, got=%d, want=%d", res, tt.expected)
}
}
}
我们运行这个测试,毫无疑问会失败。不过没关系,我们先把这个测试放到一边,我们从编译器最简单的开始。
把句子变成一个一个单词
1-9 +-*/()-inputEOFILLEGAL
const (
ILLEGAL = "ILLEGAL"
EOF = "EOF"
INT = "INT"
PLUS = "+"
MINUS = "-"
BANG = "!"
ASTERISK = "*"
SLASH = "/"
LPAREN = "("
RPAREN = ")"
)
NextToken()
type Token struct {
Type string //对应我们上面的词元类型
Literal string // 实际的string字符
}
type Lexer struct {
input string // 输入
position int // 当前位置
readPosition int // 将要读取的位置
ch byte //当前字符
}
func (l *Lexer) NextToken() Token {
}
我们不着急实现。照例我们先设计我们的测试。这次我们要达到的目标是我们能够将句子分成特定的词元。
func TestTokenizer(t *testing.T) {
input := `(5 + -10 * 2 + 15 / 3) * 2`
tests := []struct {
expectedType string
expectedLiteral string
}{
{LPAREN, "("},
{INT, "5"},
{PLUS, "+"},
{MINUS, "-"},
{INT, "10"},
{ASTERISK, "*"},
{INT, "2"},
{PLUS, "+"},
{INT, "15"},
{SLASH, "/"},
{INT, "3"},
{RPAREN, ")"},
{ASTERISK, "*"},
{INT, "2"},
}
l := NewLex(input)
for i, tt := range tests {
tok := l.NextToken()
if tok.Type != tt.expectedType {
t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q",
i, tt.expectedType, tok.Type)
}
if tok.Literal != tt.expectedLiteral {
t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q",
i, tt.expectedLiteral, tok.Literal)
}
}
}
NextToken()lexerreadChar
func (l *Lexer) readChar() {
if l.readPosition >= len(l.input) {
l.ch = 0
} else {
l.ch = l.input[l.readPosition]
}
l.position = l.readPosition
l.readPosition += 1
}
skipWhitespace
func (l *Lexer) skipWhitespace() {
for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
l.readChar()
}
}
123
func isDigit(ch byte) bool {
return '0' <= ch && ch <= '9'
}
func (l *Lexer) readNumber() string {
position := l.position
for isDigit(l.ch) {
l.readChar()
}
return l.input[position:l.position]
}
NextTokenswitchToken
func (l *Lexer) NextToken() Token {
var tok Token
l.skipWhitespace()
switch l.ch {
case '(':
tok = newToken(LPAREN, l.ch)
case ')':
tok = newToken(RPAREN, l.ch)
case '+':
tok = newToken(PLUS, l.ch)
case '-':
tok = newToken(MINUS, l.ch)
case '/':
tok = newToken(SLASH, l.ch)
case '*':
tok = newToken(ASTERISK, l.ch)
case 0:
tok.Literal = ""
tok.Type = EOF
default:
if isDigit(l.ch) {
tok.Type = INT
tok.Literal = l.readNumber()
return tok
} else {
tok = newToken(ILLEGAL, l.ch)
}
}
l.readChar()
return tok
}
inputast
把一个一个词元组成语法树
什么是语法/语法树
我爱你我爱你爱你我1 + 21 + 2+ 12
+
/ \
1 2
1+2+12
那么计算表达式的语法是什么
++i-11 + 212
type Expression interface {
String() string
}
String
type IntegerLiteralExpression struct {
Token Token
Value int64
}
func (il *IntegerLiteralExpression) String() string { return il.Token.Literal }
type PrefixExpression struct {
Token Token
Operator string
Right Expression
}
func (pe *PrefixExpression) String() string {
var out bytes.Buffer
out.WriteString("(")
out.WriteString(pe.Operator)
out.WriteString(pe.Right.String())
out.WriteString(")")
return out.String()
}
type InfixExpression struct {
Token Token
Left Expression
Operator string
Right Expression
}
func (ie *InfixExpression) String() string {
var out bytes.Buffer
out.WriteString("(")
out.WriteString(ie.Left.String())
out.WriteString(" ")
out.WriteString(ie.Operator)
out.WriteString(" ")
out.WriteString(ie.Right.String())
out.WriteString(")")
return out.String()
}
解析器
parserexpressionparserlexer
type Parser struct {
l *lexer.Lexer
errors []string
curToken token.Token
peekToken token.Token
prefixParseFns map[token.TokenType]prefixParseFn
infixParseFns map[token.TokenType]infixParseFn
}
// 往结构体里面筛处理方法
func (p *Parser) registerPrefix(tokenType token.TokenType, fn prefixParseFn) {
p.prefixParseFns[tokenType] = fn
}
func (p *Parser) registerInfix(tokenType token.TokenType, fn infixParseFn) {
p.infixParseFns[tokenType] = fn
}
lexerastParseExpression
func (p *Parser) ParseExpression(precedence int) Expression {
}
测试
计算表达式
250ParseExpressionInterLieralExpressionAST250integerLiteral
func TestIntegerLiteralExpression(t *testing.T) {
input := "250"
var expectValue int64 = 250
l := NewLex(input)
p := NewParser(l)
checkParseErrors(t, p)
expression := p.ParseExpression(LOWEST)
testInterLiteral(t, expression, expectValue)
}
func testInterLiteral(t *testing.T, il Expression, value int64) bool {
integ, ok := il.(*IntegerLiteralExpression)
if !ok {
t.Errorf("il not *ast.IntegerLiteral. got=%T", il)
return false
}
if integ.Value != value {
t.Errorf("integ.Value not %d. got=%d", value, integ.Value)
return false
}
return true
}
-250ParseExpressionInterLieralExpression-
func TestParsingPrefixExpression(t *testing.T) {
input := "-15"
expectedOp := "-"
var expectedValue int64 = 15
l := NewLex(input)
p := NewParser(l)
checkParseErrors(t, p)
expression := p.ParseExpression(LOWEST)
exp, ok := expression.(*PrefixExpression)
if !ok {
t.Fatalf("stmt is not PrefixExpression, got=%T", exp)
}
if exp.Operator != expectedOp {
t.Fatalf("exp.Operator is not %s, go=%s", expectedOp, exp.Operator)
}
testInterLiteral(t, exp.Right, expectedValue)
}
5+5
func TestParsingInfixExpression(t *testing.T) {
infixTests := []struct{
input string
leftValue int64
operator string
rightValue int64
}{
{"5 + 5;", 5, "+", 5},
{"5 - 5;", 5, "-", 5},
{"5 * 5;", 5, "*", 5},
{"5 / 5;", 5, "/", 5},
}
for _, tt := range infixTests {
l := NewLex(tt.input)
p := NewParser(l)
checkParseErrors(t, p)
expression := p.ParseExpression(LOWEST)
exp, ok := expression.(*InfixExpression)
if !ok {
t.Fatalf("exp is not InfixExpression, got=%T", exp)
}
if exp.Operator != tt.operator {
t.Fatalf("exp.Operator is not %s, go=%s", tt.operator, exp.Operator)
}
testInterLiteral(t, exp.Left, tt.leftValue)
testInterLiteral(t, exp.Right, tt.rightValue)
}
}
实现
pratt parserinfixParseprefixParsenewParser
func NewParser(l *Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}
p.prefixParseFns = make(map[string]prefixParseFn)
p.infixParseFns = make(map[string]infixParseFn)
p.nextToken()
p.nextToken()
return p
}
当遇到Integer Token
250p.registerPrefix(INT, p.parseIntegerLiteral)IntegerLiteralExpression
func (p *Parser) parseIntegerLiteral() Expression {
lit := &IntegerLiteralExpression{Token: p.curToken}
value, err := strconv.ParseInt(p.curToken.Literal, 0, 64)
if err != nil {
msg := fmt.Sprintf("could not parse %q as integer", p.curToken.Literal)
p.errors = append(p.errors, msg)
return nil
}
lit.Value = value
return lit
}
// 在newParser里面加上
+-*/
-55 -1+ * /parseInfixExpression
// func NewParser
p.registerPrefix(MINUS, p.parsePrefixExpression)
p.registerInfix(MINUS, p.parseInfixExpression)
p.registerInfix(PLUS, p.parseInfixExpression)
p.registerInfix(MINUS, p.parseInfixExpression)
p.registerInfix(SLASH, p.parseInfixExpression)
p.registerInfix(ASTERISK, p.parseInfixExpression)
parsePrefixExpression-ParseExpressionIntegerLiteral
func (p *Parser) parsePrefixExpression() Expression {
expression := &PrefixExpression{
Token: p.curToken,
Operator: p.curToken.Literal,
}
p.nextToken()
expression.Right = p.ParseExpression(PREFIX)
return expression
}
parseInfixExpression1 + 21
func (p *Parser) parseInfixExpression(left Expression) Expression {
expression := &InfixExpression{
Token: p.curToken,
Operator: p.curToken.Literal,
Left: left,
}
precedence := p.curPrecedence()
p.nextToken()
expression.Right = p.ParseExpression(precedence)
return expression
}
优先级
1 + 3 * 4
*
/ \
+ 4
/ \
1 3
+
/ \
1 *
/ \
3 4
按照我们小学教育,我们应该选择下面的解法。也就是说乘法比加法要有更高的优先级。或者说在我们的语法树中乘法要比加法处于更高的位置。我们定义出以下几个级别的优先级,与各符号对应的优先级
const (
_ int = iota
LOWEST
SUM // +, -
PRODUCT // *, /
PREFIX // -X
CALL // (X)
)
var precedences = map[string]int{
PLUS: SUM,
MINUS: SUM,
SLASH: PRODUCT,
ASTERISK: PRODUCT,
LPAREN: CALL,
}
( )
(1 + 5) * 31 + 5parseGroupedExpression
// func NewParser
p.registerPrefix(MINUS, p.parseGroupedExpression)
()()
func (p *Parser) parseGroupedExpression() Expression {
p.nextToken()
exp := p.ParseExpression(LOWEST)
if !p.expectPeek(token.RPAREN){
return nil
}
return exp
}
ParseExpression
tokenparseInfixExpressionparsePrefixExpression
func (p *Parser) ParseExpression(precedence int) Expression {
prefix := p.prefixParseFns[p.curToken.Type]
returnExp := prefix()
for precedence < p.peekPrecedence() {
infix := p.infixParseFns[p.peekToken.Type]
if infix == nil {
return returnExp
}
p.nextToken()
returnExp = infix(returnExp)
}
return returnExp
}
当然还有一些辅助函数,这里不再赘述。运行一下测试,🆗通过啦
执行语法树得到结果
这里我们直接要开始搞定我们最开始的测试啦。首先我们丰富一下主函数。
func Calc(input string) int64 {
lexer := NewLex(input)
parser := NewParser(lexer)
exp := parser.ParseExpression(LOWEST)
return Eval(exp)
}
EvalExpressionExpression
func Eval(exp Expression) int64 {
switch node := exp.(type) {
case *IntegerLiteralExpression:
return node.Value
case *PrefixExpression:
rightV := Eval(node.Right)
return evalPrefixExpression(node.Operator, rightV)
case *InfixExpression:
leftV := Eval(node.Left)
rightV := Eval(node.Right)
return evalInfixExpression(leftV, node.Operator, rightV)
}
return 0
}
func evalPrefixExpression(operator string, right int64) int64{
if operator != "-" {
return 0
}
return -right
}
func evalInfixExpression(left int64, operator string, right int64) int64 {
switch operator {
case "+":
return left + right
case "-":
return left - right
case "*":
return left * right
case "/":
if right != 0{
return left / right
}else{
return 0
}
default:
return 0
}