日本語

Practical: Creating an Expression Parser

By leveraging LuneScript's powerful type system, particularly Algebraic Data Types (ADTs) and pattern matching, let's create an expression parser (calculator).
In this tutorial, we will create a program step-by-step that parses strings like "1 + 2 * 3" and produces calculation results.

Goal

The following features will be implemented.

Step 1: Token Definition

First, we define types for breaking down the input string into meaningful units (tokens). LuneScript's alge (Algebraic Data Type) is perfect for this.

// Token Type Definition
alge Token {
    Num( val: int ),      // Numbers: 123
    Op( val: str ),       // Operators: +, -, *, / (, )
    Eof,                  // End of string
}

Step 2: Lexical Analysis (Lexer)

Create a Lexer class that reads the string and generates Tokens.

class Lexer {
    let text: str;
    let len: int;
    let mut pos: int;

    pub fn __init( text:str ) {
        self.text = text;
        self.len = #text;
        self.pos = 1;
    }

    // Get current character (returns character code 0 if out of range)
    fn getChar(): int {
        if self.pos > self.len { return 0; }
        return unwrap self.text.byte( self.pos ) default 0;
    }

    // Get next token
    pub fn next() mut: Token {
        // Skip whitespace
        while self.getChar() == ?  {
            self.pos = self.pos + 1;
        }
        
        let ch = self.getChar();
        if ch == 0 { return .Eof; }

        // In case of numbers
        if ch >= ?0 and ch <= ?9 {
            let mut numVal = 0;
            while true {
                let cur = self.getChar();
                if cur < ?0 or cur > ?9 { break; }
                numVal = numVal * 10 + ( cur - ?0 );
                self.pos = self.pos + 1;
            }
            return Token.Num( numVal );
        }

        // In case of operators
        self.pos = self.pos + 1;
        return Token.Op( "%c" ( ch ) );
    }
}
? is a character code literal (e.g., ?A is 65).

Step 3: Definition of Abstract Syntax Tree (AST)

Define a tree structure to store parsing results. alge shines here too.

alge Ast {
    Val( val: int ),                      // Numerical value
    Bin( op: str, left: Ast, right: Ast ), // Binary operation (left op right)
}

Step 4: Parsing (Parser)

Convert the token sequence into an AST using Recursive Descent Parsing. To handle precedence, methods are split into Expr (expression), Term (term), and Factor (factor).

class Parser {
    let mut lexer: Lexer;
    let mut cur: Token;

    pub fn __init( text: str ) {
        self.lexer = new Lexer( text );
        self.cur = self.lexer.next();
    }

    // Advance token
    fn next() mut {
        self.cur = self.lexer.next();
    }

    // Factor: Number or (Expression)
    fn parseFactor() mut: Ast {
        match self.cur {
            case .Num( val ) {
                self.next();
                return .Val( val );
            }
            case .Op( op ) {
                if op == "(" {
                    self.next();
                    let node = self.parseExpr();
                    match self.cur {
                       case .Op( nextOp ) {
                           if nextOp == ")" {
                               self.next();
                               return node;
                           }
                       }
                       default {}
                    }
                    print( "Error: missing )" );
                    return node;
                }
            }
            default {
                print( "Error: unexpected token" );
            }
        }
        return .Val( 0 );
    }

    // Term: Multiplication, Division
    fn parseTerm() mut: Ast {
        let mut left = self.parseFactor();
        while true {
            match self.cur {
                case .Op( op ) {
                    if op == "*" or op == "/" {
                        self.next();
                        let right = self.parseFactor();
                        left = .Bin( op, left, right );
                    } else {
                        break;
                    }
                }
                default { break; }
            }
        }
        return left;
    }

    // Expression: Addition, Subtraction
    pub fn parseExpr() mut: Ast {
        let mut left = self.parseTerm();
        while true {
            match self.cur {
                case .Op( op ) {
                    if op == "+" or op == "-" {
                        self.next();
                        let right = self.parseTerm();
                        left = .Bin( op, left, right );
                    } else {
                        break;
                    }
                }
                default { break; }
            }
        }
        return left;
    }
}

Step 5: Evaluation and Execution

Finally, create a function that recursively processes the generated AST to obtain calculation results. Using the match expression allows for concise writing.

fn eval( node: Ast ): int {
    match node {
        case .Val( val ) {
            return val;
        }
        case .Bin( op, left, right ) {
            let l = eval( left );
            let r = eval( right );
            switch op {
                case "+" { return l + r; }
                case "-" { return l - r; }
                case "*" { return l * r; }
                case "/" { return l / r; }
            }
        }
    }
    return 0;
}

// Main process
let input = "1 + 2 * (3 + 4)";
let mut parser = new Parser( input );
let ast = parser.parseExpr();
print( "%s = %d" ( input, eval( ast ) ) );

Executing this outputs 1 + 2 * (3 + 4) = 15!


Complete Code

The following is the combined code for all steps.

alge Token {
    Num( val: int ),
    Op( val: str ),
    Eof,
}

class Lexer {
    let text: str;
    let len: int;
    let mut pos: int;

    pub fn __init( text:str ) {
        self.text = text;
        self.len = #text;
        self.pos = 1;
    }

    fn getChar(): int {
        if self.pos > self.len { return 0; }
        return unwrap self.text.byte( self.pos ) default 0;
    }

    pub fn next() mut: Token {
        while self.getChar() == ?  {
            self.pos = self.pos + 1;
        }
        
        let ch = self.getChar();
        if ch == 0 { return .Eof; }

        if ch >= ?0 and ch <= ?9 {
            let mut numVal = 0;
            while true {
                let cur = self.getChar();
                if cur < ?0 or cur > ?9 { break; }
                numVal = numVal * 10 + ( cur - ?0 );
                self.pos = self.pos + 1;
            }
            return Token.Num( numVal );
        }

        self.pos = self.pos + 1;
        return Token.Op( "%c" ( ch ) );
    }
}

alge Ast {
    Val( val: int ),
    Bin( op: str, left: Ast, right: Ast ),
}

class Parser {
    let mut lexer: Lexer;
    let mut cur: Token;

    pub fn __init( text: str ) {
        self.lexer = new Lexer( text );
        self.cur = self.lexer.next();
    }

    fn next() mut {
        self.cur = self.lexer.next();
    }

    fn parseFactor() mut: Ast {
        match self.cur {
            case .Num( val ) {
                self.next();
                return .Val( val );
            }
            case .Op( op ) {
                if op == "(" {
                    self.next();
                    let node = self.parseExpr();
                    match self.cur {
                       case .Op( nextOp ) {
                           if nextOp == ")" {
                               self.next();
                               return node;
                           }
                       }
                       default {}
                    }
                    print( "Error: missing )" );
                    return node;
                }
            }
            default {
                print( "Error: unexpected token" );
            }
        }
        return .Val( 0 );
    }

    fn parseTerm() mut: Ast {
        let mut left = self.parseFactor();
        while true {
            match self.cur {
                case .Op( op ) {
                    if op == "*" or op == "/" {
                        self.next();
                        let right = self.parseFactor();
                        left = .Bin( op, left, right );
                    } else {
                        break;
                    }
                }
                default { break; }
            }
        }
        return left;
    }

    pub fn parseExpr() mut: Ast {
        let mut left = self.parseTerm();
        while true {
            match self.cur {
                case .Op( op ) {
                    if op == "+" or op == "-" {
                        self.next();
                        let right = self.parseTerm();
                        left = .Bin( op, left, right );
                    } else {
                        break;
                    }
                }
                default { break; }
            }
        }
        return left;
    }
}

fn eval( node: Ast ): int {
    match node {
        case .Val( val ) {
            return val;
        }
        case .Bin( op, left, right ) {
            let l = eval( left );
            let r = eval( right );
            switch op {
                case "+" { return l + r; }
                case "-" { return l - r; }
                case "*" { return l * r; }
                case "/" { return l / r; }
            }
        }
    }
    return 0;
}

// Main process
let input = "1 + 2 * (3 + 4)";
let mut parser = new Parser( input );
let ast = parser.parseExpr();
print( "%s = %d" ( input, eval( ast ) ) );