Practical: Creating an Expression Parser
By leveraging LuneScript's powerful type system, particularly Algebraic Data Types (ADTs) and pattern
matching, let's create an expression parser (calculator).
In this tutorial, we will create a program step-by-step that parses strings like "1 + 2 * 3"
and produces calculation results.
Goal
The following features will be implemented.
- Support for basic arithmetic (
+,-,*,/) and parentheses(). - Integer calculations.
- Interactive execution in REPL (Read-Eval-Print Loop) format.
Step 1: Token Definition
First, we define types for breaking down the input string into meaningful units (tokens). LuneScript's
alge (Algebraic Data Type) is perfect for this.
// Token Type Definition
alge Token {
Num( val: int ), // Numbers: 123
Op( val: str ), // Operators: +, -, *, / (, )
Eof, // End of string
}
Step 2: Lexical Analysis (Lexer)
Create a Lexer class that reads the string and generates Tokens.
class Lexer {
let text: str;
let len: int;
let mut pos: int;
pub fn __init( text:str ) {
self.text = text;
self.len = #text;
self.pos = 1;
}
// Get current character (returns character code 0 if out of range)
fn getChar(): int {
if self.pos > self.len { return 0; }
return unwrap self.text.byte( self.pos ) default 0;
}
// Get next token
pub fn next() mut: Token {
// Skip whitespace
while self.getChar() == ? {
self.pos = self.pos + 1;
}
let ch = self.getChar();
if ch == 0 { return .Eof; }
// In case of numbers
if ch >= ?0 and ch <= ?9 {
let mut numVal = 0;
while true {
let cur = self.getChar();
if cur < ?0 or cur > ?9 { break; }
numVal = numVal * 10 + ( cur - ?0 );
self.pos = self.pos + 1;
}
return Token.Num( numVal );
}
// In case of operators
self.pos = self.pos + 1;
return Token.Op( "%c" ( ch ) );
}
}
? is a character code literal (e.g., ?A is 65).
Step 3: Definition of Abstract Syntax Tree (AST)
Define a tree structure to store parsing results. alge shines here too.
alge Ast {
Val( val: int ), // Numerical value
Bin( op: str, left: Ast, right: Ast ), // Binary operation (left op right)
}
Step 4: Parsing (Parser)
Convert the token sequence into an AST using Recursive Descent Parsing. To handle precedence, methods are
split into Expr (expression), Term (term), and Factor (factor).
class Parser {
let mut lexer: Lexer;
let mut cur: Token;
pub fn __init( text: str ) {
self.lexer = new Lexer( text );
self.cur = self.lexer.next();
}
// Advance token
fn next() mut {
self.cur = self.lexer.next();
}
// Factor: Number or (Expression)
fn parseFactor() mut: Ast {
match self.cur {
case .Num( val ) {
self.next();
return .Val( val );
}
case .Op( op ) {
if op == "(" {
self.next();
let node = self.parseExpr();
match self.cur {
case .Op( nextOp ) {
if nextOp == ")" {
self.next();
return node;
}
}
default {}
}
print( "Error: missing )" );
return node;
}
}
default {
print( "Error: unexpected token" );
}
}
return .Val( 0 );
}
// Term: Multiplication, Division
fn parseTerm() mut: Ast {
let mut left = self.parseFactor();
while true {
match self.cur {
case .Op( op ) {
if op == "*" or op == "/" {
self.next();
let right = self.parseFactor();
left = .Bin( op, left, right );
} else {
break;
}
}
default { break; }
}
}
return left;
}
// Expression: Addition, Subtraction
pub fn parseExpr() mut: Ast {
let mut left = self.parseTerm();
while true {
match self.cur {
case .Op( op ) {
if op == "+" or op == "-" {
self.next();
let right = self.parseTerm();
left = .Bin( op, left, right );
} else {
break;
}
}
default { break; }
}
}
return left;
}
}
Step 5: Evaluation and Execution
Finally, create a function that recursively processes the generated AST to obtain calculation results. Using
the match expression allows for concise writing.
fn eval( node: Ast ): int {
match node {
case .Val( val ) {
return val;
}
case .Bin( op, left, right ) {
let l = eval( left );
let r = eval( right );
switch op {
case "+" { return l + r; }
case "-" { return l - r; }
case "*" { return l * r; }
case "/" { return l / r; }
}
}
}
return 0;
}
// Main process
let input = "1 + 2 * (3 + 4)";
let mut parser = new Parser( input );
let ast = parser.parseExpr();
print( "%s = %d" ( input, eval( ast ) ) );
Executing this outputs 1 + 2 * (3 + 4) = 15!
Complete Code
The following is the combined code for all steps.
alge Token {
Num( val: int ),
Op( val: str ),
Eof,
}
class Lexer {
let text: str;
let len: int;
let mut pos: int;
pub fn __init( text:str ) {
self.text = text;
self.len = #text;
self.pos = 1;
}
fn getChar(): int {
if self.pos > self.len { return 0; }
return unwrap self.text.byte( self.pos ) default 0;
}
pub fn next() mut: Token {
while self.getChar() == ? {
self.pos = self.pos + 1;
}
let ch = self.getChar();
if ch == 0 { return .Eof; }
if ch >= ?0 and ch <= ?9 {
let mut numVal = 0;
while true {
let cur = self.getChar();
if cur < ?0 or cur > ?9 { break; }
numVal = numVal * 10 + ( cur - ?0 );
self.pos = self.pos + 1;
}
return Token.Num( numVal );
}
self.pos = self.pos + 1;
return Token.Op( "%c" ( ch ) );
}
}
alge Ast {
Val( val: int ),
Bin( op: str, left: Ast, right: Ast ),
}
class Parser {
let mut lexer: Lexer;
let mut cur: Token;
pub fn __init( text: str ) {
self.lexer = new Lexer( text );
self.cur = self.lexer.next();
}
fn next() mut {
self.cur = self.lexer.next();
}
fn parseFactor() mut: Ast {
match self.cur {
case .Num( val ) {
self.next();
return .Val( val );
}
case .Op( op ) {
if op == "(" {
self.next();
let node = self.parseExpr();
match self.cur {
case .Op( nextOp ) {
if nextOp == ")" {
self.next();
return node;
}
}
default {}
}
print( "Error: missing )" );
return node;
}
}
default {
print( "Error: unexpected token" );
}
}
return .Val( 0 );
}
fn parseTerm() mut: Ast {
let mut left = self.parseFactor();
while true {
match self.cur {
case .Op( op ) {
if op == "*" or op == "/" {
self.next();
let right = self.parseFactor();
left = .Bin( op, left, right );
} else {
break;
}
}
default { break; }
}
}
return left;
}
pub fn parseExpr() mut: Ast {
let mut left = self.parseTerm();
while true {
match self.cur {
case .Op( op ) {
if op == "+" or op == "-" {
self.next();
let right = self.parseTerm();
left = .Bin( op, left, right );
} else {
break;
}
}
default { break; }
}
}
return left;
}
}
fn eval( node: Ast ): int {
match node {
case .Val( val ) {
return val;
}
case .Bin( op, left, right ) {
let l = eval( left );
let r = eval( right );
switch op {
case "+" { return l + r; }
case "-" { return l - r; }
case "*" { return l * r; }
case "/" { return l / r; }
}
}
}
return 0;
}
// Main process
let input = "1 + 2 * (3 + 4)";
let mut parser = new Parser( input );
let ast = parser.parseExpr();
print( "%s = %d" ( input, eval( ast ) ) );