m grammar
/*
* grammar for mlang
* This is the actual grammar text consumed by the self-developed LALR parser generator pgen
* to output a parsing table that the mlang parser uses to parse m source code into ASTs.
* The part of each production rule enclosed in curly braces is the semantic action used to
* build the AST node when the rule is applied to reduce to the left-hand symbol of the rule.
*/
/*
in a semantic action, index 100 indicates an empty list/block node
*/
// presumably the entry symbol of the grammar: a source unit is a list of statements
start = statements
// the unit type is spelled None
unit_type = "None"
// keywords naming the builtin types: the unit type, bool, int, sized integers, floats, char and string
builtin_types = unit_type | "bool" | "int"
| "u8" | "u16" | "u32" | "u64"
| "i8" | "i16" | "i32" | "i64"
| "f32" | "f64" | "char" | "string"
// a type reference. in type_item_node the first number is a kind code and the second
// number is the zero based position of the right hand side symbol carrying the type
type_item = builtin_types { type_item_node 0 0 } // 0 is builtin types
| array_type { type_item_node 2 0 } // 2 is array type
| tuple_type { type_item_node 3 0 } // 3 is tuple type
| '&' type_item { type_item_node 4 1 } // 4 is ref type
| "mut" type_item { mark_mut 1 1 1 } // first 1 is the mark_mut action code for a type node, second 1 marks it mutable, third is the type_item position
| IDENT { type_item_node 1 0 } // 1 is type name
// a name followed by a colon and its type
ident_type = IDENT ':' type_item { type_expr_item 1 0 2 }
// one operand of a type expression, either a named type or a bare type
// the leading number of the action looks like a flag: 1 with a name, 0 without. TODO confirm
type_expr_item = ident_type
| type_item { type_expr_item 0 0 }
// NOTE(review): no other rule in the visible grammar refers to var_name_type and its action is empty. confirm whether it is still needed
var_name_type = ident_type { }
// operand of a type expression: a single item or a parenthesized type expression
type_expr_base = type_expr_item
| '(' type_expr ')' { 1 } // bare index action, presumably forwards the inner type_expr node
// star operator on types, left recursive so it groups to the left
type_expr_prod = type_expr_prod '*' type_expr_base { binop 0 1 2 }
| type_expr_base
// bar operator on types, left recursive, binds looser than the star operator
type_expr = type_expr '|' type_expr_prod { binop 0 1 2 }
| type_expr_prod
// type definition: the action receives the name at position 1 and the type expression at position 3
type_def = "type" IDENT '=' type_expr { type_node 1 3 }
// array type: a type_item followed by one or more bracketed dimensions
array_type = type_item array_dims { array_type 0 1 }
// each dimension is a const_expr in brackets. the block action presumably collects them in source order
array_dims = '[' const_expr ']' { block 1 }
| array_dims '[' const_expr ']' { block 0 2 }
// literal tokens plus the two boolean keywords
literal_expr = LITERAL_INT
| LITERAL_FLOAT
| LITERAL_CHAR
| LITERAL_STRING
| "True"
| "False"
// atoms of an expression
// NOTE(review): a single expr in parentheses matches both parenthesized alternatives below, since expr_list can be one expr. which one wins depends on how pgen resolves the conflict. confirm
primary_expr = literal_expr
| IDENT
| '(' expr ')' { 1 } // bare index action, presumably forwards the inner expr node
| '(' expr_list ')' { adt_init 1 1 } // first 1 indicating tuple type
// postfix forms: bracket indexing, member access, function call, increment and decrement
postfix_expr = primary_expr
| postfix_expr '[' expr ']' { indexing 0 0 2 } // first zero indicates indexing with integer
| postfix_expr '.' IDENT { indexing 1 0 2 } // first one indicates indexing with a field name
| func_call
| postfix_expr "++" { unop 1 0 } // unop takes operator position then operand position, the reversed order 1 0 indicates a postfix op
| postfix_expr "--" { unop 1 0 }
// power operator: the right operand is a unary_expr, the left operand is a postfix_expr
power_expr = postfix_expr "**" unary_expr { binop 0 1 2 }
| postfix_expr
// prefix operators, plus new and del expressions. unop takes operator position then operand position
unary_expr = power_expr
| "++" unary_expr { unop 0 1 }
| "--" unary_expr { unop 0 1 }
| "|/" unary_expr { unop 0 1 }
| "not" unary_expr { unop 0 1 }
| [*&+-~] unary_expr { unop 0 1 } // NOTE(review): if pgen reads a hyphen between two set characters as a range, this set is much wider than the five single character operators presumably intended. confirm
| new_expr
| del_expr
// del applies to a plain identifier only
del_expr = "del" IDENT { del_expr 1 }
// cast: a parenthesized type_item in front of a cast_expr, the action gets the type and the operand
cast_expr = unary_expr
| '(' type_item ')' cast_expr { cast 1 3 }
// multiplicative operators, left recursive so they group to the left
multi_expr = cast_expr
| multi_expr [*/%] cast_expr { binop 0 1 2 }
// additive operators, left recursive, bind looser than the multiplicative operators
add_expr = add_expr [+-] multi_expr { binop 0 1 2 }
| multi_expr
// shift operators, left recursive, bind looser than the additive operators
shift_expr = add_expr
| shift_expr "<<" add_expr { binop 0 1 2 }
| shift_expr ">>" add_expr { binop 0 1 2 }
// ordering comparisons, left recursive, bind looser than the shift operators
relation_expr = shift_expr
| relation_expr '<' shift_expr { binop 0 1 2 }
| relation_expr '>' shift_expr { binop 0 1 2 }
| relation_expr "<=" shift_expr { binop 0 1 2 }
| relation_expr ">=" shift_expr { binop 0 1 2 }
// equality comparisons, left recursive, bind looser than the ordering comparisons
eq_expr = relation_expr
| eq_expr "==" relation_expr { binop 0 1 2 }
| eq_expr "!=" relation_expr { binop 0 1 2 }
// bitwise and logical operators. each level is left recursive and binds looser than the level before it
// order from tightest to loosest: bitwise and, bitwise xor, bitwise or, logical and, logical or
bit_and_expr = eq_expr
| bit_and_expr '&' eq_expr { binop 0 1 2 }
bit_xor_expr = bit_and_expr
| bit_xor_expr '^' bit_and_expr { binop 0 1 2 }
bit_or_expr = bit_xor_expr
| bit_or_expr '|' bit_xor_expr { binop 0 1 2 }
logic_and_expr = bit_or_expr
| logic_and_expr "and" bit_or_expr { binop 0 1 2 }
logic_or_expr = logic_and_expr
| logic_or_expr "or" logic_and_expr { binop 0 1 2 }
// conditional expression with question mark and colon. it uses the same if_cond action as cond_stmt
// the action receives the condition, the middle expr and the trailing cond_expr in that order
cond_expr = logic_or_expr
| logic_or_expr '?' expr ':' cond_expr { if_cond 0 2 4 }
// const_expr is syntactically just a cond_expr. it is used for array dimensions in array_dims
const_expr = cond_expr
// the non assignment alternative of assign_expr: a conditional expression or an initializer
assignee_expr = cond_expr
| initializer
// aggregate initializers: array form or struct form
initializer = array_init
| adt_init
// new followed by either an initializer or a type
new_expr = "new" new_expr_item { new_expr 1 }
new_expr_item = initializer
| type_item
// assignment: the target is a unary_expr and the rule recurses on the right hand side,
// so chained assignments nest to the right. the action gets target, operator and value
assign_expr = assignee_expr
| unary_expr assign_op assign_expr { assign 0 1 2 }
// plain assignment and the compound assignment operators
assign_op = '='
| "*="
| "/="
| "%="
| "+="
| "-="
| "<<="
| ">>="
| "&="
| "^="
| "|="
// top level expression: an assignment level expression or a match expression
expr = assign_expr
| match_expr
// array initializer in brackets. the empty form calls array_init with no arguments
array_init = '[' array_init_decl ']' { array_init 1 }
| '[' ']' { array_init }
// content of an array initializer: a range, a for statement or a list of expressions
array_init_decl = range_expr
| for_stmt
| expr_list
// comma separated expressions. a single expr is wrapped in a block as well
expr_list = expr_list ',' expr { block 0 2 }
| expr { block 0 }
// every construct that can stand as a statement
stmt = func_type
| func_def
| adt_type
| type_def
| expr
| var_decls
| for_stmt
| while_stmt
| cond_stmt
| block
| import_decl
| jump_stmt
// jump statements. position 0 is the keyword token itself, return may carry a value at position 1
jump_stmt = "continue" { jump 0 }
| "break" { jump 0 }
| "return" { jump 0 }
| "return" expr { jump 0 1 }
// match expression: the action gets the matched expr at position 1 and the case lines at position 4
match_expr = "match" expr "with" NEWLINE match_block_lines { match_expr 1 4 }
// one or more case lines. a single line is forwarded without a block action
match_block_lines = match_block_lines match_block_line { block 0 1 }
| match_block_line
// a case line starts with a bar, the trailing NEWLINE is optional. both forms forward the match_case
match_block_line = '|' match_case { 1 }
| '|' match_case NEWLINE { 1 }
// pattern, optional guard introduced by when, then the result expression after the arrow
match_case = pattern "->" expr { match_case 0 2 }
| pattern "when" cond_expr "->" expr { match_case 0 2 4 }
pattern = IDENT //variable pattern
| '-' literal_expr { unop 0 1 } // literal with a leading minus sign, built as a prefix unop
| literal_expr //constant pattern
| '_' { wild_card }//wildcard pattern
// a statement is a stmt optionally followed by a NEWLINE
// both alternatives yield the stmt node itself
statement = stmt
| stmt NEWLINE { 0 } // forward the stmt node explicitly, the same way field_decls_line forwards var_type_list
// indented block: the action forwards the statements found between INDENT and DEDENT
block = NEWLINE INDENT statements DEDENT { 2 }
// statement list. statements may follow each other directly or be separated by a semicolon
// a single statement is wrapped in a block as well
statements = statements statement { block 0 1 }
| statements ';' statement { block 0 2 }
| statement { block 0 }
// struct and variant declarations: the action gets the name at position 1 and the body at position 3
adt_type = "struct" IDENT '=' struct_type { struct_type 1 3 }
| "variant" IDENT '=' variant_type { variant_type 1 3 }
// struct body: fields on the same line, or an indented block of field lines
struct_type = var_type_list
| NEWLINE INDENT field_decls_lines DEDENT { 2 }
// one or more field lines. a single line is forwarded without a block action
field_decls_lines = field_decls_lines field_decls_line { block 0 1 }
| field_decls_line
// a field line is a var_type_list with an optional trailing NEWLINE
field_decls_line = var_type_list
| var_type_list NEWLINE { 0 }
// tuple type: parenthesized comma separated fields, the action forwards the field list
tuple_type = '(' tuple_fields ')' { 1 }
// a single field is forwarded without a block action
tuple_fields = tuple_fields ',' tuple_field { block 0 2 }
| tuple_field
// a tuple field is either a named typed field or a bare type
tuple_field = var_type_decl
| type_item
// variant body: items on the same line separated by bars, or an indented block of item lines
variant_type = variant_items
| variant_type_block
variant_type_block = NEWLINE INDENT variant_item_lines DEDENT { 2 }
// one or more item lines. a single line is forwarded without a block action
variant_item_lines = variant_item_lines variant_item_line { block 0 1 }
| variant_item_line
// an item line starts with a bar, the trailing NEWLINE is optional. both forms forward the variant_item
variant_item_line = '|' variant_item { 1 }
| '|' variant_item NEWLINE { 1 }
// same line form: items separated by bars, without a leading bar
variant_items = variant_items '|' variant_item { block 0 2 }
| variant_item
// the first number of variant_type_item is a kind code, the remaining numbers are symbol positions
variant_item = IDENT '(' type_list ')' { variant_type_item 0 0 2 } //tagged union
| ident_type { variant_type_item 1 0 } //untagged union
| IDENT { variant_type_item 2 0 } //enum
| IDENT '=' LITERAL_INT { variant_type_item 3 0 2 } //enum with an explicit integer value
// comma separated type_expr_item list used as the payload of a tagged union item
type_list = type_list ',' type_expr_item { block 0 2 }
| type_expr_item
// struct initializer in curly braces, with or without a leading type name
// the tuple form of adt_init, with a leading 1, is produced by primary_expr
adt_init = IDENT '{' expr_list '}' { adt_init 0 0 2 } // first 0 indicating struct type, then the name and the value list
| '{' expr_list '}' { adt_init 0 1 } // first 0 indicating struct type, then the value list only
// function type declaration without a body
// the action gets the name, then the return type at position 4, then the parameters at position 2
func_type = "fun" IDENT func_param_decls "->" type_item { func_type 1 4 2 }
// import from a named module: a memory declaration, a function type or a typed variable
import_decl = "from" IDENT "import" memory_decl { import_decl 1 3 }
| "from" IDENT "import" func_type { import_decl 1 3 }
| "from" IDENT "import" var_type_decl { import_decl 1 3 }
// memory declaration with one integer, or two integers separated by a comma
// NOTE(review): the meaning of the two integers is not visible in this file. confirm against the memory_decl action
memory_decl = "memory" LITERAL_INT { memory_decl 1 }
| "memory" LITERAL_INT ',' LITERAL_INT { memory_decl 1 3 }
// body introduced by a colon. the action forwards the statement, which can itself be an indented block
block_def = ':' statement { 1 }
// function definition. the action gets name, parameters and body
// when a return type is given it is passed last, after the body
func_def = "def" IDENT func_param_decls block_def { function 1 2 3 }
| "def" IDENT func_param_decls "->" type_item block_def { function 1 2 5 4 }
// function call: the callee is a plain identifier followed by a parenthesized argument list
func_call = IDENT call_arg_exprs { call 0 1 }
call_arg_exprs = '(' arg_exprs ')' { 1 }
| '(' ')' { block } // empty block
// comma separated argument expressions. a single argument is wrapped in a block as well
arg_exprs = arg_exprs ',' expr { block 0 2 }
| expr { block 0 }
// if statement. the if_cond action gets the condition, the body and an optional trailing
// elif or else part at position 3
cond_stmt = "if" expr block_def { if_cond 1 2 }
| "if" expr block_def elif_stmt { if_cond 1 2 3 }
| "if" expr block_def else_stmt { if_cond 1 2 3 }
// an elif part has the same shape as cond_stmt and uses the same action, so chains nest to the right
elif_stmt = "elif" expr block_def { if_cond 1 2 }
| "elif" expr block_def elif_stmt { if_cond 1 2 3 }
| "elif" expr block_def else_stmt { if_cond 1 2 3 }
// the else part forwards its body
else_stmt = "else" block_def { 1 }
// for loop over a range. the action gets the loop variable declaration, the range and the body
for_stmt = "for" param_decl "in" range_expr block_def { for_loop 1 3 4 }
// while loop. the action gets the condition and the body
while_stmt = "while" expr block_def { while_loop 1 2 }
// range with three or two operands separated by double dots
// the operands are passed to the range action in source order
// NOTE(review): which operand of the three operand form is the step is not visible in this file. confirm against the range action
range_expr = expr ".." expr ".." expr { range 0 2 4 }
| expr ".." expr { range 0 2 }
// parenthesized parameter list of a function, possibly empty
func_param_decls = '(' param_decls ')' { 1 }
| '(' ')' { block } // empty block
// comma separated parameters. a single parameter is wrapped in a block as well
param_decls = param_decls ',' param_decl { block 0 2 }
| param_decl { block 0 }
// a parameter is a variable declaration or the three dot token
param_decl = var_decl
| "..."
// let declarations, with or without initial values, optionally marked mut
// first 0 indicates marking a variable, the second value indicates mutability, the third is the position of the declarations
var_decls = "let" var_decl_inits { mark_mut 0 0 1 }
| "let" var_type_list { mark_mut 0 0 1 }
| "let" "mut" var_decl_inits { mark_mut 0 1 2 }
| "let" "mut" var_type_list { mark_mut 0 1 2 }
// comma separated initialized declarations. a single one is forwarded without a block action
var_decl_inits = var_decl_inits ',' var_decl_init { block 0 2 }
| var_decl_init
// comma separated variable declarations forming the left side of var_decl_init
var_decl_list = var_decl_list ',' var_decl { block 0 2 }
| var_decl
// comma separated typed variables without initial values
var_type_list = var_type_list ',' var_type_decl { block 0 2 }
| var_type_decl
// the first number of the variable action is a kind code, as the trailing comments describe
var_type_decl = ident_type { variable 1 0 } // var name:type, no initial value
var_decl = var_type_decl
| IDENT { variable 0 0 } // variable name only
var_decl_init = var_decl_list '=' expr_block { variable 2 0 2 } // var with init value
// initial value: an expr on the same line, or a single expr in an indented block
expr_block = expr
| NEWLINE INDENT expr DEDENT { 2 }