m grammar

/*
 * Grammar for mlang.
 * This is the actual grammar text consumed by the self-developed LALR parser generator pgen
 * to output a parsing table that the mlang parser uses to parse m source code into ASTs.
 * The part on the right side of each production rule enclosed in curly braces is the semantic
 * action that builds the AST node when the rule is applied to reduce to the left-hand symbol.
*/

/*
In a semantic action, index 100 indicates an empty list/block node.
*/

// start reduces from a statement list.
// NOTE(review): presumably this is the start symbol of the grammar - confirm against pgen.
start           = statements                                                               

// The unit type is written with the keyword None.
unit_type       = "None"

// Keywords accepted as builtin type names, plus the unit type.
builtin_types   = unit_type | "bool" | "int" 
                | "u8" | "u16" | "u32" | "u64" 
                | "i8" | "i16" | "i32" | "i64" 
                | "f32" | "f64" | "char" | "string"

// A type reference. For type_item_node the first argument is a kind code
// (0 builtin, 1 type name, 2 array, 3 tuple, 4 reference) and the second is
// the position of the right-hand-side symbol that carries the type.
// A mut prefix is handled by mark_mut applied to the nested type_item.
type_item       = builtin_types                                                             { type_item_node 0 0 } // 0 is builtin type
                | array_type                                                                { type_item_node 2 0 } // 2 is array type
                | tuple_type                                                                { type_item_node 3 0 } // 3 is tuple type
                | '&' type_item                                                             { type_item_node 4 1 } // 4 is ref type
                | "mut" type_item                                                           { mark_mut 1 1 1 } // first 1 is the mark_mut action code for a type node
                | IDENT                                                                     { type_item_node 1 0 } // 1 is type name

// A named type: identifier, colon, type. The action receives a leading 1
// followed by the positions of the name (0) and the type (2).
// NOTE(review): the leading 1 presumably flags that a name is present - confirm.
ident_type      = IDENT ':' type_item                                                       { type_expr_item 1 0 2 }

// One operand of a type expression: either a named type or a bare type.
type_expr_item  = ident_type
                | type_item                                                                 { type_expr_item 0 0 }

// NOTE(review): no other rule visible in this file refers to var_name_type and
// its action is empty - confirm whether it is still needed.
var_name_type   = ident_type                                                                { }

// Atom of a type expression: a single item or a parenthesized type expression
// (the action passes through the inner expression at position 1).
type_expr_base  = type_expr_item
                | '(' type_expr ')'                                                         { 1 }

// Product of types written with a star. Left-recursive, and nested below
// type_expr so that it binds tighter than the bar operator.
type_expr_prod  = type_expr_prod '*' type_expr_base                                         { binop 0 1 2 }                               
                | type_expr_base

// Alternation of types written with a bar. Left-recursive.
type_expr       = type_expr '|' type_expr_prod                                              { binop 0 1 2 }
                | type_expr_prod

// Type definition: the type keyword, a name, an equals sign and a type expression.
// The action receives the name (position 1) and the type expression (position 3).
type_def        = "type" IDENT '=' type_expr                                                { type_node 1 3 }

// Array type: an element type followed by one or more bracketed dimensions.
array_type      = type_item array_dims                                                      { array_type 0 1 }

// Dimension list. Each dimension is a const_expr in square brackets; the block
// action collects the dimension expressions (the brackets are not passed on).
array_dims      = '[' const_expr ']'                                                        { block 1 }
                | array_dims '[' const_expr ']'                                             { block 0 2 }

// Literal tokens produced by the lexer plus the boolean keywords True and False.
literal_expr    = LITERAL_INT                                                                 
                | LITERAL_FLOAT                                                               
                | LITERAL_CHAR                                                                
                | LITERAL_STRING                                                              
                | "True"                                                              
                | "False"                                                             

// Primary expressions: literal, identifier, parenthesized expression, or a
// parenthesized expression list which is built as a tuple through adt_init.
primary_expr    = literal_expr                                             
                | IDENT                                                               
                | '(' expr ')'                                                              { 1 }
                | '(' expr_list ')'                                                         { adt_init 1 1 } // first 1 indicates tuple type

// Postfix forms: element indexing, field access, function call and the postfix
// increment and decrement operators. For indexing the first argument selects
// the flavour (0 by expression, 1 by name); the other two are the positions of
// the indexed object and of the index or field name.
postfix_expr    = primary_expr
                | postfix_expr '[' expr ']'                                                 { indexing 0 0 2 } // first zero indicates indexing with integer
                | postfix_expr '.' IDENT                                                    { indexing 1 0 2 } // first one indicates indexing with name
                | func_call
                | postfix_expr "++"                                                         { unop 1 0 } // operator position given before operand position: prefix rules use 0 1, postfix rules use 1 0
                | postfix_expr "--"                                                         { unop 1 0 }

// Power operator. The left operand is a postfix_expr and the right operand is a
// unary_expr, so the right side may itself carry prefix operators or another
// power expression.
power_expr      = postfix_expr "**" unary_expr                                              { binop 0 1 2 } 
                | postfix_expr                                                        

// Prefix (unary) operators, plus new and del expressions. Each operator
// alternative builds a unop node from the operator at position 0 and the
// operand at position 1.
// The minus sign is kept last inside the single-character operator set so that
// it can only be read as a literal minus and never as a range from plus to
// tilde; this matches how the set in add_expr is written.
unary_expr      = power_expr                                                          
                | "++" unary_expr                                                           { unop 0 1 }
                | "--" unary_expr                                                           { unop 0 1 }
                | "|/" unary_expr                                                           { unop 0 1 }
                | "not" unary_expr                                                          { unop 0 1 }
                | [*&+~-] unary_expr                                                        { unop 0 1 }
                | new_expr
                | del_expr

// Delete expression: the del keyword followed by an identifier (position 1).
del_expr        = "del" IDENT                                                               { del_expr 1 }

// Cast: a parenthesized type followed by the expression being cast. The action
// receives the type (position 1) and the operand (position 3).
cast_expr       = unary_expr
                | '(' type_item ')' cast_expr                                               { cast 1 3 }

// Multiplicative operators (star, slash, percent). Left-recursive; binop
// receives left operand, operator and right operand positions.
multi_expr      = cast_expr
                | multi_expr [*/%] cast_expr                                                { binop 0 1 2 } 

// Additive operators (plus, minus). Left-recursive.
add_expr        = add_expr [+-] multi_expr                                                  { binop 0 1 2 } 
                | multi_expr

// Shift operators. Left-recursive.
shift_expr      = add_expr
                | shift_expr "<<"  add_expr                                                 { binop 0 1 2 }
                | shift_expr ">>"  add_expr                                                 { binop 0 1 2 }

// Relational comparison operators. Left-recursive, operands are shift_expr.
relation_expr   = shift_expr                                                            
                | relation_expr '<' shift_expr                                              { binop 0 1 2 }
                | relation_expr '>' shift_expr                                              { binop 0 1 2 }
                | relation_expr "<=" shift_expr                                             { binop 0 1 2 }
                | relation_expr ">=" shift_expr                                             { binop 0 1 2 }

// Equality and inequality operators. Left-recursive, binds looser than the
// relational operators because relation_expr is nested below it.
eq_expr         = relation_expr
                | eq_expr "==" relation_expr                                                { binop 0 1 2 }
                | eq_expr "!=" relation_expr                                                { binop 0 1 2 }

// Binary ampersand operator. Left-recursive, operands are eq_expr.
bit_and_expr    = eq_expr                                                       
                | bit_and_expr '&' eq_expr                                                  { binop 0 1 2 }

// Binary caret operator. Left-recursive, binds looser than the ampersand operator.
bit_xor_expr    = bit_and_expr                                                        
                | bit_xor_expr '^' bit_and_expr                                             { binop 0 1 2 }

// Binary bar operator. Left-recursive, binds looser than the caret operator.
bit_or_expr     = bit_xor_expr                                                        
                | bit_or_expr '|' bit_xor_expr                                              { binop 0 1 2 }

// Keyword operator and. Left-recursive, binds looser than the bar operator.
logic_and_expr  = bit_or_expr
                | logic_and_expr "and" bit_or_expr                                           { binop 0 1 2 }

// Keyword operator or. Left-recursive, the loosest binary operator in this chain.
logic_or_expr   = logic_and_expr
                | logic_or_expr "or" logic_and_expr                                         { binop 0 1 2 }

// Conditional expression with question mark and colon. It is built with the
// same if_cond action that cond_stmt uses: condition (position 0), then-value
// (position 2) and else-value (position 4). The else branch recurses into
// cond_expr, so chained conditionals nest to the right.
cond_expr       = logic_or_expr
                | logic_or_expr '?' expr ':' cond_expr                                      { if_cond 0 2 4 }

// Alias of cond_expr, used for array dimensions.
const_expr      = cond_expr

// Values that can end an assignment chain: a conditional expression or an initializer.
assignee_expr   = cond_expr
                | initializer

// Aggregate initializers: array or struct style.
initializer     = array_init
                | adt_init

// The new keyword followed by an initializer or a type.
new_expr        = "new" new_expr_item                                                       { new_expr 1 } 

new_expr_item   = initializer
                | type_item

// Assignment. The target is a unary_expr and the rule recurses on the right
// side, so chained assignments group to the right. The action receives target,
// operator and value positions.
assign_expr     = assignee_expr
                | unary_expr assign_op  assign_expr                                         { assign 0 1 2 }

// Plain assignment and the compound assignment operators.
assign_op       = '='
                | "*="
                | "/="
                | "%="
                | "+="
                | "-="
                | "<<="
                | ">>="
                | "&="
                | "^="
                | "|="

// Top-level expression: an assignment expression or a match expression.
expr            = assign_expr
                | match_expr

// Array initializer in square brackets. The empty form calls array_init with
// no argument.
array_init      = '[' array_init_decl ']'                                                   { array_init 1 }
                | '[' ']'                                                                   { array_init }

// Contents of an array initializer: a range, a for statement or a list of expressions.
array_init_decl = range_expr
                | for_stmt
                | expr_list

// Comma-separated expressions collected into a block node. The single-element
// case is also wrapped by the block action.
expr_list       = expr_list ',' expr                                                        { block 0 2 }
                | expr                                                                      { block 0 }

// Every construct that may appear as a statement.
stmt            = func_type
                | func_def                                                            
                | adt_type
                | type_def                                                            
                | expr                                                                
                | var_decls                                                           
                | for_stmt                   
                | while_stmt
                | cond_stmt    
                | block
                | import_decl
                | jump_stmt

// Jump statements. Every alternative passes position 0, the keyword token, to
// the jump action; return with a value also passes the expression at position 1.
// NOTE(review): presumably the jump action tells continue, break and return
// apart by inspecting that keyword token - confirm in the action implementation.
jump_stmt       = "continue"                                                                { jump 0 }
                | "break"                                                                   { jump 0 }
                | "return"                                                                  { jump 0 }
                | "return" expr                                                             { jump 0 1 }

// Match expression: the matched expression (position 1), the with keyword, a
// NEWLINE and then one or more case lines (position 4).
match_expr      = "match" expr "with" NEWLINE match_block_lines                             { match_expr 1 4 }

// One or more case lines collected into a block node.
match_block_lines   = match_block_lines match_block_line                                    { block 0 1 }
                    | match_block_line                                                           

// A case line starts with a bar; the trailing NEWLINE is optional. Only the
// case itself (position 1) is passed on.
match_block_line    = '|' match_case                                                        { 1 }
                    | '|' match_case NEWLINE                                                { 1 }

// A case is a pattern and a result expression, optionally with a guard
// introduced by the when keyword. Without a guard the action receives pattern
// and result (positions 0 and 2); with a guard it receives pattern, guard and
// result (positions 0, 2 and 4), so the meaning of the second argument depends
// on the argument count.
match_case      = pattern  "->" expr                                                        { match_case 0 2 }
                | pattern "when" cond_expr "->" expr                                        { match_case 0 2 4 }

// Patterns: a variable, a literal (optionally negated) or the wildcard.
pattern         = IDENT              //variable pattern     
                | '-' literal_expr                                                          { unop 0 1 }
                | literal_expr       //constant pattern
                | '_'                                                                       { wild_card }//wildcard pattern                     

// A stmt optionally followed by a NEWLINE token.
statement       = stmt
                | stmt NEWLINE

// block is an indented statement list: NEWLINE, INDENT, the statements
// (position 2, passed through) and DEDENT.
// statements is a list of statement entries collected into a block node;
// entries may follow each other directly or be separated by a semicolon.
block           = NEWLINE INDENT statements DEDENT                                          { 2 }
statements      = statements statement                                                      { block 0 1 }
                | statements ';' statement                                                  { block 0 2 }
                | statement                                                                 { block 0 }                                                     

// Struct and variant declarations. Each action receives the name (position 1)
// and the body (position 3).
adt_type            = "struct" IDENT '=' struct_type                                        { struct_type 1 3 }
                    | "variant" IDENT '=' variant_type                                      { variant_type 1 3 }

// Struct body: either a field list on the same line or an indented group of
// field lines.
struct_type         = var_type_list
                    | NEWLINE INDENT field_decls_lines DEDENT                               { 2 }

// One or more field lines collected into a block node.
field_decls_lines   = field_decls_lines field_decls_line                                    { block 0 1 }
                    | field_decls_line

// A field line is a field list with an optional trailing NEWLINE, which is
// dropped by the action.
field_decls_line    = var_type_list 
                    | var_type_list NEWLINE                                                 { 0 }

// Tuple type: a parenthesized field list; the action passes through the list
// at position 1.
tuple_type      = '(' tuple_fields ')'                                                      { 1 }                                            

// Comma-separated tuple fields.
// NOTE(review): unlike expr_list, the single-field alternative has no block
// action - confirm that consumers accept an unwrapped single field.
tuple_fields    = tuple_fields ',' tuple_field                                              { block 0 2 }
                | tuple_field

// A tuple field is either a named field or a bare type.
tuple_field     = var_type_decl
                | type_item

// Variant body: items on one line separated by bars, or an indented group of lines.
variant_type        = variant_items                  
                    | variant_type_block

// variant_type_block is the indented form: NEWLINE, INDENT, the item lines
// (position 2, passed through) and DEDENT.
// variant_item_lines collects one or more item lines into a block node.
variant_type_block  = NEWLINE INDENT variant_item_lines DEDENT                              { 2 }
variant_item_lines  = variant_item_lines variant_item_line                                  { block 0 1 }
                    | variant_item_line                                                           

// An item line starts with a bar; the trailing NEWLINE is optional. Only the
// item (position 1) is passed on.
variant_item_line   = '|' variant_item                                                      { 1 }
                    | '|' variant_item NEWLINE                                              { 1 }

// Items of the single-line form, separated by bars.
variant_items   = variant_items '|' variant_item                                            { block 0 2 }
                | variant_item

// One variant alternative. The first argument of variant_type_item is a kind
// code (0 to 3) as described by the trailing comments; the remaining arguments
// are right-hand-side positions.
variant_item    = IDENT '(' type_list ')'                                                   { variant_type_item 0 0 2 } //tagged union
                | ident_type                                                                { variant_type_item 1 0 }   //untagged union
                | IDENT                                                                     { variant_type_item 2 0 }   //enum
                | IDENT '=' LITERAL_INT                                                     { variant_type_item 3 0 2 } //enum with an explicit integer literal

// Comma-separated type_expr_item entries used as the payload of a tagged item.
type_list       = type_list ',' type_expr_item                                              { block 0 2 }    
                | type_expr_item

// Struct-style initializer in curly braces, with or without a leading type
// name. The first argument 0 marks the struct flavour (primary_expr uses 1 for
// tuples); the remaining arguments are right-hand-side positions: name and
// value list for the named form, value list only for the anonymous form.
adt_init        = IDENT '{' expr_list '}'                                                   { adt_init 0 0 2 } // first 0 indicates struct type
                | '{' expr_list '}'                                                         { adt_init 0 1 } // first 0 indicates struct type

// Function prototype without a body. The action receives the name (position 1),
// the return type (position 4) and the parameter list (position 2), in that order.
func_type       = "fun" IDENT func_param_decls "->" type_item                               { func_type 1 4 2 }

// Import of a memory declaration, a function prototype or a typed variable
// from the module named by IDENT. The action receives the module name
// (position 1) and the imported item (position 3).
import_decl     = "from" IDENT "import" memory_decl                                         { import_decl 1 3 }
                | "from" IDENT "import" func_type                                           { import_decl 1 3 }    
                | "from" IDENT "import" var_type_decl                                       { import_decl 1 3 }

// Memory declaration with one or two integer literals.
// NOTE(review): the meaning of the two integers is not visible in this file - confirm.
memory_decl     = "memory" LITERAL_INT                                                      { memory_decl 1 }
                | "memory" LITERAL_INT ',' LITERAL_INT                                      { memory_decl 1 3 }

// Body introduced by a colon. The body is a single statement, which may itself
// be an indented block because block is one of the stmt alternatives.
block_def       = ':' statement                                                             { 1 }

// Function definition, with or without a declared return type. The action
// receives name, parameters and body; when a return type is present it is
// passed as a fourth argument after the body (positions 1 2 5 4).
func_def        = "def" IDENT func_param_decls block_def                                    { function 1 2 3 }
                | "def" IDENT func_param_decls "->" type_item block_def                     { function 1 2 5 4 }

// Function call: callee name followed by a parenthesized argument list.
func_call       = IDENT call_arg_exprs                                                      { call 0 1 }

// Parenthesized call arguments; an empty pair of parentheses yields an empty block.
call_arg_exprs  = '(' arg_exprs ')'                                                         { 1 }
                | '(' ')'                                                                   { block } // empty block

// Comma-separated argument expressions collected into a block node.
arg_exprs       = arg_exprs ',' expr                                                        { block 0 2 }
                | expr                                                                      { block 0 }

// If statement. The if_cond action receives the condition (position 1), the
// then-body (position 2) and, when present, the elif or else continuation
// (position 3).
cond_stmt       = "if" expr block_def                                                       { if_cond 1 2 }              
                | "if" expr block_def elif_stmt                                             { if_cond 1 2 3 }
                | "if" expr block_def else_stmt                                             { if_cond 1 2 3 }

// An elif branch is built with the same if_cond action, so a chain of elif
// branches becomes nested if_cond nodes.
elif_stmt       = "elif" expr block_def                                                     { if_cond 1 2 }
                | "elif" expr block_def elif_stmt                                           { if_cond 1 2 3 }
                | "elif" expr block_def else_stmt                                           { if_cond 1 2 3 }

// The else branch passes through its body (position 1).
else_stmt       = "else" block_def                                                          { 1 }

// For loop over a range. The action receives the loop variable declaration
// (position 1), the range (position 3) and the body (position 4).
for_stmt        = "for" param_decl "in" range_expr block_def                                { for_loop 1 3 4 }

// While loop: condition (position 1) and body (position 2).
while_stmt      = "while" expr block_def                                                    { while_loop 1 2 }

// Range written with two or three expressions separated by double dots.
// NOTE(review): which of the three expressions is the step is not visible in
// this file - confirm in the range action implementation.
range_expr      =  expr ".." expr ".." expr                                                 { range 0 2 4}
                |  expr ".." expr                                                           { range 0 2 }


// Parenthesized parameter list; an empty pair of parentheses yields an empty block.
func_param_decls = '(' param_decls ')'                                                      { 1 }
                 | '(' ')'                                                                  { block } // empty block

// Comma-separated parameters collected into a block node.
param_decls     = param_decls ',' param_decl                                                { block 0 2 }
                | param_decl                                                                { block 0 }

// A parameter is a variable declaration or the three-dot token.
// NOTE(review): the three-dot token presumably marks a variadic parameter list - confirm.
param_decl      = var_decl
                | "..."

// Variable declarations introduced by let. For mark_mut the first argument 0
// selects variable marking, the second is the mutability (0 immutable,
// 1 mutable) and the third is the position of the declaration list.
var_decls       = "let" var_decl_inits                                                      { mark_mut 0 0 1 }
                | "let" var_type_list                                                       { mark_mut 0 0 1 }
                | "let" "mut" var_decl_inits                                                { mark_mut 0 1 2 }
                | "let" "mut" var_type_list                                                 { mark_mut 0 1 2 }

// Comma-separated initialized declarations.
var_decl_inits  = var_decl_inits ',' var_decl_init                                          { block 0 2 }
                | var_decl_init

// Comma-separated declarations appearing on the left side of an initializer.
var_decl_list   = var_decl_list ',' var_decl                                                { block 0 2 }
                | var_decl

// Comma-separated typed declarations without initial values.
var_type_list   = var_type_list ',' var_type_decl                                           { block 0 2 }
                | var_type_decl

// For the variable action the first argument is a form code: 0 name only,
// 1 name with type, 2 declaration with initial value.
var_type_decl   = ident_type                                                                { variable 1 0 }   // var name:type, no initial value

var_decl        = var_type_decl
                | IDENT                                                                     { variable 0 0 }   // variable name only

var_decl_init   = var_decl_list '=' expr_block                                              { variable 2 0 2 } // var with init value

// Initial value: an expression on the same line, or a single expression on its
// own indented line (position 2 is passed through).
expr_block      = expr
                | NEWLINE INDENT expr DEDENT                                                { 2 }