/* Copyright (C) 2019, 2020 Leo Tenenbaum. This file is part of toc. toc is distributed under version 3 of the GNU General Public License, without any warranty whatsoever. You should have received a copy of the GNU General Public License along with toc. If not, see . */ /* NOTE: make sure you edit copy.c and package.c and cgen_recurse_subexprs/types when you make a change to expression-related types or type-related types in this file! */ typedef long double Floating; /* OPTIM: Switch to double, but make sure floating-point literals are right */ #if __STDC_VERSION__ < 201112 /* try to find the type with the strictest alignment */ typedef union { long double floating; void *ptr; #if __STDC_VERSION__ >= 199901 long #endif long integer; void (*fn_ptr)(void); } MaxAlign; #else typedef max_align_t MaxAlign; #endif typedef uint8_t U8; #define U8_MAX UINT8_MAX typedef uint16_t U16; #define U16_MAX UINT16_MAX typedef uint32_t U32; #define U32_MAX UINT32_MAX typedef uint64_t U64; #define U64_MAX UINT64_MAX #define U8_FMT "%" PRIu8 #define U16_FMT "%" PRIu16 #define U32_FMT "%" PRIu32 #define U64_FMT "%" PRIu64 #if __STDC_VERSION__ >= 199901 #include #elif defined __cplusplus #else typedef U8 bool; #endif typedef int8_t I8; #define I8_MAX INT8_MAX typedef int16_t I16; #define I16_MAX INT16_MAX typedef int32_t I32; #define I32_MAX INT32_MAX typedef int64_t I64; #define I64_MAX INT64_MAX #define I8_FMT "%" PRId8 #define I16_FMT "%" PRId16 #define I32_FMT "%" PRId32 #define I64_FMT "%" PRId64 /* NOTE: if you change these, make sure you change hash_tables.c */ typedef float F32; typedef double F64; #define F32_MANT_DIG FLT_MANT_DIG #define F32_DIG FLT_DIG #define F64_MANT_DIG DBL_MANT_DIG #define F64_DIG DBL_DIG #define F32_FMT "%.16f" #define F64_FMT "%.16f" typedef U32 IdentID; /* identifier ID for cgen (anonymous variables). not to be confused with IdentTree.id */ /* for keeping track of whence something came */ #ifdef TOC_DEBUG #define SOURCE_LOCATION char *src_file; int src_line; #define SOURCE_LOCATION_PARAMS char *src_file, int src_line, #define DEBUG_UNDERSCORE(x) x##_ #else #define SOURCE_LOCATION #define SOURCE_LOCATION_PARAMS #define DEBUG_UNDERSCORE(x) x #endif typedef struct Location { U32 line; U32 pos; /* position in ctx->str */ struct ErrCtx *ctx; } Location; typedef struct ErrCtx { const char *filename; char *str; /* file contents */ bool enabled; Location *instance_stack; /* stack of locations which generate the instances we're dealing with */ } ErrCtx; typedef struct Page { struct Page *next; size_t used; /* number MaxAligns used, not bytes */ MaxAlign data[]; } Page; typedef struct Allocator { Page *first; Page *last; } Allocator; typedef struct ArrBlock { void *data; size_t n; /* number of things in this block so far */ void *last; /* last one of them */ } ArrBlock; typedef struct BlockArr { size_t item_sz; int lg_block_sz; ArrBlock *blocks; } BlockArr; typedef struct HashTable { void *data; bool *occupied; U64 n; U64 cap; } HashTable; typedef struct Slice { I64 n; void *data; } Slice; typedef union Value { U8 u8; U16 u16; U32 u32; U64 u64; I8 i8; I16 i16; I32 i32; I64 i64; bool boolv; char charv; float f32; double f64; struct FnExpr *fn; void *arr; void *ptr; void *struc; union Value *tuple; Slice slice; struct Type *type; struct Package *pkg; } Value; enum { IDECL_HAS_VAL = 0x01, }; typedef enum { IDECL_DECL, IDECL_EXPR } IdentDeclKind; typedef struct IdentDecl { union { struct Declaration *decl; struct Expression *expr; /* for example, this identifier is declared in an each expression */ }; struct Block *scope; /* NULL for file scope */ Value val; SOURCE_LOCATION IdentDeclKind kind; U16 flags; } IdentDecl; /* The way you search an identifier in a tree is: root.children[low part of 1st char].children[high part of 1st char] .children[low part of 2nd char]... */ #define TREE_NCHILDREN 16 typedef struct IdentTree { /* zero value is an empty trie */ uint16_t depth; unsigned char index_in_parent; /* index of this in .parent.children */ bool export_name; /* is this identifier's name important? */ U64 export_id; /* 0 if there's no exported identifier here, otherwise unique positive integer associated with this identifier */ struct Package *pkg; /* NULL if this is not associated with a package */ struct IdentTree *parent; struct IdentTree *children[TREE_NCHILDREN]; IdentDecl *decls; /* array of declarations of this identifier */ } IdentTree; typedef IdentTree *Identifier; typedef struct Identifiers { BlockArr trees; IdentTree *root; U64 nidents; } Identifiers; typedef enum { TOKEN_KW, TOKEN_IDENT, TOKEN_DIRECT, TOKEN_LITERAL_NUM, TOKEN_LITERAL_CHAR, TOKEN_LITERAL_STR, TOKEN_EOF } TokenKind; typedef enum { DIRECT_C, DIRECT_SIZEOF, DIRECT_ALIGNOF, DIRECT_EXPORT, DIRECT_COUNT } Directive; typedef enum { KW_SEMICOLON, KW_COLON, KW_COMMA, KW_LPAREN, KW_RPAREN, KW_LBRACE, KW_RBRACE, KW_LSQUARE, KW_RSQUARE, KW_EQ_EQ, KW_PLUS_EQ, KW_MINUS_EQ, KW_ASTERISK_EQ, KW_SLASH_EQ, KW_NE, KW_LE, KW_LT, KW_GE, KW_GT, KW_PLUS, KW_MINUS, KW_ASTERISK, KW_EXCLAMATION, KW_AMPERSAND, KW_SLASH, KW_DOTDOT, KW_DOT, KW_EQ, KW_LAST_SYMBOL = KW_EQ, /* last one entirely consisting of symbols */ KW_IF, KW_ELIF, KW_ELSE, KW_WHILE, KW_EACH, KW_RETURN, KW_FN, KW_AS, KW_NEW, KW_DEL, KW_STRUCT, KW_INT, KW_I8, KW_I16, KW_I32, KW_I64, KW_U8, KW_U16, KW_U32, KW_U64, KW_FLOAT, KW_F32, KW_F64, KW_TYPE, KW_CHAR, KW_BOOL, KW_TRUE, KW_FALSE, KW_PKG, KW_COUNT } Keyword; typedef enum { NUM_LITERAL_INT, NUM_LITERAL_FLOAT } NumLiteralKind; typedef struct NumLiteral { NumLiteralKind kind; union { U64 intval; Floating floatval; }; } NumLiteral; typedef struct StrLiteral { char *str; size_t len; } StrLiteral; /* NOTE: Location is typedef'd in util/err.c */ typedef struct Token { TokenKind kind; Location where; union { Keyword kw; Directive direct; Identifier ident; NumLiteral num; char chr; StrLiteral str; }; } Token; typedef struct Tokenizer { Allocator *allocr; Token *tokens; char *s; /* string being parsed */ ErrCtx *err_ctx; U32 line; Token *token; /* token currently being processed */ Identifiers *idents; } Tokenizer; typedef enum { TYPE_VOID, TYPE_UNKNOWN, TYPE_BUILTIN, TYPE_FN, TYPE_TUPLE, TYPE_ARR, TYPE_PTR, TYPE_SLICE, TYPE_TYPE, TYPE_PKG, TYPE_EXPR, /* just use this expression as the type. this kind of type doesn't exist after resolving. */ TYPE_STRUCT #define TYPE_COUNT (TYPE_STRUCT+1) } TypeKind; typedef enum { BUILTIN_I8, BUILTIN_I16, BUILTIN_I32, BUILTIN_I64, BUILTIN_U8, BUILTIN_U16, BUILTIN_U32, BUILTIN_U64, BUILTIN_F32, BUILTIN_F64, BUILTIN_CHAR, BUILTIN_BOOL } BuiltinType; /* field of a struct */ typedef struct Field { Identifier name; struct Type *type; size_t offset; /* offset during compile time */ } Field; typedef U8 Constness; #define CONSTNESS_NO ((Constness)0) #define CONSTNESS_SEMI ((Constness)1) #define CONSTNESS_YES ((Constness)2) typedef struct FnType { struct Type *types; /* dynamic array [0] = ret_type, [1:] = param_types */ Constness *constness; /* [i] = constness of param #i. iff no parameters are constant, this is NULL. don't use it as a dynamic array, because eventually it might not be. */ } FnType; enum { STRUCT_DEF_FOUND_OFFSETS = 0x01, STRUCT_DEF_CGENERATED = 0x02, }; typedef struct { Field *fields; Location where; U16 flags; size_t size; /* size of this struct during compile time */ size_t align; struct { Identifier name; IdentID id; } c; struct { U32 id; /* (index into exptr->exported_structs) + 1, or 0 if hasn't been exported */ } export; } StructDef; enum { TYPE_IS_FLEXIBLE = 0x01, TYPE_IS_RESOLVED = 0x02, }; typedef U16 TypeFlags; typedef struct Type { Location where; TypeKind kind; TypeFlags flags; struct Expression *was_expr; /* if non-NULL, indicates that this type used to be an expression (TYPE_EXPR) */ union { BuiltinType builtin; FnType fn; struct Type *tuple; struct { struct Type *of; union { U64 n; /* after resolving */ struct Expression *n_expr; /* before resolving */ }; } arr; struct Type *ptr; struct Type *slice; StructDef *struc; /* it's a pointer so that multiple Types can reference the same struct definition */ struct Expression *expr; }; } Type; enum { BLOCK_IS_FN = 0x01, BLOCK_FOUND_TYPES = 0x02, }; typedef struct Block { U16 flags; Location start; Location end; struct Statement *stmts; struct Expression *ret_expr; /* the return expression of this block, e.g. {foo(); 3} => 3 NULL for no expression. */ } Block; typedef enum { EXPR_LITERAL_FLOAT, EXPR_LITERAL_INT, EXPR_LITERAL_STR, EXPR_LITERAL_BOOL, EXPR_LITERAL_CHAR, EXPR_IDENT, /* variable or constant */ EXPR_BINARY_OP, EXPR_UNARY_OP, EXPR_IF, EXPR_WHILE, EXPR_EACH, EXPR_FN, EXPR_CAST, EXPR_NEW, EXPR_CALL, EXPR_BLOCK, EXPR_TUPLE, EXPR_C, EXPR_DSIZEOF, EXPR_DALIGNOF, EXPR_SLICE, EXPR_TYPE, EXPR_PKG, /* a value (it's useful to have this). right now they don't work with cgen_set_tuple (as of yet, that is unneeded) */ EXPR_VAL } ExprKind; typedef enum { UNARY_MINUS, UNARY_ADDRESS, /* &x */ UNARY_DEREF, /* *x */ UNARY_NOT, /* !x */ UNARY_DEL, /* del x */ UNARY_LEN /* x.len ; replaces BINARY_DOT len when typing */ } UnaryOp; typedef enum { BINARY_SET, /* e.g. x = y */ BINARY_ADD, BINARY_SUB, BINARY_MUL, BINARY_DIV, BINARY_SET_ADD, /* e.g. x += y */ BINARY_SET_SUB, BINARY_SET_MUL, BINARY_SET_DIV, BINARY_GT, BINARY_LT, BINARY_GE, BINARY_LE, BINARY_EQ, BINARY_NE, BINARY_AT_INDEX, /* e.g. x[i] */ BINARY_DOT } BinaryOp; typedef struct CallExpr { struct Expression *fn; union { struct Argument *args; struct Expression *arg_exprs; }; struct Instance *instance; /* NULL = ordinary function, no compile time args */ struct { IdentID id; } c; } CallExpr; typedef struct IfExpr { struct Expression *cond; /* NULL = this is an else */ struct Expression *next_elif; /* next elif/else of this statement */ struct { IdentID id; } c; Block body; } IfExpr; typedef struct WhileExpr { struct Expression *cond; struct { IdentID id; } c; Block body; } WhileExpr; enum { EACH_IS_RANGE = 0x01, EACH_ANNOTATED_TYPE = 0x02, }; typedef struct EachExpr { U8 flags; struct { IdentID id; } c; Type type; Identifier index; /* NULL = no index */ Identifier value; /* NULL = no value */ Block body; union { struct { struct Expression *from; /* can't be null */ struct Expression *to; /* can be null */ union { /* (either) can be null */ struct Expression *step; /* before typing */ Value *stepval; /* after typing */ }; } range; struct Expression *of; }; } EachExpr; typedef struct FnExpr { struct Declaration *params; /* declarations of the parameters to this function */ struct Declaration *ret_decls; /* array of decls, if this has named return values. otherwise, NULL */ Type ret_type; Block body; HashTable instances; /* for fns with constant parameters. the key is a tuple where the first element is a u64 value whose ith bit (1<exported_fns) + 1, or 0 if this function has not been added to the exporting array yet */ } export; struct { /* if name = NULL, this is an anonymous function, and id will be the ID of the fn. */ Identifier name; IdentID id; } c; } FnExpr; /* an expression such as fn(x: int) int { 2 * x } */ typedef struct Instance { Value val; /* key into hash table */ FnExpr fn; /* the typed function */ struct { U64 id; } c; } Instance; typedef struct CastExpr { struct Expression *expr; Type type; } CastExpr; typedef struct NewExpr { Type type; struct Expression *n; /* e.g. for new(int, 5) */ } NewExpr; typedef struct SliceExpr { struct Expression *of; /* required */ struct Expression *from; /* optional */ struct Expression *to; /* optional */ struct { IdentID id; } c; } SliceExpr; enum { EXPR_FOUND_TYPE = 0x01 }; typedef struct Expression { Type type; Location where; ExprKind kind; U16 flags; union { Floating floatl; /* Floating floatl; */ U64 intl; StrLiteral strl; bool booll; char charl; struct { UnaryOp op; struct Expression *of; } unary; struct { BinaryOp op; struct Expression *lhs; struct Expression *rhs; Field *field; /* for . only */ } binary; CallExpr call; struct { struct Expression *code; } c; struct { struct Expression *of; } dsizeof; /* #sizeof directive */ struct { struct Expression *of; } dalignof; /* #alignof directive */ Identifier ident; NewExpr new; struct { Type type; } del; union { struct Expression *name_expr; Identifier name_ident; } pkg; IfExpr if_; WhileExpr while_; EachExpr *each; FnExpr *fn; CastExpr cast; SliceExpr slice; struct { Block block; IdentID block_ret_id; }; struct Expression *tuple; Type typeval; struct { Value val; IdentID val_c_id; }; }; } Expression; typedef struct Argument { Location where; Identifier name; /* NULL = no name */ Expression val; } Argument; enum { DECL_ANNOTATES_TYPE = 0x0001, DECL_IS_CONST = 0x0002, DECL_SEMI_CONST = 0x0004, DECL_HAS_EXPR = 0x0008, DECL_FOUND_TYPE = 0x0010, DECL_ERRORED_ABOUT_SELF_REFERENCE = 0x0020, /* has there been an error about this decl referencing itself? */ DECL_FOUND_VAL = 0x0040, DECL_IS_PARAM = 0x0080, DECL_INFER = 0x0100, /* infer the value (e.g. fn(t::Type=, x:t)) */ DECL_EXPORT = 0x0200 }; typedef U16 DeclFlags; /* OPTIM: Instead of using dynamic arrays, do two passes. */ typedef struct Declaration { Location where; Identifier *idents; Type type; DeclFlags flags; Expression expr; Value val; /* only for constant decls. */ } Declaration; typedef enum { STMT_DECL, STMT_EXPR, STMT_RET } StatementKind; enum { RET_HAS_EXPR = 0x01, }; typedef struct Return { U8 flags; /* if this changes, go to package.c */ Expression expr; } Return; enum { STMT_EXPR_NO_SEMICOLON = 0x01, }; typedef struct Statement { Location where; StatementKind kind; U16 flags; union { Declaration decl; Expression expr; Return ret; }; } Statement; typedef struct ParsedFile { Statement *stmts; Expression *pkg_name; } ParsedFile; typedef struct Parser { Tokenizer *tokr; Allocator *allocr; Block *block; /* which block are we in? NULL = file scope */ ParsedFile *file; } Parser; typedef enum { DECL_END_SEMICOLON, DECL_END_RPAREN_COMMA, DECL_END_LBRACE_COMMA } DeclEndKind; typedef struct Evaluator { Allocator *allocr; struct Typer *typer; bool returning; Value ret_val; void **to_free; /* an array of data to free for this scope. */ bool enabled; } Evaluator; typedef struct Typer { Allocator *allocr; Evaluator *evalr; Identifiers *idents; struct Exporter *exptr; Expression **in_expr_decls; /* an array of expressions whose declarations (e.g. each **x := foo**) we are currently inside */ Declaration **in_decls; /* array of declarations we are currently inside */ Block *block; Block **blocks; /* dyn array of all the block's we're in ([0] = NULL for global scope) */ FnExpr *fn; /* the function we're currently parsing. */ char *pkg_name; } Typer; typedef struct Package { Identifier name; Identifiers idents; } Package; typedef struct Exporter { FILE *out; /* .top (toc package) to output to */ bool export_locations; bool started; U64 ident_id; FnExpr **exported_fns; StructDef **exported_structs; Identifier *exported_idents; /* (only those whose names are exported) */ const char *code; } Exporter; typedef struct CGenerator { Allocator *allocr; FILE *outc; IdentID ident_counter; int indent_lvl; /* how many levels of indentation? */ bool will_indent; /* will the next thing be indented? */ ParsedFile *file; Block *block; FnExpr *fn; /* which function are we in? (NULL for none) - not used during decls */ Evaluator *evalr; Exporter *exptr; Identifier main_ident; Identifiers *idents; } CGenerator; #ifdef TOC_DEBUG #define add_ident_decls(b, d, flags) add_ident_decls_(__FILE__, __LINE__, b, d, flags) #endif