/* Copyright (C) 2019 Leo Tenenbaum. This file is part of toc. toc is distributed under version 3 of the GNU General Public License, without any warranty whatsoever. You should have received a copy of the GNU General Public License along with toc. If not, see . */ /* NOTE: make sure you edit copy.c when you make a change to expression-related types or type-related types in this file! */ typedef int64_t Integer; typedef uint64_t UInteger; typedef long double Floating; /* OPTIM: Switch to double, but make sure floating-point literals are right */ #if __STDC_VERSION__ < 201112 /* try to find the type with the strictest alignment */ typedef union { long double floating; void *ptr; #if __STDC_VERSION__ >= 199901 long #endif long integer; void (*fn_ptr)(void); } MaxAlign; #else typedef max_align_t MaxAlign; #endif #define INTEGER_MAX INT64_MAX #define UINTEGER_MAX UINT64_MAX #define INTEGER_FMT "%"PRId64 #define UINTEGER_FMT "%"PRIu64 typedef uint8_t U8; typedef uint16_t U16; typedef uint32_t U32; typedef uint64_t U64; typedef int8_t I8; typedef int16_t I16; typedef int32_t I32; typedef int64_t I64; /* NOTE: if you change these, make sure you change hash_tables.c */ typedef float F32; typedef double F64; #define F32_MANT_DIG FLT_MANT_DIG #define F32_DIG FLT_DIG #define F64_MANT_DIG DBL_MANT_DIG #define F64_DIG DBL_DIG #define F32_FMT "%.16f" #define F64_FMT "%.16f" typedef U32 IdentID; /* identifier ID for cgen (anonymous variables) */ typedef U32 LineNo; typedef U32 CodePos; #define join(a,b) a##b /* for keeping track of whence something came */ #ifdef TOC_DEBUG #define SOURCE_LOCATION char *src_file; int src_line; #define SOURCE_LOCATION_PARAMS char *src_file, int src_line, #define DEBUG_UNDERSCORE(x) join(x,_) #else #define SOURCE_LOCATION #define SOURCE_LOCATION_PARAMS #define DEBUG_UNDERSCORE(x) x #endif typedef struct Location { LineNo line; CodePos pos; /* position in ctx->str */ struct ErrCtx *ctx; } Location; typedef struct ErrCtx { const char *filename; char *str; /* file contents */ bool enabled; Location *instance_stack; /* stack of locations which generate the instances we're dealing with */ } ErrCtx; typedef struct Page { struct Page *next; size_t used; /* number MaxAligns used, not bytes */ MaxAlign data[]; } Page; typedef struct Allocator { Page *first; Page *last; } Allocator; typedef struct ArrBlock { void *data; size_t n; /* number of things in this block so far */ void *last; /* last one of them */ } ArrBlock; typedef struct BlockArr { size_t item_sz; int lg_block_sz; ArrBlock *blocks; } BlockArr; typedef struct HashTable { void *data; bool *occupied; U64 n; U64 cap; } HashTable; typedef struct Slice { I64 n; void *data; } Slice; typedef union Value { U8 u8; U16 u16; U32 u32; U64 u64; I8 i8; I16 i16; I32 i32; I64 i64; bool boolv; char charv; float f32; double f64; struct FnExpr *fn; void *arr; void *ptr; void *struc; union Value *tuple; Slice slice; struct Type *type; } Value; enum { IDECL_HAS_VAL = 0x01, }; typedef enum { IDECL_DECL, IDECL_EXPR } IdentDeclKind; typedef struct IdentDecl { union { struct Declaration *decl; struct Expression *expr; /* for example, this identifier is declared in an each expression */ }; struct Block *scope; /* NULL for file scope */ Value val; SOURCE_LOCATION IdentDeclKind kind; U16 flags; } IdentDecl; /* The way you search an identifier in a tree is: root.children[low part of 1st char].children[high part of 1st char] .children[low part of 2nd char]... */ #define TREE_NCHILDREN 16 typedef struct IdentTree { /* zero value is an empty trie */ uint16_t depth; unsigned char index_in_parent; /* index of this in .parent.children */ U64 id; /* 0 if there's no actual identifier here, otherwise unique positive integer associated with this identifier */ struct IdentTree *parent; struct IdentTree *children[TREE_NCHILDREN]; IdentDecl *decls; /* array of declarations of this identifier */ } IdentTree; typedef IdentTree *Identifier; typedef struct Identifiers { BlockArr trees; IdentTree *root; U64 nidents; } Identifiers; typedef enum { TOKEN_KW, TOKEN_IDENT, TOKEN_DIRECT, TOKEN_LITERAL_NUM, TOKEN_LITERAL_CHAR, TOKEN_LITERAL_STR, TOKEN_EOF } TokenKind; typedef enum { DIRECT_C, DIRECT_SIZEOF, DIRECT_ALIGNOF, DIRECT_COUNT } Directive; typedef enum { KW_SEMICOLON, KW_COLON, KW_COMMA, KW_LPAREN, KW_RPAREN, KW_LBRACE, KW_RBRACE, KW_LSQUARE, KW_RSQUARE, KW_EQ_EQ, KW_PLUS_EQ, KW_MINUS_EQ, KW_ASTERISK_EQ, KW_SLASH_EQ, KW_NE, KW_LE, KW_LT, KW_GE, KW_GT, KW_PLUS, KW_MINUS, KW_ASTERISK, KW_EXCLAMATION, KW_AMPERSAND, KW_SLASH, KW_DOTDOT, KW_DOT, KW_EQ, KW_LAST_SYMBOL = KW_EQ, /* last one entirely consisting of symbols */ KW_IF, KW_ELIF, KW_ELSE, KW_WHILE, KW_EACH, KW_RETURN, KW_FN, KW_AS, KW_NEW, KW_DEL, KW_STRUCT, KW_INT, KW_I8, KW_I16, KW_I32, KW_I64, KW_U8, KW_U16, KW_U32, KW_U64, KW_FLOAT, KW_F32, KW_F64, KW_TYPE, KW_CHAR, KW_BOOL, KW_TRUE, KW_FALSE, KW_COUNT } Keyword; typedef enum { NUM_LITERAL_INT, NUM_LITERAL_FLOAT } NumLiteralKind; typedef struct NumLiteral { NumLiteralKind kind; union { UInteger intval; Floating floatval; }; } NumLiteral; typedef struct StrLiteral { char *str; size_t len; } StrLiteral; /* NOTE: Location is typedef'd in util/err.c */ typedef struct Token { TokenKind kind; Location where; union { Keyword kw; Directive direct; Identifier ident; NumLiteral num; char chr; StrLiteral str; }; } Token; typedef struct Tokenizer { Allocator *allocr; Token *tokens; char *s; /* string being parsed */ ErrCtx *err_ctx; LineNo line; Token *token; /* token currently being processed */ Identifiers *idents; } Tokenizer; typedef enum { TYPE_VOID, TYPE_UNKNOWN, TYPE_BUILTIN, TYPE_FN, TYPE_TUPLE, TYPE_ARR, TYPE_PTR, TYPE_SLICE, TYPE_TYPE, TYPE_EXPR, /* just use this expression as the type. this kind of type doesn't exist after resolving. */ TYPE_STRUCT #define TYPE_COUNT (TYPE_STRUCT+1) } TypeKind; typedef enum { BUILTIN_I8, BUILTIN_I16, BUILTIN_I32, BUILTIN_I64, BUILTIN_U8, BUILTIN_U16, BUILTIN_U32, BUILTIN_U64, BUILTIN_F32, BUILTIN_F64, BUILTIN_CHAR, BUILTIN_BOOL } BuiltinType; /* field of a struct */ typedef struct Field { Identifier name; struct Type *type; size_t offset; /* offset during compile time */ } Field; typedef U8 Constness; #define CONSTNESS_NO ((Constness)0) #define CONSTNESS_SEMI ((Constness)1) #define CONSTNESS_YES ((Constness)2) typedef struct FnType { struct Type *types; /* dynamic array [0] = ret_type, [1:] = param_types */ Constness *constness; /* [i] = constness of param #i. iff no parameters are constant, this is NULL. don't use it as a dynamic array, because eventually it might not be. */ } FnType; enum { STRUCT_DEF_FOUND_OFFSETS = 0x01, STRUCT_DEF_CGENERATED = 0x02, }; typedef struct { Field *fields; U16 flags; size_t size; /* size of this struct during compile time */ struct { Identifier name; IdentID id; } c; } StructDef; enum { TYPE_IS_FLEXIBLE = 0x01, TYPE_IS_RESOLVED = 0x02, }; typedef U16 TypeFlags; typedef struct Type { Location where; TypeKind kind; TypeFlags flags; struct Expression *was_expr; /* if non-NULL, indicates that this type used to be an expression (TYPE_EXPR) */ union { BuiltinType builtin; FnType fn; struct Type *tuple; struct { struct Type *of; union { U64 n; /* after resolving */ struct Expression *n_expr; /* before resolving */ }; } arr; struct Type *ptr; struct Type *slice; StructDef *struc; /* it's a pointer so that multiple Types can reference the same struct definition */ struct Expression *expr; }; } Type; enum { BLOCK_IS_FN = 0x01, BLOCK_FOUND_TYPES = 0x02, }; typedef struct Block { U16 flags; Location start; Location end; struct Statement *stmts; struct Expression *ret_expr; /* the return expression of this block, e.g. {foo(); 3} => 3 NULL for no expression. */ } Block; typedef enum { EXPR_LITERAL_FLOAT, EXPR_LITERAL_INT, EXPR_LITERAL_STR, EXPR_LITERAL_BOOL, EXPR_LITERAL_CHAR, EXPR_IDENT, /* variable or constant */ EXPR_BINARY_OP, EXPR_UNARY_OP, EXPR_IF, EXPR_WHILE, EXPR_EACH, EXPR_FN, EXPR_CAST, EXPR_NEW, EXPR_CALL, EXPR_BLOCK, EXPR_TUPLE, EXPR_C, EXPR_DSIZEOF, EXPR_DALIGNOF, EXPR_SLICE, EXPR_TYPE, /* a value (it's useful to have this). right now they don't work with cgen_set_tuple (as of yet, that is unneeded) */ EXPR_VAL } ExprKind; typedef enum { UNARY_MINUS, UNARY_ADDRESS, /* &x */ UNARY_DEREF, /* *x */ UNARY_NOT, /* !x */ UNARY_DEL, /* del x */ UNARY_LEN /* x.len ; replaces BINARY_DOT len when typing */ } UnaryOp; typedef enum { BINARY_SET, /* e.g. x = y */ BINARY_ADD, BINARY_SUB, BINARY_MUL, BINARY_DIV, BINARY_SET_ADD, /* e.g. x += y */ BINARY_SET_SUB, BINARY_SET_MUL, BINARY_SET_DIV, BINARY_GT, BINARY_LT, BINARY_GE, BINARY_LE, BINARY_EQ, BINARY_NE, BINARY_AT_INDEX, /* e.g. x[i] */ BINARY_DOT } BinaryOp; typedef struct CallExpr { struct Expression *fn; union { struct Argument *args; struct Expression *arg_exprs; }; struct Instance *instance; /* NULL = ordinary function, no compile time args */ struct { IdentID id; } c; } CallExpr; typedef struct IfExpr { struct Expression *cond; /* NULL = this is an else */ struct Expression *next_elif; /* next elif/else of this statement */ struct { IdentID id; } c; Block body; } IfExpr; typedef struct WhileExpr { struct Expression *cond; struct { IdentID id; } c; Block body; } WhileExpr; enum { EACH_IS_RANGE = 0x01, EACH_ANNOTATED_TYPE = 0x02, }; typedef struct EachExpr { U16 flags; struct { IdentID id; } c; Type type; Identifier index; /* NULL = no index */ Identifier value; /* NULL = no value */ Block body; union { struct { struct Expression *from; /* can't be null */ struct Expression *to; /* can be null */ union { /* (either) can be null */ struct Expression *step; /* before typing */ Value *stepval; /* after typing */ }; } range; struct Expression *of; }; } EachExpr; typedef struct FnExpr { struct Declaration *params; /* declarations of the parameters to this function */ struct Declaration *ret_decls; /* array of decls, if this has named return values. otherwise, NULL */ Type ret_type; Block body; HashTable instances; /* for fns with constant parameters. the key is a tuple where the first element is a u64 value whose ith bit (1<