typedef enum {
			  TOKEN_KW,
			  TOKEN_IDENT,
			  TOKEN_NUM_LITERAL,
			  TOKEN_EOF
			  /* TODO: char literals, str literals */
} TokenKind;

typedef enum {
			  KW_SEMICOLON,
			  KW_EQEQ,
			  KW_LT,
			  KW_LE,
			  KW_EQ,
			  KW_COUNT
} Keyword;

/* OPTIM: Use a trie or just a function if this gets too long */
static const char *keywords[KW_COUNT] =
	{";", "==", "<", "<=", "="}; 

#define TOKENIZER_USE_LLONG 1

#if TOKENIZER_USE_LLONG
typedef long long LiteralInt;
typedef unsigned long long LiteralUInt;
#define LITERAL_INT_FMT "%lld"
#define LITERAL_UINT_FMT "%llu"
#else
typedef long LiteralInt;
typedef unsigned long LiteralUInt;
#define LITERAL_INT_FMT "%ld"
#define LITERAL_UINT_FMT "%lu"
#endif

typedef double LiteralReal;

typedef enum {
			  NUM_LITERAL_INT,
			  NUM_LITERAL_UINT,
			  NUM_LITERAL_REAL
} NumLiteralKind;

typedef struct {
	NumLiteralKind kind;
	union {
		LiteralInt intval;
		LiteralUInt uintval;
		LiteralReal realval;
	};
} NumLiteral;

/* NOTE: LineNo is typedef'd in util/err.c */
typedef struct {
	TokenKind kind;
	LineNo line;
    LineNo col;
	union {
		Keyword kw;
		Identifier ident;
		NumLiteral num;
	};
} Token;

typedef struct {
	Token *tokens;
	size_t ntokens;
	size_t cap;	/* used internally */
	Token *token; /* token currently being processed */
} Tokenizer;

static void token_fprint(FILE *out, Token *t) {
	fprintf(out, "l%luc%lu-", (unsigned long)t->line, (unsigned long)t->col);
	switch (t->kind) {
	case TOKEN_KW:
		fprintf(out, "keyword: %s", keywords[t->kw]);
		break;
	case TOKEN_IDENT:
		fprintf(out, "identifier: %ld:", t->ident->id);
		ident_fprint(out, t->ident);
		break;
	case TOKEN_NUM_LITERAL:
		fprintf(out, "number: ");
		switch (t->num.kind) {
		case NUM_LITERAL_INT:
			fprintf(out, LITERAL_INT_FMT, t->num.intval);
			break;
		case NUM_LITERAL_UINT:
			fprintf(out, LITERAL_UINT_FMT, t->num.uintval);
			break;
		case NUM_LITERAL_REAL:
			fprintf(out, "%f", t->num.realval);
			break;
		}
		break;
	case TOKEN_EOF:
		fprintf(out, "eof");
		break;
	}
}

static void tokenizer_add(Tokenizer *t, Token *token, LineNo line, LineNo col) {
	if (t->ntokens >= t->cap) {
		t->cap *= 2;
		t->tokens = err_realloc(t->tokens, t->cap);
	}
	token->line = line;
	token->col = col;
	t->tokens[t->ntokens++] = *token;
}

static Tokenizer tokenize_string(char *s) {	/* NOTE: May modify string. Don't even try to pass it a literal.*/
	int has_err = 0;
	Tokenizer t;
	t.cap = 4096; /* TODO: test more tokens than this */
	t.ntokens = 0;
	t.tokens = malloc(t.cap * sizeof(*t.tokens));

	LineNo line = 1;
	LineNo col = 1;
	
	while (1) {
	    if (*s == 0) break;
		if (isspace(*s)) {
			if (*s == '\n') {
				line++;
				col = 0;
			}
			s++; col++;
	    	continue;
		}

		if (*s == '/') {
			/* maybe it's a comment */
			int is_comment = 1;
			s++; col++;
			switch (*s) {
			case '/': /* single line comment */
				for (s++; *s != '\n' && *s; s++);
				line++;
				col = 1;
				break;
			case '*': { /* multi line comment */
				int comment_level = 1; /* allow nested multi-line comments */
			    while (*s) {
					if (*s == '\n') {
						line++;
						col = 1;
						s++;
						continue;
					}
					if (s[0] == '*' && s[1] == '/') {
						s += 2; col += 2;
						comment_level--;
						if (comment_level == 0) {
							break;
						}
					} else if (s[0] == '/' && s[1] == '*') {
						s += 2; col += 2;
						comment_level++;
					} else {
						s++; col++;
					}
				}
				if (*s == 0) {
					err_print(line, col, "End of file reached inside multi-line comment.");
					abort(); /* there won't be any further errors, of course */
				}
			} break;
			default:
				is_comment = 0;
				s--; /* go back */
				break;
			}
			if (is_comment) continue;
		}
		Keyword kw;
		for (kw = 0; kw < KW_COUNT; kw++) {
			if (strncmp(s, keywords[kw], strlen(keywords[kw])) == 0) {
				break;
			}
		}
		if (kw != KW_COUNT) {
			/* it's a keyword */
			Token token;
			token.kind = TOKEN_KW;
			token.kw = kw;
			tokenizer_add(&t, &token, line, col);
			col += (LineNo)strlen(keywords[kw]);
			s += (LineNo)strlen(keywords[kw]);
			continue;
		}

		if (isdigit(*s)) {
			/* it's a numerical constant */
			int base = 10;
			LiteralInt intval = 0;
			LineNo line_start = line, col_start = col;
			if (*s == '0') {
				s++; col++;
				/* octal/hexadecimal/binary (or zero) */
				char format = *s;
				if (isdigit(format)) /* octal */
					base = 8;
				else {
					switch (format) {
					case 'b':
						base = 2;
						s++; col++;
						break;
					case 'x':
						base = 16;
						s++; col++;
						break;
					default:
						/* it's 0/0.something etc.  */
						break;
					}
				}
			}
			while (1) {
				if (*s == '.') {
					/* TODO */
				} else if (*s == 'e') {
					/* TODO */
				}
				int digit = -1;
				if (base == 16) {
					if (*s >= 'a' && *s <= 'f')
						digit = 10 + *s - 'a';
					else if (*s >= 'A' && *s <= 'F')
						digit = *s - 'A';
				}
				if (digit == -1) {
					if (*s >= '0' && *s <= '9')
						digit = *s - '0';
				}
				if (digit < 0 || digit >= base) {
					/* end of numerical literal */
					break;
				}
				/* TODO: check overflow; switch to uint */
				intval *= base;
				intval += digit;
				s++; col++;
			}
			Token token;
			token.kind = TOKEN_NUM_LITERAL;
			token.num.kind = NUM_LITERAL_INT;
			token.num.intval = intval;
			tokenizer_add(&t, &token, line_start, col_start);
			continue;
		}
		
		if (isident(*s)) {
			/* it's an identifier */
			Identifier ident = ident_insert(&s);
			Token token;
			token.kind = TOKEN_IDENT;
			token.ident = ident;
			tokenizer_add(&t, &token, line, col);			
			continue;
		}

		int has_newline;
		char *end_of_line = strchr(s, '\n');
		has_newline = end_of_line != NULL;
		if (has_newline)
			*end_of_line = 0;
		
		err_print(line, col, "Unrecognized token:\n\there --> %s\n", s);
		has_err = 1;
		if (has_newline) {
			/* increment line counter because of it */
		    line++;
			col = 1;
		} else {
			col += (LineNo)strlen(s);
		}
		s += strlen(s);
	}
	/* TODO: Check ferror/errno */
	if (has_err) {
		fprintf(stderr, "Errors occured while preprocessing.\n");
		abort();
	}
	t.token = t.tokens;
	return t;
}

static void tokenizer_free(Tokenizer *t) {
	free(t->tokens);
}