From ca34590d9968d36feb1061b9e275d0f85f09863a Mon Sep 17 00:00:00 2001 From: Leo Tenenbaum Date: Sun, 1 Sep 2019 17:54:33 -0400 Subject: trying to get unicode idents to work --- base_cgen.c | 4 +++- identifiers.c | 35 ++++++++++++++++++++++++++++++++--- out.c | 6 +++--- out.h | 4 ++-- test.toc | 5 +++-- util/err.c | 1 - 6 files changed, 43 insertions(+), 12 deletions(-) diff --git a/base_cgen.c b/base_cgen.c index 125146b..b13012c 100644 --- a/base_cgen.c +++ b/base_cgen.c @@ -99,7 +99,9 @@ static bool cgen_ident(CGenerator *g, Identifier i, Location *where) { } } cgen_indent(g); - fprint_ident(cgen_writing_to(g), i); + fprint_ident(stdout, i); + printf("\n"); + fprint_ident_ascii(cgen_writing_to(g), i); return true; } diff --git a/identifiers.c b/identifiers.c index b7a2b3c..93c7eb6 100644 --- a/identifiers.c +++ b/identifiers.c @@ -72,6 +72,17 @@ static void idents_create(Identifiers *ids) { ids->root = ident_new(ids, NULL, 0); /* create root tree */ } +#if CHAR_MIN < 0 +#define ident_char_to_uchar(c) ((c) < 0 ? (256 + (c)) : (c)) +#else +#define ident_char_to_uchar(c) (c) +#endif + +#if CHAR_MIN < 0 +#define ident_uchar_to_char(c) ((c) > 127 ? ((c) - 256) : (c)) +#else +#define ident_uchar_to_char(c) (c) +#endif /* moves s to the char after the identifier */ /* inserts if does not exist. reads until non-ident char is found. */ @@ -82,10 +93,11 @@ static Identifier ident_insert(Identifiers *ids, char **s) { if (!isident(**s)) { return tree; } - int c = (**s) - CHAR_MIN; + int c = ident_char_to_uchar(**s); assert(c >= 0 && c <= 255); unsigned char c_low = (unsigned char)(c & 0xf); unsigned char c_high = (unsigned char)(c >> 4); + printf("inserting %d as %d = %d, %d\n", **s, c, c_low, c_high); if (!tree->children[c_low]) { tree->children[c_low] = ident_new(ids, tree, c_low); } @@ -106,10 +118,27 @@ static void fprint_ident(FILE *out, Identifier id) { fprint_ident(out, id->parent->parent); /* to go up one character, we need to go to the grandparent */ int c_low = id->parent->index_in_parent; int c_high = id->index_in_parent; - int c = c_low + (c_high << 4) + CHAR_MIN; + int c = ident_uchar_to_char(c_low + (c_high << 4)); fputc(c, out); } +static void fprint_ident_ascii(FILE *out, Identifier id) { + assert(id); + if (id->parent == NULL) return; /* at root */ + fprint_ident(out, id->parent->parent); /* to go up one character, we need to go to the grandparent */ + int c_low = id->parent->index_in_parent; + int c_high = id->index_in_parent; + int c = c_low + (c_high << 4); + printf("Got %d as %d, %d\n", c, c_low, c_high); + if (c > 127) { + puts("x thing"); + fprintf(out, "x__%x",c); + } else { + printf("single char %d\n",c); + fputc(ident_uchar_to_char(c), out); + } +} + /* NULL = no such identifier */ static Identifier ident_get(Identifiers *ids, const char *s) { IdentTree *tree = ids->root; @@ -137,7 +166,7 @@ static char *ident_to_str(Identifier i) { str--; unsigned char c_high = i->index_in_parent; unsigned char c_low = i->parent->index_in_parent; - char c = (char)(CHAR_MIN + (int)(c_low + (c_high << 4))); + char c = (char)ident_uchar_to_char((int)c_low + ((int)c_high << 4)); *str = c; i = i->parent->parent; /* go to grandparent (prev char) */ } diff --git a/out.c b/out.c index d363960..49ba6b7 100644 --- a/out.c +++ b/out.c @@ -1,10 +1,10 @@ #include "out.h" /* toc */ -void foo(void) { +void Äx__a8(void) { } -void main__(void) { - foo(); +void main(void) { + Äx__a8(); } int main(void) { diff --git a/out.h b/out.h index 8d1ba2b..23bf38f 100644 --- a/out.h +++ b/out.h @@ -1,4 +1,4 @@ #include #include -void foo(void); -void main__(void); +void Äx__a8(void); +void main(void); diff --git a/test.toc b/test.toc index 10e160d..4e46515 100644 --- a/test.toc +++ b/test.toc @@ -1,5 +1,6 @@ -foo @= fn() { +Ĩ @= fn() { }; main @= fn() { - foo(); + Ĩ(); + }; diff --git a/util/err.c b/util/err.c index 1a05e9d..72e94be 100644 --- a/util/err.c +++ b/util/err.c @@ -52,7 +52,6 @@ static void warn_print_header_(LineNo line) { } static void err_print_footer_(const char *context) { - err_fprint("\n\there --> "); const char *end = strchr(context, '\n'); int has_newline = end != NULL; if (!has_newline) -- cgit v1.2.3