diff options
author | Leo Tenenbaum <pommicket@gmail.com> | 2021-05-04 14:41:09 -0400 |
---|---|---|
committer | Leo Tenenbaum <pommicket@gmail.com> | 2021-05-04 14:41:09 -0400 |
commit | 6fb841710498e1b854204287e6a8c7b2a0bd3b5c (patch) | |
tree | e2ed6e2daee671295e0f14e916e00215b0996f3c | |
parent | 995ebf6366caaac08107f1ba175d476bbca6e7c4 (diff) |
multiple data types
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | main.c | 195 | ||||
-rw-r--r-- | ui.glade | 39 | ||||
-rw-r--r-- | unicode.h | 149 |
4 files changed, 354 insertions, 31 deletions
@@ -1,6 +1,6 @@ ALL_CFLAGS=$(CFLAGS) -Wall -Wextra -Wshadow -Wconversion -Wpedantic -pedantic -std=gnu99 \ -Wno-unused-function -Wno-unused-parameter -Wimplicit-fallthrough -Wno-format-truncation -Wno-unknown-warning-option \ - `pkg-config --libs --cflags gtk+-3.0` -rdynamic + `pkg-config --libs --cflags gtk+-3.0` -rdynamic -fno-strict-aliasing DEBUG_CFLAGS=$(ALL_CFLAGS) -DDEBUG -O0 -g RELEASE_CFLAGS=$(ALL_CFLAGS) -Ofast -g PROFILE_CFLAGS=$(ALL_CFLAGS) -Ofast -g -DPROFILE=1 @@ -7,6 +7,8 @@ #include <inttypes.h> #include <unistd.h> #include <assert.h> +#include <ctype.h> +#include <wctype.h> typedef pid_t PID; typedef uint64_t Address; @@ -14,6 +16,8 @@ typedef uint64_t Address; #define PRIdADDR PRId64 #define PRIxADDR PRIx64 +#include "unicode.h" + // a memory map typedef struct { Address lo, size; @@ -29,6 +33,8 @@ typedef enum { TYPE_U64, TYPE_S64, TYPE_ASCII, + TYPE_UTF16, + TYPE_UTF32, TYPE_F32, TYPE_F64 } DataType; @@ -42,6 +48,12 @@ static DataType data_type_from_name(char const *name) { case 32: return TYPE_U32; case 64: return TYPE_U64; } + if (strncmp(name, "utf", 3) == 0) { + switch (atoi(&name[3])) { + case 16: return TYPE_UTF16; + case 32: return TYPE_UTF32; + } + } break; case 's': switch (atoi(&name[1])) { @@ -66,6 +78,129 @@ static DataType data_type_from_name(char const *name) { return TYPE_U8; } +static size_t data_type_size(DataType type) { + switch (type) { + case TYPE_U8: + case TYPE_S8: + case TYPE_ASCII: + return 1; + case TYPE_U16: + case TYPE_S16: + case TYPE_UTF16: + return 2; + case TYPE_U32: + case TYPE_S32: + case TYPE_F32: + case TYPE_UTF32: + return 4; + case TYPE_U64: + case TYPE_S64: + case TYPE_F64: + return 8; + } + return (size_t)-1; +} + +// set str to "a" for 'a', "\\n" for '\n', "\\xff" for (wchar_t)255, etc. 
+static void char_to_str(uint32_t c, char *str, size_t str_size) { + if (c <= WINT_MAX && iswgraph((wint_t)c)) { + snprintf(str, str_size, "%lc", (wint_t)c); + } else { + switch (c) { + case ' ': snprintf(str, str_size, "(space)"); break; + case '\n': snprintf(str, str_size, "\\n"); break; + case '\t': snprintf(str, str_size, "\\t"); break; + case '\r': snprintf(str, str_size, "\\r"); break; + case '\v': snprintf(str, str_size, "\\v"); break; + case '\0': snprintf(str, str_size, "\\0"); break; + default: + if (c < 256) + snprintf(str, str_size, "\\x%02x", (unsigned)c); + else + snprintf(str, str_size, "\\x%05lx", (unsigned long)c); + } + } +} + +static bool char_from_str(char const *str, uint32_t *c) { + if (str[0] == '\0') return false; + if (str[0] == '\\') { + switch (str[1]) { + case 'n': *c = '\n'; return str[2] == '\0'; + case 't': *c = '\t'; return str[2] == '\0'; + case 'r': *c = '\r'; return str[2] == '\0'; + case 'v': *c = '\v'; return str[2] == '\0'; + case '0': *c = '\0'; return str[2] == '\0'; + case 'x': { + unsigned long v = 0; + int w = 0; + if (sscanf(&str[2], "%lx%n", &v, &w) != 1 || + (size_t)w != strlen(&str[2]) || v > UINT32_MAX) + return false; + *c = (uint32_t)v; + return true; + } + } + } + return unicode_utf8_to_utf32(c, str, strlen(str)) == strlen(str); +} + +static void data_to_str(void const *value, DataType type, char *str, size_t str_size) { + switch (type) { + case TYPE_U8: snprintf(str, str_size, "%" PRIu8, *(uint8_t *)value); break; + case TYPE_U16: snprintf(str, str_size, "%" PRIu16, *(uint16_t *)value); break; + case TYPE_U32: snprintf(str, str_size, "%" PRIu32, *(uint32_t *)value); break; + case TYPE_U64: snprintf(str, str_size, "%" PRIu64, *(uint64_t *)value); break; + case TYPE_S8: snprintf(str, str_size, "%" PRId8, *(int8_t *)value); break; + case TYPE_S16: snprintf(str, str_size, "%" PRId16, *(int16_t *)value); break; + case TYPE_S32: snprintf(str, str_size, "%" PRId32, *(int32_t *)value); break; + case TYPE_S64: 
snprintf(str, str_size, "%" PRId64, *(int64_t *)value); break; + case TYPE_F32: snprintf(str, str_size, "%g", *(float *)value); break; + case TYPE_F64: snprintf(str, str_size, "%g", *(double *)value); break; + case TYPE_UTF16: char_to_str(*(uint16_t *)value, str, str_size); break; + case TYPE_UTF32: char_to_str(*(uint32_t *)value, str, str_size); break; + case TYPE_ASCII: + char_to_str((uint8_t)*(char *)value, str, str_size); + break; + + } +} + +// returns true on success, false if str is not a well-formatted value +static bool data_from_str(char const *str, DataType type, void *value) { + int len = (int)strlen(str); + int w = 0; + uint32_t c = 0; + switch (type) { + case TYPE_U8: return sscanf(str, "%" SCNu8 "%n", (uint8_t *)value, &w) == 1 && w == len; + case TYPE_S8: return sscanf(str, "%" SCNd8 "%n", ( int8_t *)value, &w) == 1 && w == len; + case TYPE_U16: return sscanf(str, "%" SCNu16 "%n", (uint16_t *)value, &w) == 1 && w == len; + case TYPE_S16: return sscanf(str, "%" SCNd16 "%n", ( int16_t *)value, &w) == 1 && w == len; + case TYPE_U32: return sscanf(str, "%" SCNu32 "%n", (uint32_t *)value, &w) == 1 && w == len; + case TYPE_S32: return sscanf(str, "%" SCNd32 "%n", ( int32_t *)value, &w) == 1 && w == len; + case TYPE_U64: return sscanf(str, "%" SCNu64 "%n", (uint64_t *)value, &w) == 1 && w == len; + case TYPE_S64: return sscanf(str, "%" SCNd64 "%n", ( int64_t *)value, &w) == 1 && w == len; + case TYPE_F32: return sscanf(str, "%f%n", (float *)value, &w) == 1 && w == len; + case TYPE_F64: return sscanf(str, "%lf%n", (double *)value, &w) == 1 && w == len; + case TYPE_ASCII: + if (!char_from_str(str, &c)) return false; + if (c > 127) return false; + *(uint8_t *)value = (uint8_t)c; + return true; + case TYPE_UTF16: + if (!char_from_str(str, &c)) return false; + if (c > 65535) return false; + *(uint16_t *)value = (uint16_t)c; + return true; + case TYPE_UTF32: + if (!char_from_str(str, &c)) return false; + *(uint32_t *)value = c; + return true; + } + assert(0); + 
return false; +} + typedef struct { GtkWindow *window; GtkBuilder *builder; @@ -74,7 +209,7 @@ typedef struct { PID pid; Map *maps; Address memory_view_address; - Address memory_view_entries; // # of entries to show + uint32_t memory_view_entries; // # of entries to show unsigned nmaps; DataType data_type; } State; @@ -175,7 +310,7 @@ static Address memory_read_bytes(int reader, Address addr, uint8_t *memory, Addr lseek(reader, (off_t)addr, SEEK_SET); Address idx = 0; while (idx < nbytes) { - ssize_t n = read(reader, &memory[idx], nbytes - idx); + ssize_t n = read(reader, &memory[idx], (size_t)(nbytes - idx)); if (n <= 0) break; idx += (Address)n; } @@ -191,6 +326,18 @@ static Address memory_write_byte(int writer, Address addr, uint8_t byte) { return 0; } +// returns # of bytes written +static Address memory_write_bytes(int writer, Address addr, uint8_t const *bytes, Address nbytes) { + lseek(writer, (off_t)addr, SEEK_SET); + Address idx = 0; + while (idx < nbytes) { + ssize_t n = write(writer, &bytes[idx], (size_t)(nbytes - idx)); + if (n < 0) break; + idx += (Address)n; + } + return idx; +} + // pass config_potentially_changed = false if there definitely hasn't been an update to the target address // (this is used by auto-refresh so we don't have to clear and re-make the list store each time, which would screw up selection) static void update_memory_view(State *state, bool config_potentially_changed) { @@ -198,25 +345,27 @@ static void update_memory_view(State *state, bool config_potentially_changed) { return; GtkBuilder *builder = state->builder; GtkListStore *store = GTK_LIST_STORE(gtk_builder_get_object(builder, "memory")); - Address ndisplay = state->memory_view_entries; + uint32_t ndisplay = state->memory_view_entries; if (config_potentially_changed) gtk_list_store_clear(store); if (ndisplay == 0) return; - uint8_t *mem = calloc(1, ndisplay); + DataType data_type = state->data_type; + size_t item_size = data_type_size(data_type); + void *mem = 
calloc(item_size, ndisplay); if (mem) { int reader = memory_reader_open(state); if (reader) { GtkTreeIter iter; - ndisplay = memory_read_bytes(reader, state->memory_view_address, mem, ndisplay); + ndisplay = (uint32_t)memory_read_bytes(reader, state->memory_view_address, mem, ndisplay * item_size); gtk_tree_model_get_iter_from_string(GTK_TREE_MODEL(store), &iter, "0"); - for (Address i = 0; i < ndisplay; ++i) { - Address addr = state->memory_view_address + i; - uint8_t value = mem[i]; + char *value = mem; + Address addr = state->memory_view_address; + for (Address i = 0; i < ndisplay; i += 1, addr += item_size, value += item_size) { char index_str[32], addr_str[32], value_str[32]; sprintf(index_str, "%" PRIdADDR, i); sprintf(addr_str, "%" PRIxADDR, addr); - sprintf(value_str, "%u", value); + data_to_str(value, data_type, value_str, sizeof value_str); if (config_potentially_changed) { gtk_list_store_insert_with_values(store, &iter, -1, 0, index_str, 1, addr_str, 2, value_str, -1); } else { @@ -230,7 +379,7 @@ static void update_memory_view(State *state, bool config_potentially_changed) { } free(mem); } else { - display_error(state, "Out of memory (trying to display %zu bytes of memory).", ndisplay); + display_error(state, "Out of memory (trying to display %" PRIu32 " bytes of memory).", ndisplay); } } @@ -245,7 +394,7 @@ G_MODULE_EXPORT void update_configuration(GtkWidget *widget, gpointer user_data) bool update_memview = false; unsigned long n_entries = strtoul(n_entries_text, &endp, 10); if (*n_entries_text && !*endp && n_entries != state->memory_view_entries) { - state->memory_view_entries = n_entries; + state->memory_view_entries = (uint32_t)n_entries; update_memview = true; } char const *address_text = gtk_entry_get_text( @@ -326,7 +475,6 @@ G_MODULE_EXPORT void select_pid(GtkButton *button, gpointer user_data) { if (dir == -1) { display_error(state, "Error opening %s: %s", dirname, strerror(errno)); } else { - printf("PID: %lld\n", pid_number); int cmdline = 
openat(dir, "cmdline", O_RDONLY); char process_name[64] = {0}; if (cmdline != -1) { @@ -357,22 +505,22 @@ G_MODULE_EXPORT void select_pid(GtkButton *button, gpointer user_data) { G_MODULE_EXPORT void memory_edited(GtkCellRendererText *renderer, char *path, char *new_text, gpointer user_data) { State *state = user_data; GtkBuilder *builder = state->builder; + DataType data_type = state->data_type; + size_t item_size = data_type_size(data_type); Address idx = (Address)atol(path); - Address addr = state->memory_view_address + idx; - char *endp; - long value = strtol(new_text, &endp, 10); + Address addr = state->memory_view_address + idx * item_size; state->editing_memory = -1; - if (*new_text && *endp == '\0' && value >= 0 && value < 256) { - uint8_t byte = (uint8_t)value; + uint64_t value = 0; + if (data_from_str(new_text, data_type, &value)) { int writer = memory_writer_open(state); if (writer) { - bool success = memory_write_byte(writer, addr, byte) == 1; + bool success = memory_write_bytes(writer, addr, (uint8_t const *)&value, item_size) == item_size; memory_writer_close(state, writer); if (success) { GtkListStore *store = GTK_LIST_STORE(gtk_builder_get_object(builder, "memory")); GtkTreeIter iter; - char value_str[16]; - sprintf(value_str, "%u", byte); + char value_str[32]; + data_to_str(&value, data_type, value_str, sizeof value_str); gtk_tree_model_get_iter_from_string(GTK_TREE_MODEL(store), &iter, path); gtk_list_store_set(store, &iter, 2, value_str, -1); } @@ -381,13 +529,6 @@ G_MODULE_EXPORT void memory_edited(GtkCellRendererText *renderer, char *path, ch } } -G_MODULE_EXPORT void memory_row_activated(GtkTreeView *tree_view, GtkTreePath *path, GtkTreeViewColumn *column, gpointer user_data) { - State *state = user_data; - GtkBuilder *builder = state->builder; - GtkToggleButton *auto_refresh = GTK_TOGGLE_BUTTON(gtk_builder_get_object(builder, "auto-refresh")); - gtk_toggle_button_set_active(auto_refresh, 0); -} - G_MODULE_EXPORT void refresh_memory(GtkWidget 
*widget, gpointer user_data) { State *state = user_data; update_memory_view(state, true); @@ -136,7 +136,6 @@ <property name="vscroll-policy">natural</property> <property name="model">memory</property> <property name="search-column">0</property> - <signal name="row-activated" handler="memory_row_activated" swapped="no"/> <child internal-child="selection"> <object class="GtkTreeSelection"/> </child> @@ -443,6 +442,40 @@ </packing> </child> <child> + <object class="GtkRadioButton" id="type-utf16"> + <property name="label" translatable="yes">UTF-16 text</property> + <property name="name">utf16</property> + <property name="visible">True</property> + <property name="can-focus">True</property> + <property name="receives-default">False</property> + <property name="active">True</property> + <property name="draw-indicator">True</property> + <property name="group">type-u8</property> + <signal name="toggled" handler="update_configuration" swapped="no"/> + </object> + <packing> + <property name="expand">False</property> + <property name="fill">True</property> + <property name="position">12</property> + </packing> + </child> + <child> + <object class="GtkRadioButton" id="type-utf32"> + <property name="label" translatable="yes">UTF-32 text</property> + <property name="name">utf32</property> + <property name="visible">True</property> + <property name="can-focus">True</property> + <property name="receives-default">False</property> + <property name="draw-indicator">True</property> + <property name="group">type-u8</property> + </object> + <packing> + <property name="expand">False</property> + <property name="fill">True</property> + <property name="position">13</property> + </packing> + </child> + <child> <object class="GtkRadioButton" id="type-f32"> <property name="label" translatable="yes">32-bit floating-point</property> <property name="name">f32</property> @@ -456,7 +489,7 @@ <packing> <property name="expand">False</property> <property name="fill">True</property> - <property 
name="position">12</property> + <property name="position">14</property> </packing> </child> <child> @@ -473,7 +506,7 @@ <packing> <property name="expand">False</property> <property name="fill">True</property> - <property name="position">13</property> + <property name="position">15</property> </packing> </child> </object> diff --git a/unicode.h b/unicode.h new file mode 100644 index 0000000..c327006 --- /dev/null +++ b/unicode.h @@ -0,0 +1,149 @@ +#ifndef UNICODE_H_ +#define UNICODE_H_ +#define UNICODE_BOX_CHARACTER 0x2610 +#define UNICODE_CODE_POINTS 0x110000 // number of Unicode code points + +static bool unicode_is_start_of_code_point(uint8_t byte) { + // see https://en.wikipedia.org/wiki/UTF-8#Encoding + // continuation bytes are of the form 10xxxxxx + return (byte & 0xC0) != 0x80; +} + +// A lot like mbrtoc32. Doesn't depend on the locale though, for one thing. +// *c will be filled with the next UTF-8 code point in `str`. `bytes` refers to the maximum +// number of bytes that can be read from `str`. +// Returns: +// 0 - if a NULL character was encountered +// (size_t)-1 - on invalid UTF-8 +// (size_t)-2 - on incomplete code point (str should be longer) +// other - the number of bytes read from `str`. 
// Decodes the first UTF-8 sequence of `str` into *c, reading at most `bytes` bytes.
// Returns:
//   0          - if `bytes` is 0 or the first byte is a NUL character
//   (size_t)-1 - on invalid UTF-8: bad lead byte, a trailing byte that is not of the
//                form 10xxxxxx, an overlong encoding, a UTF-16 surrogate half, or a
//                value above U+10FFFF
//   (size_t)-2 - on incomplete code point (the lead byte asks for more than `bytes` bytes)
//   other      - the number of bytes read from `str` (1 to 4)
// *c is set to 0 on every return other than a successful decode.
static size_t unicode_utf8_to_utf32(uint32_t *c, char const *str, size_t bytes) {
	// it's easier to do things with unsigned integers
	uint8_t const *p = (uint8_t const *)str;

	*c = 0;
	if (bytes == 0 || p[0] == 0)
		return 0;

	uint8_t lead = p[0];
	if (lead < 0x80) {
		// ASCII character
		*c = lead;
		return 1;
	}

	size_t len;         // total length of the sequence announced by the lead byte
	uint32_t value;     // payload bits accumulated so far
	uint32_t min_value; // smallest code point that legitimately needs `len` bytes
	if ((lead & 0xE0) == 0xC0) {
		len = 2; value = (uint32_t)(lead & 0x1F); min_value = 0x80;
	} else if ((lead & 0xF0) == 0xE0) {
		len = 3; value = (uint32_t)(lead & 0x0F); min_value = 0x800;
	} else if ((lead & 0xF8) == 0xF0) {
		len = 4; value = (uint32_t)(lead & 0x07); min_value = 0x10000;
	} else {
		// stray continuation byte, or a lead byte for a 5+ byte sequence
		return (size_t)-1;
	}

	if (bytes < len) {
		// incomplete code point
		return (size_t)-2;
	}

	for (size_t i = 1; i < len; ++i) {
		// every trailing byte must be a continuation byte; this also stops a
		// terminating NUL from being swallowed as part of the code point
		if ((p[i] & 0xC0) != 0x80)
			return (size_t)-1;
		value = (value << 6) | (uint32_t)(p[i] & 0x3F);
	}

	if (value < min_value)
		return (size_t)-1; // overlong encoding
	if (value >= 0xD800 && value <= 0xDFFF)
		return (size_t)-1; // reserved for UTF-16 surrogate halves
	if (value > 0x10FFFF)
		return (size_t)-1; // can't be encoded by UTF-16, so invalid UTF-8

	*c = value;
	return len;
}

// A lot like c32rtomb
// Converts a UTF-32 codepoint to a UTF-8 string. Writes at most 4 bytes to s.
// NOTE: It is YOUR JOB to null-terminate your string if the UTF-32 isn't null-terminated!
// Returns the number of bytes written to s, or (size_t)-1 on invalid UTF-32.
// Invalid means a UTF-16 surrogate half (U+D800..U+DFFF) or a value above U+10FFFF;
// in that case a single 0 byte is stored in s[0].
static size_t unicode_utf32_to_utf8(char *s, uint32_t c32) {
	uint8_t *p = (uint8_t *)s;
	if (c32 <= 0x7F) {
		// ASCII
		*p = (uint8_t)c32;
		return 1;
	} else if (c32 <= 0x7FF) {
		// two bytes needed
		*p++ = (uint8_t)(0xC0 | (c32 >> 6));
		*p   = (uint8_t)(0x80 | (c32 & 0x3F));
		return 2;
	} else if (c32 <= 0xFFFF) {
		// three bytes cover the rest of the Basic Multilingual Plane (up to U+FFFF,
		// not U+7FFF), which is also where the surrogate range lives
		if (c32 < 0xD800 || c32 > 0xDFFF) {
			*p++ = (uint8_t)(0xE0 | ( c32 >> 12));
			*p++ = (uint8_t)(0x80 | ((c32 >>  6) & 0x3F));
			*p   = (uint8_t)(0x80 | ( c32        & 0x3F));
			return 3;
		} else {
			// UTF-16 surrogate halves
			*p = 0;
			return (size_t)-1;
		}
	} else if (c32 <= 0x10FFFF) {
		// four bytes needed
		*p++ = (uint8_t)(0xF0 | ( c32 >> 18));
		*p++ = (uint8_t)(0x80 | ((c32 >> 12) & 0x3F));
		*p++ = (uint8_t)(0x80 | ((c32 >>  6) & 0x3F));
		*p   = (uint8_t)(0x80 | ( c32        & 0x3F));
		return 4;
	} else {
		// code point too big
		*p = 0;
		return (size_t)-1;
	}
}
// (end of unicode.h: the original patch closes the UNICODE_H_ include guard here with "#endif")