summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeo Tenenbaum <pommicket@gmail.com>2021-05-04 14:41:09 -0400
committerLeo Tenenbaum <pommicket@gmail.com>2021-05-04 14:41:09 -0400
commit6fb841710498e1b854204287e6a8c7b2a0bd3b5c (patch)
treee2ed6e2daee671295e0f14e916e00215b0996f3c
parent995ebf6366caaac08107f1ba175d476bbca6e7c4 (diff)
multiple data types
-rw-r--r--Makefile2
-rw-r--r--main.c195
-rw-r--r--ui.glade39
-rw-r--r--unicode.h149
4 files changed, 354 insertions, 31 deletions
diff --git a/Makefile b/Makefile
index 5c2d3e2..e77e7c1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
ALL_CFLAGS=$(CFLAGS) -Wall -Wextra -Wshadow -Wconversion -Wpedantic -pedantic -std=gnu99 \
-Wno-unused-function -Wno-unused-parameter -Wimplicit-fallthrough -Wno-format-truncation -Wno-unknown-warning-option \
- `pkg-config --libs --cflags gtk+-3.0` -rdynamic
+ `pkg-config --libs --cflags gtk+-3.0` -rdynamic -fno-strict-aliasing
DEBUG_CFLAGS=$(ALL_CFLAGS) -DDEBUG -O0 -g
RELEASE_CFLAGS=$(ALL_CFLAGS) -Ofast -g
PROFILE_CFLAGS=$(ALL_CFLAGS) -Ofast -g -DPROFILE=1
diff --git a/main.c b/main.c
index 65f0657..6052a26 100644
--- a/main.c
+++ b/main.c
@@ -7,6 +7,8 @@
#include <inttypes.h>
#include <unistd.h>
#include <assert.h>
+#include <ctype.h>
+#include <wctype.h>
typedef pid_t PID;
typedef uint64_t Address;
@@ -14,6 +16,8 @@ typedef uint64_t Address;
#define PRIdADDR PRId64
#define PRIxADDR PRIx64
+#include "unicode.h"
+
// a memory map
typedef struct {
Address lo, size;
@@ -29,6 +33,8 @@ typedef enum {
TYPE_U64,
TYPE_S64,
TYPE_ASCII,
+ TYPE_UTF16,
+ TYPE_UTF32,
TYPE_F32,
TYPE_F64
} DataType;
@@ -42,6 +48,12 @@ static DataType data_type_from_name(char const *name) {
case 32: return TYPE_U32;
case 64: return TYPE_U64;
}
+ if (strncmp(name, "utf", 3) == 0) {
+ switch (atoi(&name[3])) {
+ case 16: return TYPE_UTF16;
+ case 32: return TYPE_UTF32;
+ }
+ }
break;
case 's':
switch (atoi(&name[1])) {
@@ -66,6 +78,129 @@ static DataType data_type_from_name(char const *name) {
return TYPE_U8;
}
+// Width in bytes of one value of the given data type.
+// Yields (size_t)-1 when `type` is not one of the DataType enumerators.
+static size_t data_type_size(DataType type) {
+    size_t nbytes = (size_t)-1;
+    switch (type) {
+    case TYPE_U8: case TYPE_S8: case TYPE_ASCII:
+        nbytes = 1;
+        break;
+    case TYPE_U16: case TYPE_S16: case TYPE_UTF16:
+        nbytes = 2;
+        break;
+    case TYPE_U32: case TYPE_S32: case TYPE_F32: case TYPE_UTF32:
+        nbytes = 4;
+        break;
+    case TYPE_U64: case TYPE_S64: case TYPE_F64:
+        nbytes = 8;
+        break;
+    }
+    return nbytes;
+}
+
+// Write a human-readable form of the character `c` into `str` (at most `str_size` bytes,
+// terminated by snprintf):
+//   "a" for 'a', "(space)" for ' ', "\\n" for '\n' (likewise \t, \r, \v and \0),
+//   "\\xff" for 255, and a hex escape of at least five digits (e.g. "\\x1f600") above 255.
+// A character counts as printable when iswgraph() says so in the current locale.
+static void char_to_str(uint32_t c, char *str, size_t str_size) {
+    if (c <= WINT_MAX && iswgraph((wint_t)c)) {
+        // %lc makes snprintf fail (negative return, contents of str unspecified) when the
+        // character cannot be encoded in the current locale. Only stop here if it worked;
+        // otherwise fall through to the escape forms below so str is always well defined.
+        if (snprintf(str, str_size, "%lc", (wint_t)c) > 0)
+            return;
+    }
+    switch (c) {
+    case ' ':  snprintf(str, str_size, "(space)"); break;
+    case '\n': snprintf(str, str_size, "\\n"); break;
+    case '\t': snprintf(str, str_size, "\\t"); break;
+    case '\r': snprintf(str, str_size, "\\r"); break;
+    case '\v': snprintf(str, str_size, "\\v"); break;
+    case '\0': snprintf(str, str_size, "\\0"); break;
+    default:
+        if (c < 256)
+            snprintf(str, str_size, "\\x%02x", (unsigned)c);
+        else
+            snprintf(str, str_size, "\\x%05lx", (unsigned long)c);
+        break;
+    }
+}
+
+// Inverse of char_to_str for user input: parse `str` as exactly one character.
+// Accepted forms are the escapes \n \t \r \v \0, a hex escape \x<digits>, or a
+// single UTF-8 encoded code point. Returns true and stores the character in *c
+// on success; returns false if `str` is empty or has anything left over.
+static bool char_from_str(char const *str, uint32_t *c) {
+    size_t const len = strlen(str);
+    if (len == 0)
+        return false;
+
+    if (str[0] == '\\') {
+        char const kind = str[1];
+        if (kind == 'x') {
+            // hex escape: the digits must run to the end of the string
+            char const *digits = str + 2;
+            unsigned long code = 0;
+            int consumed = 0;
+            bool parsed = sscanf(digits, "%lx%n", &code, &consumed) == 1;
+            if (!parsed || (size_t)consumed != len - 2 || code > UINT32_MAX)
+                return false;
+            *c = (uint32_t)code;
+            return true;
+        }
+
+        uint32_t simple = 0;
+        bool known = true;
+        switch (kind) {
+        case 'n': simple = '\n'; break;
+        case 't': simple = '\t'; break;
+        case 'r': simple = '\r'; break;
+        case 'v': simple = '\v'; break;
+        case '0': simple = '\0'; break;
+        default:  known = false; break;
+        }
+        if (known) {
+            *c = simple;
+            // a two-character escape must be the entire string
+            return str[2] == '\0';
+        }
+        // unknown escape letter (or a lone backslash): treat the text as plain UTF-8
+    }
+    return unicode_utf8_to_utf32(c, str, len) == len;
+}
+
+// Render the value stored at `value`, interpreted according to `type`, as text in `str`
+// (at most `str_size` bytes). Integers are printed in decimal, floating-point
+// values with %g, and the character types (ASCII / UTF-16 / UTF-32) via char_to_str.
+// `str` is not written if `type` is not one of the DataType enumerators.
+static void data_to_str(void const *value, DataType type, char *str, size_t str_size) {
+    switch (type) {
+    // unsigned integers
+    case TYPE_U8:
+        snprintf(str, str_size, "%" PRIu8, *(uint8_t const *)value);
+        break;
+    case TYPE_U16:
+        snprintf(str, str_size, "%" PRIu16, *(uint16_t const *)value);
+        break;
+    case TYPE_U32:
+        snprintf(str, str_size, "%" PRIu32, *(uint32_t const *)value);
+        break;
+    case TYPE_U64:
+        snprintf(str, str_size, "%" PRIu64, *(uint64_t const *)value);
+        break;
+    // signed integers
+    case TYPE_S8:
+        snprintf(str, str_size, "%" PRId8, *(int8_t const *)value);
+        break;
+    case TYPE_S16:
+        snprintf(str, str_size, "%" PRId16, *(int16_t const *)value);
+        break;
+    case TYPE_S32:
+        snprintf(str, str_size, "%" PRId32, *(int32_t const *)value);
+        break;
+    case TYPE_S64:
+        snprintf(str, str_size, "%" PRId64, *(int64_t const *)value);
+        break;
+    // floating point
+    case TYPE_F32:
+        snprintf(str, str_size, "%g", *(float const *)value);
+        break;
+    case TYPE_F64:
+        snprintf(str, str_size, "%g", *(double const *)value);
+        break;
+    // characters
+    case TYPE_ASCII: {
+        // go through uint8_t so a byte >= 0x80 is not sign-extended where char is signed
+        uint8_t byte = (uint8_t)*(char const *)value;
+        char_to_str(byte, str, str_size);
+    } break;
+    case TYPE_UTF16:
+        char_to_str(*(uint16_t const *)value, str, str_size);
+        break;
+    case TYPE_UTF32:
+        char_to_str(*(uint32_t const *)value, str, str_size);
+        break;
+    }
+}
+
+// Parse all of `str` as a base-10 unsigned integer no greater than `max`.
+// Leading whitespace is tolerated; a minus sign, an empty string, trailing
+// characters or an out-of-range value are rejected. On success *out holds the value.
+static bool data_parse_unsigned(char const *str, uint64_t max, uint64_t *out) {
+    while (isspace((unsigned char)*str))
+        ++str;
+    if (*str == '-')
+        return false; // strtoull would silently negate instead of failing
+    char *end = NULL;
+    errno = 0;
+    unsigned long long v = strtoull(str, &end, 10);
+    if (end == str || *end != '\0' || errno == ERANGE || v > max)
+        return false;
+    *out = (uint64_t)v;
+    return true;
+}
+
+// Parse all of `str` as a base-10 signed integer in [min, max].
+// Same acceptance rules as data_parse_unsigned, except that a sign is allowed.
+static bool data_parse_signed(char const *str, int64_t min, int64_t max, int64_t *out) {
+    char *end = NULL;
+    errno = 0;
+    long long v = strtoll(str, &end, 10);
+    if (end == str || *end != '\0' || errno == ERANGE || v < min || v > max)
+        return false;
+    *out = (int64_t)v;
+    return true;
+}
+
+// Parse `str` as a value of the given type and store it at `value`, which must
+// point to storage large enough (and suitably aligned) for that type.
+// Returns true on success, false if str is not a well-formatted value; in that
+// case nothing is written to `value`.
+// Integer input that does not fit the type (e.g. "300" or "-1" for an unsigned
+// 8-bit value) is rejected rather than wrapped: scanf-style conversion has
+// undefined behavior when the result is not representable.
+static bool data_from_str(char const *str, DataType type, void *value) {
+    uint64_t u = 0;
+    int64_t s = 0;
+    uint32_t c = 0;
+    char *end = NULL;
+    switch (type) {
+    case TYPE_U8:
+        if (!data_parse_unsigned(str, UINT8_MAX, &u)) return false;
+        *(uint8_t *)value = (uint8_t)u;
+        return true;
+    case TYPE_U16:
+        if (!data_parse_unsigned(str, UINT16_MAX, &u)) return false;
+        *(uint16_t *)value = (uint16_t)u;
+        return true;
+    case TYPE_U32:
+        if (!data_parse_unsigned(str, UINT32_MAX, &u)) return false;
+        *(uint32_t *)value = (uint32_t)u;
+        return true;
+    case TYPE_U64:
+        if (!data_parse_unsigned(str, UINT64_MAX, &u)) return false;
+        *(uint64_t *)value = u;
+        return true;
+    case TYPE_S8:
+        if (!data_parse_signed(str, INT8_MIN, INT8_MAX, &s)) return false;
+        *(int8_t *)value = (int8_t)s;
+        return true;
+    case TYPE_S16:
+        if (!data_parse_signed(str, INT16_MIN, INT16_MAX, &s)) return false;
+        *(int16_t *)value = (int16_t)s;
+        return true;
+    case TYPE_S32:
+        if (!data_parse_signed(str, INT32_MIN, INT32_MAX, &s)) return false;
+        *(int32_t *)value = (int32_t)s;
+        return true;
+    case TYPE_S64:
+        if (!data_parse_signed(str, INT64_MIN, INT64_MAX, &s)) return false;
+        *(int64_t *)value = s;
+        return true;
+    case TYPE_F32: {
+        float f = strtof(str, &end);
+        if (end == str || *end != '\0') return false;
+        *(float *)value = f;
+        return true;
+    }
+    case TYPE_F64: {
+        double d = strtod(str, &end);
+        if (end == str || *end != '\0') return false;
+        *(double *)value = d;
+        return true;
+    }
+    case TYPE_ASCII:
+        if (!char_from_str(str, &c)) return false;
+        if (c > 127) return false; // not representable as ASCII
+        *(uint8_t *)value = (uint8_t)c;
+        return true;
+    case TYPE_UTF16:
+        if (!char_from_str(str, &c)) return false;
+        if (c > 65535) return false; // does not fit in a single 16-bit unit
+        *(uint16_t *)value = (uint16_t)c;
+        return true;
+    case TYPE_UTF32:
+        if (!char_from_str(str, &c)) return false;
+        *(uint32_t *)value = c;
+        return true;
+    }
+    assert(0);
+    return false;
+}
+
typedef struct {
GtkWindow *window;
GtkBuilder *builder;
@@ -74,7 +209,7 @@ typedef struct {
PID pid;
Map *maps;
Address memory_view_address;
- Address memory_view_entries; // # of entries to show
+ uint32_t memory_view_entries; // # of entries to show
unsigned nmaps;
DataType data_type;
} State;
@@ -175,7 +310,7 @@ static Address memory_read_bytes(int reader, Address addr, uint8_t *memory, Addr
lseek(reader, (off_t)addr, SEEK_SET);
Address idx = 0;
while (idx < nbytes) {
- ssize_t n = read(reader, &memory[idx], nbytes - idx);
+ ssize_t n = read(reader, &memory[idx], (size_t)(nbytes - idx));
if (n <= 0) break;
idx += (Address)n;
}
@@ -191,6 +326,18 @@ static Address memory_write_byte(int writer, Address addr, uint8_t byte) {
return 0;
}
+// Write `nbytes` bytes from `bytes` through the file descriptor `writer`, starting at
+// offset `addr`. The result of the seek is not checked.
+// Returns the number of bytes written, which is less than `nbytes` if a write
+// fails or makes no progress.
+static Address memory_write_bytes(int writer, Address addr, uint8_t const *bytes, Address nbytes) {
+    lseek(writer, (off_t)addr, SEEK_SET);
+    Address idx = 0;
+    while (idx < nbytes) {
+        ssize_t n = write(writer, &bytes[idx], (size_t)(nbytes - idx));
+        // stop on error AND on a zero-length write: treating 0 as progress
+        // would spin forever (memory_read_bytes uses the same n <= 0 test)
+        if (n <= 0) break;
+        idx += (Address)n;
+    }
+    return idx;
+}
+
// pass config_potentially_changed = false if there definitely hasn't been an update to the target address
// (this is used by auto-refresh so we don't have to clear and re-make the list store each time, which would screw up selection)
static void update_memory_view(State *state, bool config_potentially_changed) {
@@ -198,25 +345,27 @@ static void update_memory_view(State *state, bool config_potentially_changed) {
return;
GtkBuilder *builder = state->builder;
GtkListStore *store = GTK_LIST_STORE(gtk_builder_get_object(builder, "memory"));
- Address ndisplay = state->memory_view_entries;
+ uint32_t ndisplay = state->memory_view_entries;
if (config_potentially_changed)
gtk_list_store_clear(store);
if (ndisplay == 0)
return;
- uint8_t *mem = calloc(1, ndisplay);
+ DataType data_type = state->data_type;
+ size_t item_size = data_type_size(data_type);
+ void *mem = calloc(item_size, ndisplay);
if (mem) {
int reader = memory_reader_open(state);
if (reader) {
GtkTreeIter iter;
- ndisplay = memory_read_bytes(reader, state->memory_view_address, mem, ndisplay);
+ ndisplay = (uint32_t)memory_read_bytes(reader, state->memory_view_address, mem, ndisplay * item_size);
gtk_tree_model_get_iter_from_string(GTK_TREE_MODEL(store), &iter, "0");
- for (Address i = 0; i < ndisplay; ++i) {
- Address addr = state->memory_view_address + i;
- uint8_t value = mem[i];
+ char *value = mem;
+ Address addr = state->memory_view_address;
+ for (Address i = 0; i < ndisplay; i += 1, addr += item_size, value += item_size) {
char index_str[32], addr_str[32], value_str[32];
sprintf(index_str, "%" PRIdADDR, i);
sprintf(addr_str, "%" PRIxADDR, addr);
- sprintf(value_str, "%u", value);
+ data_to_str(value, data_type, value_str, sizeof value_str);
if (config_potentially_changed) {
gtk_list_store_insert_with_values(store, &iter, -1, 0, index_str, 1, addr_str, 2, value_str, -1);
} else {
@@ -230,7 +379,7 @@ static void update_memory_view(State *state, bool config_potentially_changed) {
}
free(mem);
} else {
- display_error(state, "Out of memory (trying to display %zu bytes of memory).", ndisplay);
+ display_error(state, "Out of memory (trying to display %" PRIu32 " bytes of memory).", ndisplay);
}
}
@@ -245,7 +394,7 @@ G_MODULE_EXPORT void update_configuration(GtkWidget *widget, gpointer user_data)
bool update_memview = false;
unsigned long n_entries = strtoul(n_entries_text, &endp, 10);
if (*n_entries_text && !*endp && n_entries != state->memory_view_entries) {
- state->memory_view_entries = n_entries;
+ state->memory_view_entries = (uint32_t)n_entries;
update_memview = true;
}
char const *address_text = gtk_entry_get_text(
@@ -326,7 +475,6 @@ G_MODULE_EXPORT void select_pid(GtkButton *button, gpointer user_data) {
if (dir == -1) {
display_error(state, "Error opening %s: %s", dirname, strerror(errno));
} else {
- printf("PID: %lld\n", pid_number);
int cmdline = openat(dir, "cmdline", O_RDONLY);
char process_name[64] = {0};
if (cmdline != -1) {
@@ -357,22 +505,22 @@ G_MODULE_EXPORT void select_pid(GtkButton *button, gpointer user_data) {
G_MODULE_EXPORT void memory_edited(GtkCellRendererText *renderer, char *path, char *new_text, gpointer user_data) {
State *state = user_data;
GtkBuilder *builder = state->builder;
+ DataType data_type = state->data_type;
+ size_t item_size = data_type_size(data_type);
Address idx = (Address)atol(path);
- Address addr = state->memory_view_address + idx;
- char *endp;
- long value = strtol(new_text, &endp, 10);
+ Address addr = state->memory_view_address + idx * item_size;
state->editing_memory = -1;
- if (*new_text && *endp == '\0' && value >= 0 && value < 256) {
- uint8_t byte = (uint8_t)value;
+ uint64_t value = 0;
+ if (data_from_str(new_text, data_type, &value)) {
int writer = memory_writer_open(state);
if (writer) {
- bool success = memory_write_byte(writer, addr, byte) == 1;
+ bool success = memory_write_bytes(writer, addr, (uint8_t const *)&value, item_size) == item_size;
memory_writer_close(state, writer);
if (success) {
GtkListStore *store = GTK_LIST_STORE(gtk_builder_get_object(builder, "memory"));
GtkTreeIter iter;
- char value_str[16];
- sprintf(value_str, "%u", byte);
+ char value_str[32];
+ data_to_str(&value, data_type, value_str, sizeof value_str);
gtk_tree_model_get_iter_from_string(GTK_TREE_MODEL(store), &iter, path);
gtk_list_store_set(store, &iter, 2, value_str, -1);
}
@@ -381,13 +529,6 @@ G_MODULE_EXPORT void memory_edited(GtkCellRendererText *renderer, char *path, ch
}
}
-G_MODULE_EXPORT void memory_row_activated(GtkTreeView *tree_view, GtkTreePath *path, GtkTreeViewColumn *column, gpointer user_data) {
- State *state = user_data;
- GtkBuilder *builder = state->builder;
- GtkToggleButton *auto_refresh = GTK_TOGGLE_BUTTON(gtk_builder_get_object(builder, "auto-refresh"));
- gtk_toggle_button_set_active(auto_refresh, 0);
-}
-
G_MODULE_EXPORT void refresh_memory(GtkWidget *widget, gpointer user_data) {
State *state = user_data;
update_memory_view(state, true);
diff --git a/ui.glade b/ui.glade
index 32c9559..94d37ae 100644
--- a/ui.glade
+++ b/ui.glade
@@ -136,7 +136,6 @@
<property name="vscroll-policy">natural</property>
<property name="model">memory</property>
<property name="search-column">0</property>
- <signal name="row-activated" handler="memory_row_activated" swapped="no"/>
<child internal-child="selection">
<object class="GtkTreeSelection"/>
</child>
@@ -443,6 +442,40 @@
</packing>
</child>
<child>
+ <object class="GtkRadioButton" id="type-utf16">
+ <property name="label" translatable="yes">UTF-16 text</property>
+ <property name="name">utf16</property>
+ <property name="visible">True</property>
+ <property name="can-focus">True</property>
+ <property name="receives-default">False</property>
+ <property name="active">True</property>
+ <property name="draw-indicator">True</property>
+ <property name="group">type-u8</property>
+ <signal name="toggled" handler="update_configuration" swapped="no"/>
+ </object>
+ <packing>
+ <property name="expand">False</property>
+ <property name="fill">True</property>
+ <property name="position">12</property>
+ </packing>
+ </child>
+ <child>
+ <object class="GtkRadioButton" id="type-utf32">
+ <property name="label" translatable="yes">UTF-32 text</property>
+ <property name="name">utf32</property>
+ <property name="visible">True</property>
+ <property name="can-focus">True</property>
+ <property name="receives-default">False</property>
+ <property name="draw-indicator">True</property>
+ <property name="group">type-u8</property>
+ <!-- connect "toggled" like the type-utf16 button does, so selecting this type runs update_configuration -->
+ <signal name="toggled" handler="update_configuration" swapped="no"/>
+ </object>
+ <packing>
+ <property name="expand">False</property>
+ <property name="fill">True</property>
+ <property name="position">13</property>
+ </packing>
+ </child>
+ <child>
<object class="GtkRadioButton" id="type-f32">
<property name="label" translatable="yes">32-bit floating-point</property>
<property name="name">f32</property>
@@ -456,7 +489,7 @@
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
- <property name="position">12</property>
+ <property name="position">14</property>
</packing>
</child>
<child>
@@ -473,7 +506,7 @@
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
- <property name="position">13</property>
+ <property name="position">15</property>
</packing>
</child>
</object>
diff --git a/unicode.h b/unicode.h
new file mode 100644
index 0000000..c327006
--- /dev/null
+++ b/unicode.h
@@ -0,0 +1,149 @@
+#ifndef UNICODE_H_
+#define UNICODE_H_
+#define UNICODE_BOX_CHARACTER 0x2610
+#define UNICODE_CODE_POINTS 0x110000 // number of Unicode code points
+
+// Returns false for a UTF-8 continuation byte and true for any other byte.
+static bool unicode_is_start_of_code_point(uint8_t byte) {
+    // see https://en.wikipedia.org/wiki/UTF-8#Encoding
+    // a continuation byte has the bit pattern 10xxxxxx, i.e. its top two bits are 1,0
+    uint8_t const top_two_bits = (uint8_t)(byte >> 6);
+    bool const is_continuation = top_two_bits == 0x2;
+    return !is_continuation;
+}
+
+// A lot like mbrtoc32. Doesn't depend on the locale though, for one thing.
+// *c will be filled with the next UTF-8 code point in `str`. `bytes` refers to the maximum
+// number of bytes that can be read from `str`.
+// Returns:
+// 0 - if a NULL character was encountered (or bytes == 0)
+// (size_t)-1 - on invalid UTF-8: a bad lead byte, a byte after the lead byte that is not
+//              a continuation byte, an overlong encoding, a UTF-16 surrogate half,
+//              or a value above 0x10FFFF
+// (size_t)-2 - on incomplete code point (str should be longer)
+// other - the number of bytes read from `str`.
+// *c is set to 0 whenever the return value is 0, (size_t)-1 or (size_t)-2.
+static size_t unicode_utf8_to_utf32(uint32_t *c, char const *str, size_t bytes) {
+    *c = 0;
+    if (bytes == 0)
+        return 0;
+    // it's easier to do things with unsigned integers
+    uint8_t const *p = (uint8_t const *)str;
+    uint8_t const first_byte = p[0];
+
+    if (first_byte < 0x80) {
+        // ASCII character (0 is reported as "NULL character encountered")
+        *c = first_byte;
+        return first_byte ? 1 : 0;
+    }
+
+    // Work out the sequence length from the lead byte, together with the smallest
+    // value that genuinely needs that many bytes (anything lower is overlong).
+    size_t length;
+    uint32_t value, min_value;
+    if ((first_byte & 0xE0) == 0xC0) {
+        length = 2; value = first_byte & 0x1Fu; min_value = 0x80;
+    } else if ((first_byte & 0xF0) == 0xE0) {
+        length = 3; value = first_byte & 0x0Fu; min_value = 0x800;
+    } else if ((first_byte & 0xF8) == 0xF0) {
+        length = 4; value = first_byte & 0x07u; min_value = 0x10000;
+    } else {
+        // stray continuation byte, or a lead byte of the form 11111xxx
+        return (size_t)-1;
+    }
+
+    if (bytes < length)
+        return (size_t)-2; // incomplete code point
+
+    for (size_t i = 1; i < length; ++i) {
+        if ((p[i] & 0xC0) != 0x80)
+            return (size_t)-1; // every byte after the lead must look like 10xxxxxx
+        value = value << 6 | (p[i] & 0x3Fu);
+    }
+
+    if (value < min_value)
+        return (size_t)-1; // overlong encoding
+    if (value >= 0xD800 && value <= 0xDFFF)
+        return (size_t)-1; // reserved for UTF-16 surrogate halves
+    if (value > 0x10FFFF)
+        return (size_t)-1; // can't be encoded by UTF-16, so invalid UTF-8
+
+    *c = value;
+    return length;
+}
+
+// A lot like c32rtomb
+// Converts a UTF-32 codepoint to a UTF-8 string. Writes at most 4 bytes to s.
+// NOTE: It is YOUR JOB to null-terminate your string if the UTF-32 isn't null-terminated!
+// Returns the number of bytes written to s, or (size_t)-1 on invalid UTF-32
+// (a UTF-16 surrogate half or a value above 0x10FFFF); in that case a single 0 byte
+// is stored at s[0].
+static size_t unicode_utf32_to_utf8(char *s, uint32_t c32) {
+    uint8_t *p = (uint8_t *)s;
+    if (c32 <= 0x7F) {
+        // ASCII
+        *p = (uint8_t)c32;
+        return 1;
+    } else if (c32 <= 0x7FF) {
+        // two bytes needed
+        *p++ = (uint8_t)(0xC0 | (c32 >> 6));
+        *p   = (uint8_t)(0x80 | (c32 & 0x3F));
+        return 2;
+    } else if (c32 <= 0xFFFF) {
+        // Three bytes cover everything up to U+FFFF. (An upper bound of 0x7FFF here would
+        // push U+8000..U+FFFF, surrogates included, into the four-byte branch.)
+        if (c32 < 0xD800 || c32 > 0xDFFF) {
+            *p++ = (uint8_t)(0xE0 | ( c32 >> 12));
+            *p++ = (uint8_t)(0x80 | ((c32 >> 6) & 0x3F));
+            *p   = (uint8_t)(0x80 | ( c32       & 0x3F));
+            return 3;
+        } else {
+            // UTF-16 surrogate halves
+            *p = 0;
+            return (size_t)-1;
+        }
+    } else if (c32 <= 0x10FFFF) {
+        // four bytes needed
+        *p++ = (uint8_t)(0xF0 | ( c32 >> 18));
+        *p++ = (uint8_t)(0x80 | ((c32 >> 12) & 0x3F));
+        *p++ = (uint8_t)(0x80 | ((c32 >> 6)  & 0x3F));
+        *p   = (uint8_t)(0x80 | ( c32        & 0x3F));
+        return 4;
+    } else {
+        // code point too big
+        *p = 0;
+        return (size_t)-1;
+    }
+}
+#endif // UNICODE_H_