From ed07fe85ec0b85d8d0ade65caac0f04b999890b4 Mon Sep 17 00:00:00 2001 From: pommicket Date: Tue, 17 Oct 2023 13:27:30 -0400 Subject: set things up for UTF-8 PCRE2 --- CMakeLists.txt | 4 ++-- Makefile | 18 ++++++++++-------- find.c | 22 +++++++++++----------- make.bat | 7 ++++--- pcre-inc.h | 2 +- tags.c | 12 ++++++------ 6 files changed, 34 insertions(+), 31 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 238b113..74816ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,10 +27,10 @@ if(MSVC) set(CMAKE_C_FLAGS_DEBUG "/WX /Zi /Ob0 /Od /RTC1") set(SDL2_LIB_DIR ${CMAKE_SOURCE_DIR}/SDL2/lib/x64) target_link_libraries(ted ${SDL2_LIB_DIR}/SDL2.lib) - target_link_libraries(ted ${CMAKE_SOURCE_DIR}/pcre2-32-static.lib) + target_link_libraries(ted ${CMAKE_SOURCE_DIR}/pcre2-32-static.lib ${CMAKE_SOURCE_DIR}/pcre2-8-static.lib) else() # NOTE: -gdwarf-4 is needed for valgrind to work set(CMAKE_C_FLAGS "-Wall -Wextra -Wshadow -Wconversion -Wpedantic -pedantic -std=gnu11 -gdwarf-4 -Wno-unused-function -Wno-fixed-enum-extension -Wimplicit-fallthrough -Wno-format-truncation -Wno-unknown-warning-option") target_link_libraries(ted m SDL2) - target_link_libraries(ted ${CMAKE_SOURCE_DIR}/libpcre2-32.a) + target_link_libraries(ted ${CMAKE_SOURCE_DIR}/libpcre2-32.a ${CMAKE_SOURCE_DIR}/libpcre2-8.a) endif() diff --git a/Makefile b/Makefile index 651384d..50861cd 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ ALL_CFLAGS=$(CFLAGS) -Wall -Wextra -Wshadow -Wconversion -Wpedantic -pedantic -std=gnu11 \ -Wno-unused-function -Wno-fixed-enum-extension -Wimplicit-fallthrough -Wno-format-truncation -Wno-unknown-warning-option \ -Ipcre2 -LIBS=-lSDL2 -lGL -lm libpcre2-32.a +LIBS=-lSDL2 -lGL -lm libpcre2-32.a libpcre2-8.a RELEASE_CFLAGS=$(ALL_CFLAGS) -O3 PROFILE_CFLAGS=$(ALL_CFLAGS) -O3 -g -DPROFILE=1 # if you change the directories below, ted won't work. 
@@ -17,15 +17,15 @@ ted: debug/ted compile_commands.json: debug/ted rm -f compile_commands.json cp debug/compile_commands.json . -debug/ted: *.[ch] libpcre2-32.a CMakeLists.txt +debug/ted: *.[ch] pcre-lib CMakeLists.txt mkdir -p debug cd debug && cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DCMAKE_BUILD_TYPE=Debug -GNinja .. ninja -C debug -release: *.[ch] libpcre2-32.a +release: *.[ch] pcre-lib $(CC) main.c -o ted $(RELEASE_CFLAGS) $(LIBS) -release_debug: *.[ch] libpcre2-32.a +release_debug: *.[ch] pcre-lib $(CC) main.c -g -o ted $(RELEASE_CFLAGS) $(LIBS) -profile: *.[ch] libpcre2-32.a +profile: *.[ch] pcre-lib $(CC) main.c -o ted $(PROFILE_CFLAGS) $(LIBS) clean: rm -f ted *.o *.a @@ -39,9 +39,11 @@ install: release cp -r themes $(GLOBAL_DATA_DIR) install -m 644 ted.cfg $(GLOBAL_DATA_DIR) install ted $(INSTALL_BIN_DIR) -libpcre2-32.a: pcre2 - cd pcre2 && cmake -DPCRE2_BUILD_PCRE2_32=ON . && $(MAKE) -j8 - cp pcre2/libpcre2-32.a ./ +pcre-lib: + @if [ '(' '!' -f libpcre2-32.a ')' -o '(' '!' -f libpcre2-8.a ')' ]; then \ + cd pcre2 && cmake -DPCRE2_BUILD_PCRE2_32=ON . 
&& $(MAKE) -j8 && \ + cp libpcre2-32.a libpcre2-8.a ../ ; \ + fi keywords.h: keywords.py python3 keywords.py ted.deb: release diff --git a/find.c b/find.c index b5fee08..992f508 100644 --- a/find.c +++ b/find.c @@ -31,7 +31,7 @@ TextBuffer *find_search_buffer(Ted *ted) { static void ted_error_from_pcre2_error(Ted *ted, int err) { char32_t buf[256] = {0}; - size_t len = (size_t)pcre2_get_error_message(err, buf, arr_count(buf) - 1); + size_t len = (size_t)pcre2_get_error_message_32(err, buf, arr_count(buf) - 1); char *error_cstr = str32_to_utf8_cstr(str32(buf, len)); if (error_cstr) { ted_error(ted, "Search error: %s.", error_cstr); @@ -43,11 +43,11 @@ static bool find_compile_pattern(Ted *ted) { TextBuffer *find_buffer = ted->find_buffer; String32 term = buffer_get_line(find_buffer, 0); if (term.len) { - pcre2_match_data *match_data = pcre2_match_data_create(FIND_MAX_GROUPS, NULL); + pcre2_match_data_32 *match_data = pcre2_match_data_create_32(FIND_MAX_GROUPS, NULL); if (match_data) { int error = 0; PCRE2_SIZE error_pos = 0; - pcre2_code *code = pcre2_compile(term.str, term.len, find_compilation_flags(ted), &error, &error_pos, NULL); + pcre2_code_32 *code = pcre2_compile_32(term.str, term.len, find_compilation_flags(ted), &error, &error_pos, NULL); if (code) { ted->find_code = code; ted->find_match_data = match_data; @@ -56,7 +56,7 @@ static bool find_compile_pattern(Ted *ted) { } else { ted->find_invalid_pattern = true; } - pcre2_match_data_free(match_data); + pcre2_match_data_free_32(match_data); } else { ted_error(ted, "Out of memory."); } @@ -68,11 +68,11 @@ static bool find_compile_pattern(Ted *ted) { static void find_free_pattern(Ted *ted) { if (ted->find_code) { - pcre2_code_free(ted->find_code); + pcre2_code_free_32(ted->find_code); ted->find_code = NULL; } if (ted->find_match_data) { - pcre2_match_data_free(ted->find_match_data); + pcre2_match_data_free_32(ted->find_match_data); ted->find_match_data = NULL; } arr_clear(ted->find_results); @@ -97,19 +97,19 
@@ static WarnUnusedResult bool find_match(Ted *ted, BufferPos *pos, u32 *match_sta TextBuffer *buffer = find_search_buffer(ted); if (!buffer) return false; String32 str = buffer_get_line(buffer, pos->line); - PCRE2_SIZE *groups = pcre2_get_ovector_pointer(ted->find_match_data); + PCRE2_SIZE *groups = pcre2_get_ovector_pointer_32(ted->find_match_data); u32 match_flags = PCRE2_NOTEMPTY; int ret; if (direction == +1) - ret = pcre2_match(ted->find_code, str.str, str.len, pos->index, match_flags, ted->find_match_data, NULL); + ret = pcre2_match_32(ted->find_code, str.str, str.len, pos->index, match_flags, ted->find_match_data, NULL); else { // unfortunately PCRE does not have a backwards option, so we need to do the search multiple times u32 last_pos = 0; ret = -1; while (1) { - int next_ret = pcre2_match(ted->find_code, str.str, pos->index, last_pos, match_flags, ted->find_match_data, NULL); + int next_ret = pcre2_match_32(ted->find_code, str.str, pos->index, last_pos, match_flags, ted->find_match_data, NULL); if (next_ret > 0) { ret = next_ret; last_pos = (u32)groups[1]; @@ -243,14 +243,14 @@ static bool find_replace_match(Ted *ted, u32 match_idx) { char32_t *str = line.str + match.start.index; u32 len = match.end.index - match.start.index; - int ret = pcre2_substitute(ted->find_code, str, len, 0, + int ret = pcre2_substitute_32(ted->find_code, str, len, 0, PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|flags, ted->find_match_data, NULL, replacement.str, replacement.len, NULL, &output_size); char32_t *output_buffer = output_size ? 
calloc(output_size, sizeof *output_buffer) : NULL; if (output_buffer || !output_size) { - ret = pcre2_substitute(ted->find_code, str, len, 0, + ret = pcre2_substitute_32(ted->find_code, str, len, 0, flags, ted->find_match_data, NULL, replacement.str, replacement.len, output_buffer, &output_size); if (ret > 0) { diff --git a/make.bat b/make.bat index 30de8a4..16dc9db 100644 --- a/make.bat +++ b/make.bat @@ -3,14 +3,15 @@ if _%VCVARS% == _ ( set VCVARS=1 call vcvarsall x64 ) -if not exist pcre2-32-static.lib ( +if not exist pcre2-8-static.lib ( pushd pcre2 cmake -D PCRE2_BUILD_PCRE2_8=OFF -D PCRE2_BUILD_TESTS=OFF -D PCRE2_BUILD_PCRE2_32=ON -D CMAKE_BUILD_TYPE=Release -D CMAKE_GENERATOR_PLATFORM=x64 -D PCRE2_STATIC=ON . cmake --build . --config Release popd - copy pcre2\Release\pcre2-32-static.lib + copy /y pcre2\Release\pcre2-32-static.lib + copy /y pcre2\Release\pcre2-8-static.lib ) -SET C_FLAGS=/nologo /W4 /MD /wd4200 /wd4204 /wd4221 /wd4706 /wd4214 /D_CRT_SECURE_NO_WARNINGS /I SDL2/include /I pcre2 User32.lib SDL2/lib/x64/SDL2main.lib SDL2/lib/x64/SDL2.lib pcre2-32-static.lib +SET C_FLAGS=/nologo /W4 /MD /wd4200 /wd4204 /wd4221 /wd4706 /wd4214 /D_CRT_SECURE_NO_WARNINGS /I SDL2/include /I pcre2 User32.lib SDL2/lib/x64/SDL2main.lib SDL2/lib/x64/SDL2.lib pcre2-8-static.lib pcre2-32-static.lib rc /nologo ted.rc if _%1 == _ ( if not exist debug mkdir debug diff --git a/pcre-inc.h b/pcre-inc.h index 3d8f3b9..6c36f4a 100644 --- a/pcre-inc.h +++ b/pcre-inc.h @@ -5,7 +5,7 @@ #define PCRE_INC_H_ #define PCRE2_STATIC -#define PCRE2_CODE_UNIT_WIDTH 32 +#define PCRE2_CODE_UNIT_WIDTH 0 #include <pcre2.h> #endif // PCRE_INC_H_ diff --git a/tags.c b/tags.c index 7d8086b..68a3f2c 100644 --- a/tags.c +++ b/tags.c @@ -324,18 +324,18 @@ top:; if (end_anchored) options |= PCRE2_ENDANCHORED; int error_code; PCRE2_SIZE error_offset; - pcre2_code *code = pcre2_compile(pattern32.str, pattern32.len, + pcre2_code_32 *code = pcre2_compile_32(pattern32.str, pattern32.len, options, &error_code, 
&error_offset, NULL); if (code) { - pcre2_match_data *match_data = pcre2_match_data_create(10, NULL); + pcre2_match_data_32 *match_data = pcre2_match_data_create_32(10, NULL); if (match_data) { for (u32 line_idx = 0, line_count = buffer_line_count(buffer); line_idx < line_count; ++line_idx) { String32 line = buffer_get_line(buffer, line_idx); - int n = pcre2_match(code, line.str, line.len, 0, PCRE2_NOTEMPTY, + int n = pcre2_match_32(code, line.str, line.len, 0, PCRE2_NOTEMPTY, match_data, NULL); if (n == 1) { // found it! - PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(match_data); PCRE2_SIZE index = ovector[0]; BufferPos pos = {line_idx, (u32)index}; buffer_cursor_move_to_pos(buffer, pos); @@ -344,9 +344,9 @@ top:; break; } } - pcre2_match_data_free(match_data); + pcre2_match_data_free_32(match_data); } - pcre2_code_free(code); + pcre2_code_free_32(code); } str32_free(&pattern32); free(pattern); -- cgit v1.2.3