diff options
author | pommicket <pommicket@gmail.com> | 2025-09-16 20:49:49 -0400 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2025-09-16 20:52:22 -0400 |
commit | ea7b73aac55177d1d556d0c9dba04b0870d3aaf6 (patch) | |
tree | 98862ab518680573c4d1d77542bc88b5ec9ceb2a | |
parent | 62bb1ffdee060819657161e260e75e3e1df017ac (diff) |
Allow short reads from read_func
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Doxyfile | 2 | ||||
-rw-r--r-- | cpp/Doxyfile | 26 | ||||
-rw-r--r-- | cpp/examples/all_functions.cpp | 23 | ||||
-rw-r--r-- | cpp/pom.cpp | 18 | ||||
-rw-r--r-- | cpp/pom.hpp | 105 | ||||
-rw-r--r-- | examples/all_functions.c | 23 | ||||
-rw-r--r-- | pom.c | 16 | ||||
-rw-r--r-- | pom.h | 6 | ||||
-rwxr-xr-x | pre-commit.sh | 7 |
10 files changed, 143 insertions, 85 deletions
@@ -1,4 +1,4 @@ -/doc +doc Debug Release .cache @@ -7,7 +7,7 @@ OUTPUT_LANGUAGE = English OPTIMIZE_OUTPUT_FOR_C = YES MARKDOWN_SUPPORT = YES INPUT_ENCODING = UTF-8 -FILE_PATTERNS = pom.h +INPUT = pom.h RECURSIVE = NO GENERATE_HTML = YES HTML_OUTPUT = . diff --git a/cpp/Doxyfile b/cpp/Doxyfile new file mode 100644 index 0000000..03da967 --- /dev/null +++ b/cpp/Doxyfile @@ -0,0 +1,26 @@ +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = libpom++ +PROJECT_BRIEF = "Parser for the POM configuration language" +OUTPUT_DIRECTORY = doc +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +MARKDOWN_SUPPORT = YES +INPUT_ENCODING = UTF-8 +INPUT = pom.hpp +RECURSIVE = NO +GENERATE_HTML = YES +HTML_OUTPUT = . +HTML_FILE_EXTENSION = .html +GENERATE_LATEX = NO +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = YES +MULTILINE_CPP_IS_BRIEF = YES +AUTOLINK_SUPPORT = NO +DISTRIBUTE_GROUP_DOC = YES +EXTRACT_STATIC = YES +COLLABORATION_GRAPH = NO +WARN_IF_UNDOCUMENTED = NO +QUIET = YES +INCLUDE_GRAPH = NO +WARN_AS_ERROR = FAIL_ON_WARNINGS +HIDE_FRIEND_COMPOUNDS = YES diff --git a/cpp/examples/all_functions.cpp b/cpp/examples/all_functions.cpp index 326ae39..5081e83 100644 --- a/cpp/examples/all_functions.cpp +++ b/cpp/examples/all_functions.cpp @@ -15,24 +15,13 @@ public: } ~FdReader() { close(m_fd); } size_t read(char *buf, size_t size) { - size_t total_read = 0; - while (true) { - // must call read in a loop to fill buf up as much as possible! 
- // (read isn't guaranteed to read len bytes even if it could) - ssize_t ret = ::read(m_fd, buf, size); - if (ret < 0) { - // read error - throw "read error"; - } else if (ret == 0) { - // end-of-file - break; - } else { - total_read += ret; - buf += ret; - size -= ret; - } + ssize_t ret = ::read(m_fd, buf, size); + if (ret < 0) { + // read error + throw "read error"; + } else { + return ret; } - return total_read; } private: int m_fd; diff --git a/cpp/pom.cpp b/cpp/pom.cpp index ffa70a2..058040e 100644 --- a/cpp/pom.cpp +++ b/cpp/pom.cpp @@ -64,18 +64,6 @@ Configuration Configuration::section(std::string_view name) const { return Configuration(static_cast<void *>(C_section_copy)); } -static void *allocator_calloc(void *udata, size_t n, size_t sz) { - return static_cast<Allocator *>(udata)->calloc(n, sz); -} - -static void *allocator_realloc(void *udata, void *ptr, size_t sz) { - return static_cast<Allocator *>(udata)->realloc(ptr, sz); -} - -static void allocator_free(void *udata, void *ptr) { - return static_cast<Allocator *>(udata)->free(ptr); -} - void Settings::set_error_language(std::string_view lang) { size_t len = std::min(sizeof m_error_lang - 1, lang.size()); memcpy(m_error_lang, lang.data(), len); @@ -85,12 +73,6 @@ void Settings::set_error_language(std::string_view lang) { void Settings::to_C(void *C) const { pom_settings &C_settings = *static_cast<pom_settings *>(C); strcpy(C_settings.error_lang, m_error_lang); - if (m_allocator) { - C_settings.allocator_udata = m_allocator.get(); - C_settings.calloc = allocator_calloc; - C_settings.realloc = allocator_realloc; - C_settings.free = allocator_free; - } } static size_t readable_read(void *udata, char *buf, size_t count) { diff --git a/cpp/pom.hpp b/cpp/pom.hpp index 69a979d..4e20727 100644 --- a/cpp/pom.hpp +++ b/cpp/pom.hpp @@ -1,3 +1,36 @@ +/// \file +/// POM configuration parser for C++. 
+/// +/// ## Thread-safety +/// +/// Of course, you should not \ref pom::Configuration::merge into +/// a configuration while another thread is using it. +/// +/// Otherwise, libpom is fully thread-safe +/// provided that C11 atomics are available +/// (`__STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)`). +/// But beware of race conditions when using \ref pom::Configuration::unread_keys +/// — this will not lead to UB but you may get unexpected results — +/// ensure there is synchronization so that all threads +/// have certainly read their keys before calling it. +/// +/// If C11 atomics are not available, you can almost certainly still get away +/// with sharing configurations across threads, as long as you use proper +/// synchronization for \ref pom::Configuration::unread_keys. +/// (Essentially, libpom may end up writing the same value to the same address +/// from separate threads, which is *technically* undefined behaviour, but will +/// likely never be an issue on any real machine.) +/// Even if you are extremely paranoid, you can still use +/// distinct configurations in different threads without worry. +/// +/// ## Notes +/// +/// Every libpom++ function may change the value of `errno` arbitrarily +/// (its value after any libpom++ call should be ignored). + +/// \mainpage libpom++ doxygen documentation +/// +/// See \ref pom.hpp for all types/functions. #ifndef POM_HPP_ #define POM_HPP_ @@ -11,18 +44,41 @@ namespace pom { +/// A libpom++ error. class Error: public std::exception { public: ~Error(); Error(Error &other) = delete; + /// Get file where error occurred. + /// + /// Returned string view lives for as long as `this`. std::string_view file() const noexcept; + /// Get line number where error occurred. uint64_t line() const noexcept; + /// Get error message. You probably want \ref to_string instead. + /// + /// This only gets a single error message from this entry in an error list, + /// and doesn't include the file name or line number. 
+ /// + /// Returned string view lives for as long as `this`. std::string_view message() const noexcept; + /// Get next error + /// + /// Returned pointer lives for as long as `this`. inline const Error *next() const noexcept { return m_next.get(); } + /// Get description of error. + /// + /// Returned string view lives for as long as `this`. std::string_view to_string() noexcept; - /// You should only call this on the first error in an error list. - /// (This can't be enforced with constness because it needs to - /// override `std::exception::what`.) + /// Get description of error (equivalent to \ref to_string — use that instead if you can). + /// + /// You should only call this on the first error in an error list + /// (i.e. don't call it on an error gotten from \ref next); + /// otherwise you will get a valid but mostly useless string. + /// (Unlike \ref to_string, this can't be enforced with + /// constness because this method needs to override `std::exception::what`.) + /// + /// Returned pointer lives for as long as `this`. virtual const char *what() const noexcept override; private: friend class Configuration; @@ -34,36 +90,38 @@ private: bool m_is_original; std::unique_ptr<const Error> m_next; }; +/// Print error. std::ostream &operator<<(std::ostream &, Error &); -class Allocator { -public: - inline virtual ~Allocator() {}; - virtual void *calloc(size_t, size_t) = 0; - virtual void *realloc(void *, size_t) = 0; - virtual void free(void *) = 0; -}; - +/// Settings for configuration parsing. class Settings { public: - inline Settings() {}; - /// Set allocator. - inline void set_allocator(std::shared_ptr<Allocator> allocator) { - m_allocator = allocator; - } + /// Default settings. + inline Settings() {} + /// Set language for error messages. + /// + /// `lang` should be an IETF-like language tag. + /// + /// The closest supported language will be used + /// (e.g. `fr-CA` will currently redirect to `fr`). + /// + /// Currently supported: `en`, `fr`. 
void set_error_language(std::string_view lang); private: void check_version() const; void to_C(void *C) const; friend class Configuration; char m_error_lang[16] = {}; - std::shared_ptr<Allocator> m_allocator; // to allow for future extensions without breaking backwards compatibility const uint32_t version = 1; }; +/// Abstract base class for a file reader. +/// +/// This can be passed to \ref Configuration::Configuration(std::string_view, Reader &, const Settings *) class Reader { public: + /// Read up to `count` bytes of data into `buf`. virtual size_t read(char *buf, size_t count) = 0; }; @@ -90,7 +148,8 @@ private: friend class Configuration; std::string m_file; uint64_t m_line; - void *_reserved[4] = {}; + // to allow for future extensions without breaking backwards compatibility + const uint32_t version = 1; }; std::ostream &operator<<(std::ostream &, const Location &); @@ -99,6 +158,10 @@ public: Configuration(); Configuration &operator=(const Configuration &other); inline Configuration(const Configuration &other) { *this = other; }; + /// Load configuration from abstract \ref Reader. + /// + /// Most of the time, you will be able to use another constructor to load a configuration. + /// But if you have special functions for performing reads, you may need this. Configuration(std::string_view filename, Reader &source, const Settings *settings = nullptr); Configuration(std::string_view filename, std::istream &stream, const Settings *settings = nullptr); Configuration(std::string_view path, const Settings *settings = nullptr); @@ -118,9 +181,15 @@ public: std::optional<std::vector<std::string>> get_list(std::string_view key) const; std::vector<std::string> get_list_or_default(std::string_view key, const std::vector<std::string> &dflt) const; Configuration section(std::string_view name) const; + /// Get list of keys which haven't been the target of a `get_*` method. 
std::vector<std::string> unread_keys() const; std::vector<std::string> keys() const; std::vector<std::shared_ptr<Item>> items() const; + /// Merge `other` configuration into `this`. + /// + /// Puts all the key-value pairs of `other` into this configuration. + /// If `this` and `other` both have a value for a key, the one + /// in `other` is preferred. void merge(const Configuration &other); private: void load(std::string_view filename, Reader &source, const Settings *settings); diff --git a/examples/all_functions.c b/examples/all_functions.c index 413d114..ee960b3 100644 --- a/examples/all_functions.c +++ b/examples/all_functions.c @@ -10,21 +10,16 @@ static size_t custom_read(void *udata, char *buf, size_t size) { int fd = (int)(intptr_t)udata; - size_t total_read = 0; - while (true) { - // must call read in a loop to fill buf up as much as possible! - // (read isn't guaranteed to read len bytes even if it could) - ssize_t ret = read(fd, buf, size); - if (ret <= 0) { - // read error/end-of-file - break; - } else { - total_read += ret; - buf += ret; - size -= ret; - } + // only read up to 4 bytes at a time. why not! + // it's much slower, but it is allowed. + ssize_t ret = read(fd, buf, size < 4 ? size : 4); + if (ret < 0) { + // read error occurred. + // we could store an error away somewhere if we wanted to + // (read errors are unusual anyways.) + ret = 0; } - return total_read; + return ret; } int main(void) { @@ -170,8 +170,6 @@ struct parser { size_t count; } items; bool - // last call to read_func returned <size - short_read, // end-of-file reached eof, // memory allocation failed @@ -446,22 +444,16 @@ static bool parser_read_to_buf(struct parser *parser, bool skip_bom) { if (parser->eof) return false; uint8_t utf8_state = parser->utf8_state; - if (parser->short_read) { // last read was short, so we're at EOF - // EOF reached. 
- eof: + char *buf = parser->buf; + size_t read_count = parser->read_func(parser->userdata, buf, sizeof parser->buf - 1); + parser->buf_pos = 0; + if (read_count == 0) { if (utf8_state) { parser_error(parser, ERROR_INVALID_UTF8); } parser->eof = true; return false; } - char *buf = parser->buf; - size_t read_count = parser->read_func(parser->userdata, buf, sizeof parser->buf - 1); - parser->buf_pos = 0; - if (read_count == 0) - goto eof; - if (read_count < sizeof parser->buf - 1) - parser->short_read = true; if (parser->leftover_cr && buf[0] != '\n') parser_error(parser, ERROR_ASCII_CONTROL, '\r'); size_t in = 0, out = 0; @@ -138,9 +138,9 @@ typedef struct pom_settings { /// /// `read_func` will be passed the `userdata` pointer passed to this function, /// a buffer, and the length of that buffer (which will be nonzero). -/// It must fill out the buffer as much as possible, -/// and return the number of bytes read. -/// A return value less than `len` indicates the end of the file was reached. +/// It returns the number of bytes read, or 0 if the end of the file was reached. +/// It can fill out as much or as little of the buffer as it wants — +/// a short read count is not interpreted as the end of the file. /// `read_func` will not be called excessively/with lots of tiny reads—it's /// okay to do unbuffered reads in it. /// diff --git a/pre-commit.sh b/pre-commit.sh index bde503b..bebe9d9 100755 --- a/pre-commit.sh +++ b/pre-commit.sh @@ -1,7 +1,12 @@ #!/bin/sh # Ensure no doxygen errors -which doxygen >/dev/null 2>/dev/null && { doxygen || exit 1; } +if which doxygen >/dev/null 2>/dev/null; then + doxygen || exit 1 + cd cpp + doxygen || exit 1 + cd .. +fi make -j`nproc` test || exit 1 |