summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2025-09-16 20:49:49 -0400
committerpommicket <pommicket@gmail.com>2025-09-16 20:52:22 -0400
commitea7b73aac55177d1d556d0c9dba04b0870d3aaf6 (patch)
tree98862ab518680573c4d1d77542bc88b5ec9ceb2a
parent62bb1ffdee060819657161e260e75e3e1df017ac (diff)
Allow short reads from read_func
-rw-r--r--.gitignore2
-rw-r--r--Doxyfile2
-rw-r--r--cpp/Doxyfile26
-rw-r--r--cpp/examples/all_functions.cpp23
-rw-r--r--cpp/pom.cpp18
-rw-r--r--cpp/pom.hpp105
-rw-r--r--examples/all_functions.c23
-rw-r--r--pom.c16
-rw-r--r--pom.h6
-rwxr-xr-xpre-commit.sh7
10 files changed, 143 insertions, 85 deletions
diff --git a/.gitignore b/.gitignore
index c325e0d..bc23d9f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
-/doc
+doc
Debug
Release
.cache
diff --git a/Doxyfile b/Doxyfile
index eacb913..f70c18f 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -7,7 +7,7 @@ OUTPUT_LANGUAGE = English
OPTIMIZE_OUTPUT_FOR_C = YES
MARKDOWN_SUPPORT = YES
INPUT_ENCODING = UTF-8
-FILE_PATTERNS = pom.h
+INPUT = pom.h
RECURSIVE = NO
GENERATE_HTML = YES
HTML_OUTPUT = .
diff --git a/cpp/Doxyfile b/cpp/Doxyfile
new file mode 100644
index 0000000..03da967
--- /dev/null
+++ b/cpp/Doxyfile
@@ -0,0 +1,26 @@
+DOXYFILE_ENCODING = UTF-8
+PROJECT_NAME = libpom++
+PROJECT_BRIEF = "Parser for the POM configuration language"
+OUTPUT_DIRECTORY = doc
+CREATE_SUBDIRS = NO
+OUTPUT_LANGUAGE = English
+MARKDOWN_SUPPORT = YES
+INPUT_ENCODING = UTF-8
+INPUT = pom.hpp
+RECURSIVE = NO
+GENERATE_HTML = YES
+HTML_OUTPUT = .
+HTML_FILE_EXTENSION = .html
+GENERATE_LATEX = NO
+ENABLE_PREPROCESSING = YES
+MACRO_EXPANSION = YES
+MULTILINE_CPP_IS_BRIEF = YES
+AUTOLINK_SUPPORT = NO
+DISTRIBUTE_GROUP_DOC = YES
+EXTRACT_STATIC = YES
+COLLABORATION_GRAPH = NO
+WARN_IF_UNDOCUMENTED = NO
+QUIET = YES
+INCLUDE_GRAPH = NO
+WARN_AS_ERROR = FAIL_ON_WARNINGS
+HIDE_FRIEND_COMPOUNDS = YES
diff --git a/cpp/examples/all_functions.cpp b/cpp/examples/all_functions.cpp
index 326ae39..5081e83 100644
--- a/cpp/examples/all_functions.cpp
+++ b/cpp/examples/all_functions.cpp
@@ -15,24 +15,13 @@ public:
}
~FdReader() { close(m_fd); }
size_t read(char *buf, size_t size) {
- size_t total_read = 0;
- while (true) {
- // must call read in a loop to fill buf up as much as possible!
- // (read isn't guaranteed to read len bytes even if it could)
- ssize_t ret = ::read(m_fd, buf, size);
- if (ret < 0) {
- // read error
- throw "read error";
- } else if (ret == 0) {
- // end-of-file
- break;
- } else {
- total_read += ret;
- buf += ret;
- size -= ret;
- }
+ ssize_t ret = ::read(m_fd, buf, size);
+ if (ret < 0) {
+ // read error
+ throw "read error";
+ } else {
+ return ret;
}
- return total_read;
}
private:
int m_fd;
diff --git a/cpp/pom.cpp b/cpp/pom.cpp
index ffa70a2..058040e 100644
--- a/cpp/pom.cpp
+++ b/cpp/pom.cpp
@@ -64,18 +64,6 @@ Configuration Configuration::section(std::string_view name) const {
return Configuration(static_cast<void *>(C_section_copy));
}
-static void *allocator_calloc(void *udata, size_t n, size_t sz) {
- return static_cast<Allocator *>(udata)->calloc(n, sz);
-}
-
-static void *allocator_realloc(void *udata, void *ptr, size_t sz) {
- return static_cast<Allocator *>(udata)->realloc(ptr, sz);
-}
-
-static void allocator_free(void *udata, void *ptr) {
- return static_cast<Allocator *>(udata)->free(ptr);
-}
-
void Settings::set_error_language(std::string_view lang) {
size_t len = std::min(sizeof m_error_lang - 1, lang.size());
memcpy(m_error_lang, lang.data(), len);
@@ -85,12 +73,6 @@ void Settings::set_error_language(std::string_view lang) {
void Settings::to_C(void *C) const {
pom_settings &C_settings = *static_cast<pom_settings *>(C);
strcpy(C_settings.error_lang, m_error_lang);
- if (m_allocator) {
- C_settings.allocator_udata = m_allocator.get();
- C_settings.calloc = allocator_calloc;
- C_settings.realloc = allocator_realloc;
- C_settings.free = allocator_free;
- }
}
static size_t readable_read(void *udata, char *buf, size_t count) {
diff --git a/cpp/pom.hpp b/cpp/pom.hpp
index 69a979d..4e20727 100644
--- a/cpp/pom.hpp
+++ b/cpp/pom.hpp
@@ -1,3 +1,36 @@
+/// \file
+/// POM configuration parser for C++.
+///
+/// ## Thread-safety
+///
+/// Of course, you should not \ref pom::Configuration::merge into
+/// a configuration while another thread is using it.
+///
+/// Otherwise, libpom is fully thread-safe
+/// provided that C11 atomics are available
+/// (`__STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)`).
+/// But beware of race conditions when using \ref pom::Configuration::unread_keys
+/// — this will not lead to UB but you may get unexpected results —
+/// ensure there is synchronization so that all threads
+/// have certainly read their keys before calling it.
+///
+/// If C11 atomics are not available, you can almost certainly still get away
+/// with sharing configurations across threads, as long as you use proper
+/// synchronization for \ref pom::Configuration::unread_keys.
+/// (Essentially, libpom may end up writing the same value to the same address
+/// from separate threads, which is *technically* undefined behaviour, but will
+/// likely never be an issue on any real machine.)
+/// Even if you are extremely paranoid, you can still use
+/// distinct configurations in different threads without worry.
+///
+/// ## Notes
+///
+/// Every libpom++ function may change the value of `errno` arbitrarily
+/// (its value after any libpom++ call should be ignored).
+
+/// \mainpage libpom++ doxygen documentation
+///
+/// See \ref pom.hpp for all types/functions.
#ifndef POM_HPP_
#define POM_HPP_
@@ -11,18 +44,41 @@
namespace pom {
+/// A libpom++ error.
class Error: public std::exception {
public:
~Error();
Error(Error &other) = delete;
+ /// Get file where error occurred.
+ ///
+ /// Returned string view lives for as long as `this`.
std::string_view file() const noexcept;
+ /// Get line number where error occurred.
uint64_t line() const noexcept;
+ /// Get error message. You probably want \ref to_string instead.
+ ///
+ /// This only gets a single error message from this entry in an error list,
+ /// and doesn't include the file name or line number.
+ ///
+ /// Returned string view lives for as long as `this`.
std::string_view message() const noexcept;
+ /// Get next error
+ ///
+ /// Returned pointer lives for as long as `this`.
inline const Error *next() const noexcept { return m_next.get(); }
+ /// Get description of error.
+ ///
+ /// Returned string view lives for as long as `this`.
std::string_view to_string() noexcept;
- /// You should only call this on the first error in an error list.
- /// (This can't be enforced with constness because it needs to
- /// override `std::exception::what`.)
+ /// Get description of error (equivalent to \ref to_string — use that instead if you can).
+ ///
+ /// You should only call this on the first error in an error list
+ /// (i.e. don't call it on an error gotten from \ref next);
+ /// otherwise you will get a valid but mostly useless string.
+ /// (Unlike \ref to_string, this can't be enforced with
+ /// constness because this method needs to override `std::exception::what`.)
+ ///
+ /// Returned pointer lives for as long as `this`.
virtual const char *what() const noexcept override;
private:
friend class Configuration;
@@ -34,36 +90,38 @@ private:
bool m_is_original;
std::unique_ptr<const Error> m_next;
};
+/// Print error.
std::ostream &operator<<(std::ostream &, Error &);
-class Allocator {
-public:
- inline virtual ~Allocator() {};
- virtual void *calloc(size_t, size_t) = 0;
- virtual void *realloc(void *, size_t) = 0;
- virtual void free(void *) = 0;
-};
-
+/// Settings for configuration parsing.
class Settings {
public:
- inline Settings() {};
- /// Set allocator.
- inline void set_allocator(std::shared_ptr<Allocator> allocator) {
- m_allocator = allocator;
- }
+ /// Default settings.
+ inline Settings() {}
+ /// Set language for error messages.
+ ///
+ /// `lang` should be an IETF-like language tag.
+ ///
+ /// The closest supported language will be used
+ /// (e.g. `fr-CA` will currently redirect to `fr`).
+ ///
+ /// Currently supported: `en`, `fr`.
void set_error_language(std::string_view lang);
private:
void check_version() const;
void to_C(void *C) const;
friend class Configuration;
char m_error_lang[16] = {};
- std::shared_ptr<Allocator> m_allocator;
// to allow for future extensions without breaking backwards compatibility
const uint32_t version = 1;
};
+/// Abstract base class for a file reader.
+///
+/// This can be passed to \ref Configuration::Configuration(std::string_view, Reader &, const Settings *)
class Reader {
public:
+ /// Read up to `count` bytes of data into `buf`.
virtual size_t read(char *buf, size_t count) = 0;
};
@@ -90,7 +148,8 @@ private:
friend class Configuration;
std::string m_file;
uint64_t m_line;
- void *_reserved[4] = {};
+ // to allow for future extensions without breaking backwards compatibility
+ const uint32_t version = 1;
};
std::ostream &operator<<(std::ostream &, const Location &);
@@ -99,6 +158,10 @@ public:
Configuration();
Configuration &operator=(const Configuration &other);
inline Configuration(const Configuration &other) { *this = other; };
+ /// Load configuration from abstract \ref Reader.
+ ///
+ /// Most of the time, you will be able to use another constructor to load a configuration.
+ /// But if you have special functions for performing reads, you may need this.
Configuration(std::string_view filename, Reader &source, const Settings *settings = nullptr);
Configuration(std::string_view filename, std::istream &stream, const Settings *settings = nullptr);
Configuration(std::string_view path, const Settings *settings = nullptr);
@@ -118,9 +181,15 @@ public:
std::optional<std::vector<std::string>> get_list(std::string_view key) const;
std::vector<std::string> get_list_or_default(std::string_view key, const std::vector<std::string> &dflt) const;
Configuration section(std::string_view name) const;
+ /// Get list of keys which haven't been the target of a `get_*` method.
std::vector<std::string> unread_keys() const;
std::vector<std::string> keys() const;
std::vector<std::shared_ptr<Item>> items() const;
+ /// Merge `other` configuration into `this`.
+ ///
+ /// Puts all the key-value pairs of `other` into this configuration.
+ /// If `this` and `other` both have a value for a key, the one
+ /// in `other` is preferred.
void merge(const Configuration &other);
private:
void load(std::string_view filename, Reader &source, const Settings *settings);
diff --git a/examples/all_functions.c b/examples/all_functions.c
index 413d114..ee960b3 100644
--- a/examples/all_functions.c
+++ b/examples/all_functions.c
@@ -10,21 +10,16 @@
static size_t custom_read(void *udata, char *buf, size_t size) {
int fd = (int)(intptr_t)udata;
- size_t total_read = 0;
- while (true) {
- // must call read in a loop to fill buf up as much as possible!
- // (read isn't guaranteed to read len bytes even if it could)
- ssize_t ret = read(fd, buf, size);
- if (ret <= 0) {
- // read error/end-of-file
- break;
- } else {
- total_read += ret;
- buf += ret;
- size -= ret;
- }
+ // only read up to 4 bytes at a time. why not!
+ // it's much slower, but it is allowed.
+ ssize_t ret = read(fd, buf, size < 4 ? size : 4);
+ if (ret < 0) {
+ // read error occurred.
+ // we could store an error away somewhere if we wanted to
+ // (read errors are unusual anyways.)
+ ret = 0;
}
- return total_read;
+ return ret;
}
int main(void) {
diff --git a/pom.c b/pom.c
index 99102da..b41b9d4 100644
--- a/pom.c
+++ b/pom.c
@@ -170,8 +170,6 @@ struct parser {
size_t count;
} items;
bool
- // last call to read_func returned <size
- short_read,
// end-of-file reached
eof,
// memory allocation failed
@@ -446,22 +444,16 @@ static bool
parser_read_to_buf(struct parser *parser, bool skip_bom) {
if (parser->eof) return false;
uint8_t utf8_state = parser->utf8_state;
- if (parser->short_read) { // last read was short, so we're at EOF
- // EOF reached.
- eof:
+ char *buf = parser->buf;
+ size_t read_count = parser->read_func(parser->userdata, buf, sizeof parser->buf - 1);
+ parser->buf_pos = 0;
+ if (read_count == 0) {
if (utf8_state) {
parser_error(parser, ERROR_INVALID_UTF8);
}
parser->eof = true;
return false;
}
- char *buf = parser->buf;
- size_t read_count = parser->read_func(parser->userdata, buf, sizeof parser->buf - 1);
- parser->buf_pos = 0;
- if (read_count == 0)
- goto eof;
- if (read_count < sizeof parser->buf - 1)
- parser->short_read = true;
if (parser->leftover_cr && buf[0] != '\n')
parser_error(parser, ERROR_ASCII_CONTROL, '\r');
size_t in = 0, out = 0;
diff --git a/pom.h b/pom.h
index 8ae9ff1..e7f8777 100644
--- a/pom.h
+++ b/pom.h
@@ -138,9 +138,9 @@ typedef struct pom_settings {
///
/// `read_func` will be passed the `userdata` pointer passed to this function,
/// a buffer, and the length of that buffer (which will be nonzero).
-/// It must fill out the buffer as much as possible,
-/// and return the number of bytes read.
-/// A return value less than `len` indicates the end of the file was reached.
+/// It returns the number of bytes read, or 0 if the end of the file was reached.
+/// It can fill out as much or as little of the buffer as it wants —
+/// a short read count is not interpreted as the end of the file.
/// `read_func` will not be called excessively/with lots of tiny reads—it's
/// okay to do unbuffered reads in it.
///
diff --git a/pre-commit.sh b/pre-commit.sh
index bde503b..bebe9d9 100755
--- a/pre-commit.sh
+++ b/pre-commit.sh
@@ -1,7 +1,12 @@
#!/bin/sh
# Ensure no doxygen errors
-which doxygen >/dev/null 2>/dev/null && { doxygen || exit 1; }
+if which doxygen >/dev/null 2>/dev/null; then
+ doxygen || exit 1
+ cd cpp
+ doxygen || exit 1
+ cd ..
+fi
make -j`nproc` test || exit 1