diff options
author | pommicket <pommicket@gmail.com> | 2025-09-17 00:57:40 -0400 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2025-09-17 00:57:45 -0400 |
commit | 72a54324836f8ca4d102a2e5ba46093f127d8cfa (patch) | |
tree | d3cebaab40f9645c42fb28ec1821dacd2652a271 | |
parent | ea1f2121389569d5e0addf8066bca9bf24174c16 (diff) |
More parsing
-rw-r--r-- | examples/conf.pom | 2 | ||||
-rw-r--r-- | examples/read_conf.py | 9 | ||||
-rw-r--r-- | mypy.ini | 1 | ||||
-rw-r--r-- | pom_parser/__init__.py | 168 | ||||
-rw-r--r-- | pylintrc.toml | 11 |
5 files changed, 176 insertions, 15 deletions
diff --git a/examples/conf.pom b/examples/conf.pom index 82e1abd..36a6dcf 100644 --- a/examples/conf.pom +++ b/examples/conf.pom @@ -1,7 +1,7 @@ indentation-type = tabs show-line-numbers = yes tab-size = 4 -font-size = "18" +font-size = 18 [file-extensions] C = .c, .h diff --git a/examples/read_conf.py b/examples/read_conf.py index e49fd4f..f8ce8b9 100644 --- a/examples/read_conf.py +++ b/examples/read_conf.py @@ -6,5 +6,10 @@ import sys sys.path.append(str(Path(__file__).parent.parent)) import pom_parser -filename = 'examples/conf.pom' if len(sys.argv) < 2 else sys.argv[1] -print(pom_parser.load_path(filename)) +try: + filename = 'examples/conf.pom' if len(sys.argv) < 2 else sys.argv[1] + conf = pom_parser.load_path(filename) + print(conf.get('indentation-type','hafkjd')) + print(conf.section('plug-in')) +except pom_parser.Error as e: + print('Parse error:', str(e), sep = '\n') @@ -1,3 +1,2 @@ [mypy] strict = true -ignore_missing_imports = True diff --git a/pom_parser/__init__.py b/pom_parser/__init__.py index 7a53e2d..7e6401c 100644 --- a/pom_parser/__init__.py +++ b/pom_parser/__init__.py @@ -1,17 +1,167 @@ import io +from typing import Optional, Any, Iterable + +class Error(ValueError): + next: Optional['Error'] + message: str + file: str + line: int + def __init__(self, file: str, line_number: int, message: str) -> None: + self.file = file + self.line_number = line_number + self.message = message + self.next = None + + def __str__(self) -> str: + err: Optional['Error'] = self + messages = [] + while err: + messages.append(f'{err.file}:{err.line_number}: {err.message}') + err = err.next + return '\n'.join(messages) + + @staticmethod + def _from_list(l: list['Error']) -> 'Error': + for (i, e) in enumerate(l[:-1]): + e.next = l[i+1] + return l[0] + +class Item: + key: str + value: str + file: str + line: int + def __repr__(self) -> str: + return f'<Item {self.key} at {self.file}:{self.line}>' class Configuration: - pass + _items: dict[str, Item] + def __repr__(self) -> str: + result = [] + for item in self._items.values(): + result.append(f'{item.key}: {repr(item.value)}') + return '\n'.join(result) + + def get(self, key: str, default: Optional[str] = None) -> Optional[str]: + item = self._items.get(key) + if item is None: + return default + return item.value + + def items(self) -> Iterable[Item]: + import copy + return map(copy.copy, self._items.values()) + + def section(self, name: str) -> 'Configuration': + import copy + section_items = {} + name_dot = name + '.' + for item in self.items(): + if item.key.startswith(name_dot): + item_copy = copy.copy(item) + section_items[item.key[len(name_dot):]] = item_copy + conf = Configuration() + conf._items = section_items + return conf + +class _Parser: + line_number: int + filename: str + current_section: str + errors: list[Error] + file: io.BufferedIOBase + items: dict[str, Item] + + def __init__(self, filename: str, file: io.BufferedIOBase): + self.errors = [] + self.filename = filename + self.file = file + self.line_number = 0 + self.current_section = '' + self.items = {} + + def _error(self, message: str) -> None: + self.errors.append(Error(self.filename, self.line_number, message)) + + def _check_key(self, key: str) -> None: + if not key: + self._error('Empty key (expected something before =)') + return + if '..' in key: + self._error(f"Key {key} shouldn't contain ..") + return + if key.startswith('.'): + self._error(f"Key {key} shouldn't start with .") + return + if key.endswith('.'): + self._error(f"Key {key} shouldn't end with .") + return + # TODO + + def _parse_quoted_value(self, value_start: str) -> str: + raise NotImplementedError('TODO: quoted value') -class Settings: - pass + def _read_line(self) -> bool: + line_bytes = self.file.readline() + if not line_bytes: + return False + self.line_number += 1 + try: + line = line_bytes.decode() + except UnicodeDecodeError: + self._error('Bad UTF-8') + return True + if line.endswith('\r\n'): + line = line[:-2] + elif line.endswith('\n'): + line = line[:-1] + for c in line: + if ord(c) < 32 and c != '\t': + self._error(f'Invalid character in file: ASCII control character {ord(c)}') + return True + line = line.lstrip(' \t') + if not line or line.startswith('#'): + return True + if line.startswith('['): + line = line.rstrip(' \t') + if not line.endswith(']'): + self._error('[ with no matching ]') + return True + self.current_section = line[1:-1] + if self.current_section: + self._check_key(self.current_section) + return True + equals_idx = line.find('=') + if equals_idx == -1: + self._error('Invalid line — should either start with [ or contain =') + return True + relative_key = line[:equals_idx].rstrip(' \t') + self._check_key(relative_key) + value = line[equals_idx+1:].lstrip(' \t') + if value.startswith('"') or value.startswith('`'): + value = self._parse_quoted_value(value) + key = f'{self.current_section}.{relative_key}' if self.current_section else relative_key + item = Item() + item.key = key + item.value = value + item.file = self.filename + item.line = self.line_number + self.items[key] = item + return True -def load_file(filename: str, file: io.IOBase, settings: Settings = Settings()) -> Configuration: - raise NotImplementedError('not implemented') +def load_file(filename: str, file: io.BufferedIOBase) -> Configuration: + parser = _Parser(filename, file) + while parser._read_line(): + pass + if parser.errors: + raise Error._from_list(parser.errors) + conf = Configuration() + conf._items = parser.items + return conf -def load_string(filename: str, string: str, settings: Settings = Settings()) -> Configuration: - return load_file(filename, io.BytesIO(string.encode()), settings) +def load_string(filename: str, string: str) -> Configuration: + return load_file(filename, io.BytesIO(string.encode())) -def load_path(path: str, settings: Settings = Settings()) -> Configuration: +def load_path(path: str) -> Configuration: with open(path, 'rb') as file: - return load_file(path, file, settings) + return load_file(path, file) diff --git a/pylintrc.toml b/pylintrc.toml index d7f2d64..ad744ac 100644 --- a/pylintrc.toml +++ b/pylintrc.toml @@ -318,7 +318,7 @@ max-module-lines = 1000 [tool.pylint.imports] # List of modules that can be imported at any level, not just the top level one. -allow-any-import-level = ["boto3", "botocore", "pydo"] +allow-any-import-level = ["copy"] # Allow explicit reexports by alias from a package __init__. # allow-reexport-from-package = @@ -374,7 +374,14 @@ confidence = ["HIGH", "CONTROL_FLOW", "INFERENCE", "INFERENCE_FAILURE", "UNDEFIN # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use "--disable=all --enable=classes # --disable=W". -disable = ["raw-checker-failed", "bad-inline-option", "locally-disabled", "file-ignored", "suppressed-message", "useless-suppression", "deprecated-pragma", "use-implicit-booleaness-not-comparison-to-string", "use-implicit-booleaness-not-comparison-to-zero", "use-symbolic-message-instead"] +disable = ["raw-checker-failed", "bad-inline-option", "locally-disabled", "file-ignored", + "suppressed-message", "useless-suppression", "deprecated-pragma", + "use-implicit-booleaness-not-comparison-to-string", "use-implicit-booleaness-not-comparison-to-zero", + "use-symbolic-message-instead", + "protected-access", # way too strict to be useful + "too-many-return-statements", # who cares? + "too-few-public-methods", # who cares? +] # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option |