summaryrefslogtreecommitdiff
path: root/pom_parser/__init__.py
blob: c179158f3a2ea42d389324c450c6ac60862a69cf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
import io
from typing import Optional, Any, Iterable, Iterator

class Error(ValueError):
	"""Error tied to a location (file and line) in a configuration file.

	Several errors can be reported at once: they are chained through
	`next`, and `str()` of the first error lists every error in the chain,
	one per line, formatted as `file:line: message`.
	"""
	# following error in the chain, or None if this is the last one
	next: Optional['Error']
	# description of the problem, without the location prefix
	message: str
	# name of the file the error was found in
	file: str
	# line number of the error; `line` and `line_number` hold the same value
	line: int
	line_number: int
	def __init__(self, file: str, line_number: int, message: str) -> None:
		# Pass the arguments on to the base class so that `args` is filled in;
		# copy.copy() and pickle rebuild exceptions by calling the class with `args`.
		super().__init__(file, line_number, message)
		self.file = file
		self.line_number = line_number
		# the class declares `line`, so make sure it actually exists
		self.line = line_number
		self.message = message
		self.next = None

	def __str__(self) -> str:
		"""Return every error in the chain, one `file:line: message` per line."""
		err: Optional['Error'] = self
		messages = []
		while err is not None:
			messages.append(f'{err.file}:{err.line_number}: {err.message}')
			err = err.next
		return '\n'.join(messages)

	@staticmethod
	def _from_list(l: list['Error']) -> 'Error':
		"""Link the errors in `l` into a chain and return the first one.

		`l` must not be empty (an empty list raises IndexError).
		"""
		for (i, e) in enumerate(l[:-1]):
			e.next = l[i+1]
		return l[0]

class Item:
	"""A single key/value setting together with the place it was defined.

	The `_parse_*` helpers interpret `value` as a particular type and
	return None (rather than raising) when the value is not valid.
	"""
	# key of the setting
	key: str
	# raw string value
	value: str
	# file and line the setting comes from
	file: str
	line: int
	# whether the value has been read through a Configuration getter
	read: bool
	def __repr__(self) -> str:
		return f'<Item {self.key} at {self.file}:{self.line}>'

	def _error(self, message: str) -> 'Error':
		"""Build an Error located at this item's file and line."""
		return Error(self.file, self.line, message)

	def _parse_uint(self, using: Optional[str] = None) -> Optional[int]:
		"""Parse a non-negative integer from `using` (or `self.value` if None).

		Accepts an optional leading `+`, then either decimal digits without
		leading zeros or `0x`/`0X` followed by hexadecimal digits.
		Returns None if the text is malformed, has no digits at all,
		or the number is 2**53 or larger.
		"""
		s = self.value if using is None else using
		if s.startswith('+'):
			s = s[1:]
		if s.startswith(('0x', '0X')):
			digits = s[2:]
			base = 16
			allowed = '0123456789abcdefABCDEF'
		else:
			# leading zeros are not allowed in decimal, except for 0 itself
			if s != '0' and s.startswith('0'):
				return None
			digits = s
			base = 10
			allowed = '0123456789'
		# An empty digit string ('', '+', '0x') must be rejected here:
		# all() is vacuously true for it and int('') would raise ValueError.
		if not digits or not all(c in allowed for c in digits):
			return None
		value = int(digits, base)
		if value >> 53:
			return None
		return value

	def _parse_int(self) -> Optional[int]:
		"""Parse `self.value` as an integer with an optional sign.

		Returns None if the value is not a valid integer
		(see `_parse_uint` for the accepted magnitude syntax).
		"""
		sign = 1
		value = self.value
		if value.startswith('-'):
			# _parse_uint would accept the + after we strip the -
			if value.startswith('-+'):
				return None
			sign = -1
			value = value[1:]
		uint = self._parse_uint(value)
		if uint is None:
			return None
		return uint * sign

	def _parse_float(self) -> Optional[float]:
		"""Parse `self.value` as a floating-point number.

		Only digits, `e`, `E`, `+`, `-` and `.` may appear, and a `.` must
		have a digit on both sides. Returns None for anything else,
		including text that passes those checks but that float() rejects.
		"""
		value = self.value
		if not value or not all(c in '0123456789eE+-.' for c in value):
			return None
		for (i, c) in enumerate(value):
			# ensure . is preceded and followed by digit
			if c == '.' and (i == 0 or i == len(value)-1 or \
				not value[i+1].isdigit() or not value[i-1].isdigit()):
				return None
		try:
			return float(value)
		except ValueError:
			# e.g. 'e', '1-2', '+-1': right characters, wrong arrangement
			return None

	def _parse_bool(self) -> Optional[bool]:
		"""Parse `self.value` as a boolean (yes/true/on or no/false/off).

		Returns None if the value is none of those words.
		"""
		value = self.value
		if value in ('yes', 'true', 'on'):
			return True
		if value in ('no', 'false', 'off'):
			return False
		return None

	def _parse_list(self) -> list[str]:
		"""Split `self.value` into a list of comma-separated entries.

		Spaces and tabs around each entry are stripped. `\\,` produces a
		literal comma and `\\\\` a literal backslash; a backslash followed
		by anything else (or by the end of the value) is kept as is.
		An empty final entry is dropped, so a trailing comma is allowed.
		"""
		chars = iter(self.value)
		list_ = []
		entry: list[str] = []
		while (c := next(chars, '')):
			if c == ',':
				list_.append(''.join(entry).strip(' \t'))
				entry = []
			elif c == '\\':
				c = next(chars, '')
				# Compare against a tuple, not the string ',\\': the empty
				# string (end of value) is "in" every string, which would
				# silently drop a trailing backslash.
				if c not in (',', '\\'):
					entry.append('\\')
				entry.append(c)
			else:
				entry.append(c)
		last_entry = ''.join(entry).strip(' \t')
		if last_entry:
			list_.append(last_entry)
		return list_

class Configuration:
	"""A set of configuration items addressed by dotted keys.

	The typed getters mark the item they return as read, which is what
	`unread_keys` reports on. They raise an Error carrying the item's
	file and line when the stored value does not have the requested type.
	"""
	# items by key (relative to this configuration)
	_items: dict[str, 'Item']
	def __repr__(self) -> str:
		result = []
		for item in self._items.values():
			result.append(f'{item.key}: {repr(item.value)}')
		return '\n'.join(result)

	def has(self, key: str) -> bool:
		"""Return whether `key` is defined."""
		return key in self._items

	def location(self, key: str) -> Optional[tuple[str, int]]:
		"""Return the `(file, line)` where `key` is defined, or None if it isn't."""
		item = self._items.get(key)
		if item is None:
			return None
		return (item.file, item.line)

	def get(self, key: str, default: Optional[str] = None) -> Optional[str]:
		"""Return the string value of `key`, or `default` if it isn't defined."""
		item = self._items.get(key)
		if item is None:
			return default
		item.read = True
		return item.value

	def get_uint(self, key: str, default: Optional[int] = None) -> Optional[int]:
		"""Return the value of `key` as a non-negative integer.

		Returns `default` (passed through int()) if the key isn't defined;
		raises Error if the value isn't a valid non-negative integer.
		"""
		item = self._items.get(key)
		if item is None:
			return None if default is None else int(default)
		item.read = True
		uint = item._parse_uint()
		if uint is None:
			raise item._error(f'Value {repr(item.value)} for {item.key} is not a valid (non-negative) integer.')
		return uint

	def get_int(self, key: str, default: Optional[int] = None) -> Optional[int]:
		"""Return the value of `key` as an integer.

		Returns `default` (passed through int()) if the key isn't defined;
		raises Error if the value isn't a valid integer.
		"""
		item = self._items.get(key)
		if item is None:
			return None if default is None else int(default)
		item.read = True
		intv = item._parse_int()
		if intv is None:
			raise item._error(f'Value {repr(item.value)} for {item.key} is not a valid integer.')
		return intv

	def get_float(self, key: str, default: Optional[float] = None) -> Optional[float]:
		"""Return the value of `key` as a float.

		Returns `default` (passed through float()) if the key isn't defined;
		raises Error if the value isn't a valid number.
		"""
		item = self._items.get(key)
		if item is None:
			return None if default is None else float(default)
		item.read = True
		floatv = item._parse_float()
		if floatv is None:
			raise item._error(f'Value {repr(item.value)} for {item.key} is not a valid number.')
		return floatv

	def get_bool(self, key: str, default: Optional[bool] = None) -> Optional[bool]:
		"""Return the value of `key` as a boolean.

		Returns `default` (passed through bool()) if the key isn't defined;
		raises Error if the value isn't one of on/off/yes/no/true/false.
		"""
		item = self._items.get(key)
		if item is None:
			return None if default is None else bool(default)
		item.read = True
		boolv = item._parse_bool()
		if boolv is None:
			raise item._error(f'Value {repr(item.value)} for {item.key} is invalid (want on/off/yes/no/true/false)')
		return boolv

	def get_list(self, key: str, default: Optional[list[str]] = None) -> Optional[list[str]]:
		"""Return the value of `key` split into a list, or `default` if it isn't defined."""
		item = self._items.get(key)
		if item is None:
			return default
		item.read = True
		return item._parse_list()


	def items(self) -> Iterable['Item']:
		"""Iterate over shallow copies of all items.

		Copies are handed out so that changes made to them do not
		affect this configuration.
		"""
		import copy
		return map(copy.copy, self._items.values())

	def keys(self) -> Iterable[str]:
		"""Iterate over the distinct first components of all keys.

		Each component (the part of a key before its first `.`) is
		produced once, in order of first appearance.
		"""
		# dict.fromkeys de-duplicates like a set but keeps insertion order,
		# so the result does not vary from run to run.
		return iter(dict.fromkeys(key.split('.', 1)[0] for key in self._items))

	def unread_keys(self) -> Iterable[str]:
		"""Iterate over the keys of items that have not been read through a getter."""
		return (item.key for item in self._items.values() if not item.read)

	def section(self, name: str) -> 'Configuration':
		"""Return a new Configuration holding the items under `name.`.

		Keys in the result are relative to the section (the `name.` prefix
		is removed), so sections can be nested: `conf.section('a').section('b')`.
		The items are copies, so reading a value through the section does
		not mark the item as read in this configuration.
		"""
		import copy
		name_dot = name + '.'
		section_items = {}
		for (key, item) in self._items.items():
			if not key.startswith(name_dot):
				continue
			relative_key = key[len(name_dot):]
			item_copy = copy.copy(item)
			# keep the item's own key in step with the key it is stored under;
			# otherwise a nested section() would filter on the wrong key
			item_copy.key = relative_key
			section_items[relative_key] = item_copy
		conf = Configuration()
		conf._items = section_items
		return conf

def _parse_hex_digit(d: Optional[str]) -> Optional[int]:
	"""Return the numeric value (0-15) of the hexadecimal digit `d`.

	Returns None if `d` is None or is not exactly one of the
	characters 0-9, a-f or A-F.
	"""
	if d is None or len(d) != 1:
		return None
	# position 0-15 covers 0-9a-f; positions 16-21 are A-F, i.e. values 10-15
	position = '0123456789abcdefABCDEF'.find(d)
	if position < 0:
		return None
	return position if position < 16 else position - 6

class _Parser:
	"""Line-by-line parser that collects items and errors from a binary file.

	Errors are accumulated in `errors` instead of being raised, so that
	a single pass can report every problem in the file.
	"""
	# number of the line most recently read (1-based; 0 before the first read)
	line_number: int
	# file name used in error messages and item locations
	filename: str
	# section set by the latest [section] header ('' when outside any section)
	current_section: str
	# every error found so far, in order
	errors: list['Error']
	# binary stream being parsed
	file: io.BufferedIOBase
	# parsed items by full key
	items: dict[str, 'Item']

	def __init__(self, filename: str, file: io.BufferedIOBase):
		self.errors = []
		self.filename = filename
		self.file = file
		self.line_number = 0
		self.current_section = ''
		self.items = {}

	def _error(self, message: str) -> None:
		"""Record an error at the current line."""
		self.errors.append(Error(self.filename, self.line_number, message))

	def _check_key(self, key: str) -> None:
		"""Record an error if `key` is not a valid key or section name.

		A key must be non-empty, must not start or end with `.` or contain
		`..`, and must not contain any character flagged as illegal below.
		"""
		if not key:
			self._error('Empty key (expected something before =)')
			return
		if '..' in key:
			self._error(f"Key {key} shouldn't contain ..")
			return
		if key.startswith('.'):
			self._error(f"Key {key} shouldn't start with .")
			return
		if key.endswith('.'):
			self._error(f"Key {key} shouldn't end with .")
			return
		for c in key:
			o = ord(c)
			# 128-bit mask with one bit per ASCII code; a set bit marks an illegal
			# character. The clear bits are letters, digits and * - . / _
			# Code points of 128 and above shift the mask down to 0, so they are allowed.
			if (0xf800000178000001fc001bffffffffff >> o) & 1:
				self._error(f"Key {key} contains illegal character {c}")

	def _process_escape_sequence(self, chars: Iterator[str]) -> str:
		"""Consume one escape sequence from `chars` and return its expansion.

		`chars` must be positioned just after the backslash. Handles the
		single-character escapes, `\\xNN` (01-7F only) and `\\u{...}` (one to six
		hex digits, excluding 0, surrogates and values above 10FFFF).
		An invalid sequence records an error and expands to ''.
		"""
		def bad_escape_sequence(chs: Iterable[Optional[str]]) -> str:
			# None entries stand for "ran out of characters" and are skipped
			seq = ''.join(c for c in chs if c)
			self._error(f'Invalid escape sequence: \\{seq}')
			return ''
		c = next(chars, None)
		simple_sequences: dict[str | None, str] = {
			'n': '\n', 't': '\t', 'r': '\r',
			'\'': '\'', '"': '"', '`': '`',
			# \, stays escaped in the value so that list parsing can still
			# tell it apart from a separating comma
			',': '\\,', '\\': '\\'
		}
		simple = simple_sequences.get(c)
		if simple is not None:
			return simple
		if c == 'x':
			c1 = next(chars, None)
			c2 = next(chars, None)
			dig1 = _parse_hex_digit(c1)
			dig2 = _parse_hex_digit(c2)
			if dig1 is None or dig2 is None:
				return bad_escape_sequence((c, c1, c2))
			value = dig1 << 4 | dig2
			if value == 0 or value >= 0x80:
				return bad_escape_sequence((c, c1, c2))
			return chr(value)
		if c == 'u':
			open_brace = next(chars, None)
			if open_brace != '{':
				return bad_escape_sequence((c, open_brace))
			sequence: list[str | None] = ['u{']
			value = 0
			# up to six hex digits, then the closing brace
			for i in range(7):
				c = next(chars, None)
				sequence.append(c)
				if c == '}':
					break
				if i == 6:
					# seventh character read and still no closing brace
					return bad_escape_sequence(sequence)
				digit = _parse_hex_digit(c)
				if digit is None:
					return bad_escape_sequence(sequence)
				value <<= 4
				value |= digit
			if value == 0 or \
				0xD800 <= value <= 0xDFFF or \
				value > 0x10FFFF:
				return bad_escape_sequence(sequence)
			return chr(value)
		bad_escape_sequence((c,))
		return ''

	def _read_line(self) -> Optional[str]:
		"""Read and decode the next line, without its line ending.

		Returns None at end of file. A line that is not valid UTF-8 or that
		contains an ASCII control character other than tab records an error
		and is returned as ''.
		"""
		line_bytes = self.file.readline()
		if not line_bytes:
			return None
		self.line_number += 1
		try:
			line = line_bytes.decode()
		except UnicodeDecodeError:
			self._error('Bad UTF-8')
			return ''
		if line.endswith('\r\n'):
			line = line[:-2]
		elif line.endswith('\n'):
			line = line[:-1]
		for c in line:
			if ord(c) < 32 and c != '\t':
				self._error(f'Invalid character in file: ASCII control character {ord(c)}')
				return ''
		return line

	def _parse_quoted_value(self, value_start: str) -> str:
		"""Parse a quoted value, reading further lines until the closing quote.

		`value_start` is the rest of the current line, beginning with the
		opening delimiter. Escape sequences are expanded and line breaks inside
		the value become '\\n'. Anything other than spaces and tabs after the
		closing delimiter records an error. If the file ends first, an error
		is recorded at the line where the value started and '' is returned.
		"""
		delimiter = value_start[0]
		start_line = self.line_number
		line = value_start[1:] + '\n'
		value = []
		while True:
			chars = iter(line)
			while (c := next(chars, None)) is not None:
				if c == '\\':
					value.append(self._process_escape_sequence(chars))
				elif c == delimiter:
					for stray in chars:
						if stray not in ' \t\n':
							self._error(f'Stray {stray} after string.')
					return ''.join(value)
				else:
					value.append(c)
			next_line = self._read_line()
			if next_line is None:
				# report the error where the string was opened, not at end of file
				self.line_number = start_line
				self._error(f'Closing {delimiter} not found.')
				return ''
			line = next_line + '\n'

	def _parse_line(self) -> bool:
		"""Parse the next line (or multi-line definition) of the file.

		Blank lines and lines starting with # are skipped, `[name]` sets the
		current section, and `key = value` stores an item under the key
		prefixed by the current section. Returns False at end of file,
		True otherwise (including when the line was invalid).
		"""
		line = self._read_line()
		if line is None:
			return False
		line = line.lstrip(' \t')
		if not line or line.startswith('#'):
			return True
		if line.startswith('['):
			line = line.rstrip(' \t')
			if not line.endswith(']'):
				self._error('[ with no matching ]')
				return True
			self.current_section = line[1:-1]
			# [] is allowed: it goes back to having no section
			if self.current_section:
				self._check_key(self.current_section)
			return True
		equals_idx = line.find('=')
		if equals_idx == -1:
			self._error('Invalid line — should either start with [ or contain =')
			return True
		relative_key = line[:equals_idx].rstrip(' \t')
		self._check_key(relative_key)
		# Remember where the definition starts: a quoted value can span several
		# lines, and the item should be located at its key, not at its closing quote.
		definition_line = self.line_number
		value = line[equals_idx+1:].lstrip(' \t')
		if value.startswith('"') or value.startswith('`'):
			value = self._parse_quoted_value(value)
		key = f'{self.current_section}.{relative_key}' if self.current_section else relative_key
		item = Item()
		item.key = key
		item.read = False
		item.value = value
		item.file = self.filename
		item.line = definition_line
		self.items[key] = item
		return True

def load_file(filename: str, file: io.BufferedIOBase) -> Configuration:
	"""Parse a configuration from the binary stream `file`.

	`filename` is only used to label item locations and error messages.
	The whole stream is parsed before anything is reported; if any errors
	were found, they are raised together as one chained Error.
	"""
	parser = _Parser(filename, file)
	# _parse_line returns False once the end of the stream is reached
	while True:
		if not parser._parse_line():
			break
	found_errors = parser.errors
	if found_errors:
		raise Error._from_list(found_errors)
	result = Configuration()
	result._items = parser.items
	return result

def load_string(filename: str, string: str) -> Configuration:
	"""Parse a configuration from the text `string`.

	`filename` is passed on to load_file together with the encoded text.
	"""
	# load_file works on bytes, so encode the text and wrap it in a stream
	encoded = string.encode()
	stream = io.BytesIO(encoded)
	return load_file(filename, stream)

def load_path(path: str) -> Configuration:
	"""Parse the configuration file at `path`.

	The file is opened in binary mode and closed again once parsing is done;
	`path` is also used as the file name passed to load_file.
	"""
	with open(path, 'rb') as stream:
		conf = load_file(path, stream)
	return conf