Edit on GitHub

parsetypes

This package provides tools for parsing serialised data to recover their original underlying types.

The TypeParser class provides configurable type inference and parsing. This can be initialised with different settings to, for example:

treat inf as either a float or a normal string
give exact Decimal values instead of floats
detect inline lists

View Source

 1"""
 2	This package provides tools for parsing serialised data to recover their original underlying types.
 3
 4	The `TypeParser` class provides configurable type inference and parsing. This can be initialised with different settings to, for example:
 5	- treat `inf` as either a float or a normal string
 6	- give exact Decimal values instead of floats
 7	- detect inline lists
 8"""
 9
10
11__version__ = "0.3.3"
12
13from ._common import AnyScalar, AnyScalarType, AnyValue, AnyValueType, GenericValue, Nullable
14from ._parser import TypeParser
15from ._reduce_types import reduce_types
16
17__all__ = ('TypeParser', 'reduce_types', 'Nullable')

class TypeParser: View Source

  60class TypeParser:
  61	"""
  62		A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.
  63
  64		The behaviour of the parser and the type inference can be configured either in the constructor or using mutable properties of a parser instance. See the constructor documentation for the list of available options.
  65	"""
  66
  67	def __init__(self,
  68		*,
  69		trim: bool=True,
  70		use_decimal: bool=False,
  71		list_delimiter: Optional[str]=None,
  72		none_values: Iterable[str]=[""],
  73		none_case_sensitive: bool=False,
  74		true_values: Iterable[str]=["true"],
  75		false_values: Iterable[str]=["false"],
  76		bool_case_sensitive: bool=False,
  77		int_case_sensitive: bool=False,
  78		inf_values: Iterable[str]=[],
  79		nan_values: Iterable[str]=[],
  80		float_case_sensitive: bool=False,
  81		case_sensitive: Optional[bool]=None,
  82	):
  83		"""
  84			Initialise a new parser
  85
  86			The behaviour of the parser and the type inference can be configured either in the constructor or using mutable properties of a parser instance. For example,
  87
  88			```python
  89			parser = TypeParser(list_delimiter=",")
  90			assert parser.list_delimiter == ","
  91			parser.list_delimiter = ";"
  92			assert parser.list_delimiter == ";"
  93			```
  94
  95			Keyword arguments
  96			-----------------
  97			`trim`
  98			: whether leading and trailing whitespace should be stripped from strings
  99
 100			`use_decimal`
 101			: whether non-integer numeric values should be inferred to be Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer the type (`infer()` `infer_series()`, `infer_table()`), and does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).
 102
 103			`list_delimiter`
 104			: the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead. Note that this setting is unaffected by <code><var>parser</var>.trim</code> and <code><var>parser</var>.case_sensitive</code>, and will always be used verbatim.
 105
 106			`none_values`
 107			: list of strings that represent the value `None`
 108
 109			`none_case_sensitive`
 110			: whether matches against `none_values` should be made in a case-sensitive manner
 111
 112			`true_values`
 113			: list of strings that represent the bool value `True`
 114
 115			`false_values`
 116			: list of strings that represent the bool value `False`
 117
 118			`bool_case_sensitive`
 119			: whether matches against `true_values` and `false_values` should be made in a case-sensitive manner
 120
 121			`int_case_sensitive`
 122			: whether checks for int should be done in a case-sensitive manner. This only applies to values given in scientific notation, where the mantissa and exponent usually are separated by `e`.
 123
 124			`inf_values`
 125			: list of strings that represent the float or Decimal value of infinity. Each of the strings can also be prepended with a negative sign to represent negative infinity.
 126
 127			`nan_values`
 128			: list of strings that represent a float or Decimal that is NaN (not a number)
 129
 130			`float_case_sensitive`
 131			: whether checks for float or Decimal should be done in a case-sensitive manner. This applies to matches against `inf_values` and `nan_values`, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.
 132
 133			`case_sensitive`
 134			: whether all matches should be made in a case-sensitive manner. Sets all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive`, `float_case_sensitive` to the same value, discarding any individual settings.
 135
 136			Raises
 137			------
 138			`ValueError` if any of the options would lead to ambiguities during parsing
 139		"""
 140
 141		self._trim: bool = False
 142		self._use_decimal: bool = False
 143		self._list_delimiter: Union[str, None] = None
 144		self._match_none_values: set[str] = set()
 145		self._original_none_values: set[str] = set()
 146		self._none_case_sensitive: bool = False
 147		self._match_true_values: set[str] = set()
 148		self._original_true_values: set[str] = set()
 149		self._match_false_values: set[str] = set()
 150		self._original_false_values: set[str] = set()
 151		self._bool_case_sensitive: bool = False
 152		self._int_case_sensitive: bool = False
 153		self._match_inf_values: set[str] = set()
 154		self._original_inf_values: set[str] = set()
 155		self._match_nan_values: set[str] = set()
 156		self._original_nan_values: set[str] = set()
 157		self._float_case_sensitive: bool = False
 158
 159		# Unconfigurable default values
 160		self._negative_char = "-"
 161		self._negative_chars = {self._negative_char, "−"}
 162		self._sign_chars = self._negative_chars | {"+"}
 163		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
 164		self._digit_separators = {"_"}
 165		self._scientific_char = "e"
 166		self._float_separator = "."
 167		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}
 168		# special_chars = self._reserved_chars | self._list_delimiter
 169
 170		# Configured values
 171
 172		self.trim = trim
 173		self.use_decimal = use_decimal
 174		self.list_delimiter = list_delimiter
 175
 176		self.none_case_sensitive = none_case_sensitive
 177		self.bool_case_sensitive = bool_case_sensitive
 178		self.int_case_sensitive = int_case_sensitive
 179		self.float_case_sensitive = float_case_sensitive
 180		self.case_sensitive = case_sensitive
 181
 182		self.none_values = none_values
 183
 184		self.true_values = true_values
 185		self.false_values = false_values
 186
 187		self.inf_values = inf_values
 188		self.nan_values = nan_values
 189
 190		# Check if any special values conflict
 191		for name, special_values in [
 192			(_SpecialValue.LIST, [self._list_delimiter] if self._list_delimiter is not None else []),
 193			(_SpecialValue.NONE, self._match_none_values),
 194			(_SpecialValue.TRUE, self._match_true_values),
 195			(_SpecialValue.FALSE, self._match_false_values),
 196			(_SpecialValue.INF, self._match_inf_values),
 197			(_SpecialValue.NAN, self._match_nan_values),
 198		]:
 199			for special_value in special_values:
 200				self._validate_special(name, special_value)
 201
 202
	def _validate_special(self, name: _SpecialValue, value: str):
		"""
			Check that a special value does not clash with any other kind of recognised value

			Arguments
			---------
			`name`
			: the kind of special value being validated

			`value`
			: the string configured for that kind

			Raises
			------
			`ValueError` if `value` is a reserved character, or would also be recognised as None, a bool of the wrong kind, an int, or a float/Decimal of the wrong kind
		"""
		if value in self._reserved_chars:
			raise ValueError(f"cannot use reserved char as {name.value}: {value}")

		if name != _SpecialValue.NONE and self.is_none(value):
			raise ValueError(f"cannot use None value as {name.value}: {value}")

		if (
			(name == _SpecialValue.TRUE and self.parse_bool(value) != True) or
			(name == _SpecialValue.FALSE and self.parse_bool(value) != False) or
			(name != _SpecialValue.TRUE and name != _SpecialValue.FALSE and self.is_bool(value))
		):
			raise ValueError(f"cannot use bool value as {name.value}: {value}")

		if self.is_int(value):
			raise ValueError(f"cannot use int value as {name.value}: {value}")

		if self._use_decimal:
			if (
				(name == _SpecialValue.INF and self.parse_decimal(value) != Decimal(math.inf)) or
				(name == _SpecialValue.NAN and not self.parse_decimal(value).is_nan()) or
				(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(value))
			):
				raise ValueError(f"cannot use Decimal value as {name.value}: {value}")
		else:
			if (
				(name == _SpecialValue.INF and self.parse_float(value) != math.inf) or
				# NaN never compares equal to itself and an identity check depends on which
				# NaN object the parser happens to return, so test with math.isnan() instead
				(name == _SpecialValue.NAN and not math.isnan(self.parse_float(value))) or
				(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(value))
			):
				raise ValueError(f"cannot use float value as {name.value}: {value}")
 234
 235
 236	@property
 237	def trim(self) -> bool:
 238		return self._trim
 239
 240	@trim.setter
 241	def trim(self, value: bool):
 242		if type(value) != bool:
 243			raise TypeError(f"trim must be a bool: {value}")
 244		if value != self._trim:
 245			self._trim = value
 246			self.none_values = self._original_none_values
 247			self.true_values = self._original_true_values
 248			self.false_values = self._original_false_values
 249			self.inf_values = self._original_inf_values
 250			self.nan_values = self._original_nan_values
 251
 252
 253	@property
 254	def use_decimal(self) -> bool:
 255		return self._use_decimal
 256
 257	@use_decimal.setter
 258	def use_decimal(self, value: bool):
 259		if type(value) != bool:
 260			raise TypeError(f"use_decimal must be a bool: {value}")
 261		self._use_decimal = value
 262
 263
 264	@property
 265	def list_delimiter(self) -> Union[str, None]:
 266		return self._list_delimiter
 267
 268	@list_delimiter.setter
 269	def list_delimiter(self, value: Union[str, None]):
 270		if value is not None and type(value) != str:
 271			raise TypeError(f"list_delimiter must be a str or None: {value}")
 272		if value is not None:
 273			self._validate_special(_SpecialValue.LIST, value)
 274		self._list_delimiter = value
 275
 276
 277	@property
 278	def none_values(self) -> set[str]:
 279		if self._trim:
 280			return {value.strip() for value in self._original_none_values}
 281		else:
 282			return self._original_none_values
 283
 284	@none_values.setter
 285	def none_values(self, values: Iterable[str]):
 286		if not isinstance(values, Iterable):
 287			raise TypeError(f"none_values must be an Iterable: {values}")
 288		for i, value in enumerate(values):
 289			if type(value) != str:
 290				raise TypeError(f"each item in none_values must be a str: {value} at index {i}")
 291		self._original_none_values = set(values)
 292		if self._trim:
 293			values = (value.strip() for value in values)
 294		if not self._none_case_sensitive:
 295			values = (value.lower() for value in values)
 296		self._match_none_values = set(values)
 297
 298
 299	@property
 300	def none_case_sensitive(self) -> bool:
 301		return self._none_case_sensitive
 302
 303	@none_case_sensitive.setter
 304	def none_case_sensitive(self, value: bool):
 305		if type(value) != bool:
 306			raise TypeError(f"none_case_sensitive must be a bool: {value}")
 307		if value != self._none_case_sensitive:
 308			self._none_case_sensitive = value
 309			self.none_values = self._original_none_values
 310
 311
 312	@property
 313	def true_values(self) -> set[str]:
 314		if self._trim:
 315			return {value.strip() for value in self._original_true_values}
 316		else:
 317			return self._original_true_values
 318
 319	@true_values.setter
 320	def true_values(self, values: Iterable[str]):
 321		if not isinstance(values, Iterable):
 322			raise TypeError(f"true_values must be an Iterable: {values}")
 323		for i, value in enumerate(values):
 324			if type(value) != str:
 325				raise TypeError(f"each item in true_values must be a str: {value} at index {i}")
 326		self._original_true_values = set(values)
 327		if self._trim:
 328			values = (value.strip() for value in values)
 329		if not self._bool_case_sensitive:
 330			values = (value.lower() for value in values)
 331		self._match_true_values = set(values)
 332
 333
 334	@property
 335	def false_values(self) -> set[str]:
 336		if self._trim:
 337			return {value.strip() for value in self._original_false_values}
 338		else:
 339			return self._original_false_values
 340
 341	@false_values.setter
 342	def false_values(self, values: Iterable[str]):
 343		if not isinstance(values, Iterable):
 344			raise TypeError(f"false_values must be an Iterable: {values}")
 345		for i, value in enumerate(values):
 346			if type(value) != str:
 347				raise TypeError(f"each item in false_values must be a str: {value} at index {i}")
 348		self._original_false_values = set(values)
 349		if self._trim:
 350			values = (value.strip() for value in values)
 351		if not self._bool_case_sensitive:
 352			values = (value.lower() for value in values)
 353		self._match_false_values = set(values)
 354
 355
 356	@property
 357	def bool_case_sensitive(self) -> bool:
 358		return self._bool_case_sensitive
 359
 360	@bool_case_sensitive.setter
 361	def bool_case_sensitive(self, value: bool):
 362		if type(value) != bool:
 363			raise TypeError(f"bool_case_sensitive must be a bool: {value}")
 364		if value != self._bool_case_sensitive:
 365			self._bool_case_sensitive = value
 366			self.true_values = self._original_true_values
 367			self.false_values = self._original_false_values
 368
 369
 370	@property
 371	def int_case_sensitive(self) -> bool:
 372		return self._int_case_sensitive
 373
 374	@int_case_sensitive.setter
 375	def int_case_sensitive(self, value: bool):
 376		if type(value) != bool:
 377			raise TypeError(f"int_case_sensitive must be a bool: {value}")
 378		self._int_case_sensitive = value
 379
 380
 381	@property
 382	def inf_values(self) -> set[str]:
 383		if self._trim:
 384			return {value.strip() for value in self._original_inf_values}
 385		else:
 386			return self._original_inf_values
 387
 388	@inf_values.setter
 389	def inf_values(self, values: Iterable[str]):
 390		if not isinstance(values, Iterable):
 391			raise TypeError(f"inf_values must be an Iterable: {values}")
 392		for i, value in enumerate(values):
 393			if type(value) != str:
 394				raise TypeError(f"each item in inf_values must be a str: {value} at index {i}")
 395		self._original_inf_values = set(values)
 396		if self._trim:
 397			values = (value.strip() for value in values)
 398		if not self._float_case_sensitive:
 399			values = (value.lower() for value in values)
 400		self._match_inf_values = set(values)
 401
 402
 403	@property
 404	def nan_values(self) -> set[str]:
 405		values = self._original_nan_values
 406		if self._trim:
 407			return {value.strip() for value in self._original_nan_values}
 408		else:
 409			return self._original_nan_values
 410
 411	@nan_values.setter
 412	def nan_values(self, values: Iterable[str]):
 413		if not isinstance(values, Iterable):
 414			raise TypeError(f"nan_values must be an Iterable: {values}")
 415		for i, value in enumerate(values):
 416			if type(value) != str:
 417				raise TypeError(f"each item in nan_values must be a str: {value} at index {i}")
 418		self._original_nan_values = set(values)
 419		if self._trim:
 420			values = (value.strip() for value in values)
 421		if not self._float_case_sensitive:
 422			values = (value.lower() for value in values)
 423		self._match_nan_values = set(values)
 424
 425
 426	@property
 427	def float_case_sensitive(self) -> bool:
 428		return self._float_case_sensitive
 429
 430	@float_case_sensitive.setter
 431	def float_case_sensitive(self, value: bool):
 432		if type(value) != bool:
 433			raise TypeError(f"float_case_sensitive must be a bool: {value}")
 434		if value != self._float_case_sensitive:
 435			self._float_case_sensitive = value
 436			self.inf_values = self._original_inf_values
 437			self.nan_values = self._original_nan_values
 438
 439
 440	@property
 441	def case_sensitive(self) -> Union[bool, None]:
 442		if (
 443			self._none_case_sensitive == self._bool_case_sensitive and
 444			self._none_case_sensitive == self._int_case_sensitive and
 445			self._none_case_sensitive == self._float_case_sensitive
 446		):
 447			return self._none_case_sensitive
 448		else:
 449			return None
 450
 451	@case_sensitive.setter
 452	def case_sensitive(self, value: Union[bool, None]):
 453		if value is not None and type(value) != bool:
 454			raise TypeError(f"case_sensitive must be a bool or None: {value}")
 455		if value is not None:
 456			self.none_case_sensitive = value
 457			self.int_case_sensitive = value
 458			self.bool_case_sensitive = value
 459			self.float_case_sensitive = value
 460
 461
 462	def is_none(self, value: str) -> bool:
 463		"""
 464			Check if a string represents the value None
 465
 466			Only strings that match the values in <code><var>parser</var>.none_values</code> will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on <code><var>parser</var>.none_case_sensitive</code>, which is False by default.
 467
 468			Arguments
 469			---------
 470			`value`
 471			: string to be checked
 472
 473			Returns
 474			-------
 475			whether it is None
 476
 477			Examples
 478			--------
 479			```python
 480			parser = TypeParser()
 481			parser.is_none("")     # True
 482			parser.is_none("abc")  # False
 483			```
 484		"""
 485		if self._trim:
 486			value = value.strip()
 487		if not self._bool_case_sensitive:
 488			value = value.lower()
 489
 490		if value in self._match_none_values:
 491			return True
 492		else:
 493			return False
 494
 495
 496	def is_bool(self, value: str) -> bool:
 497		"""
 498			Check if a string represents a bool
 499
 500			Only strings that match the values in <code><var>parser</var>.true_values</code> and <code><var>parser</var>.false_values</code> will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on <code><var>parser</var>.bool_case_sensitive</code>, which is False by default.
 501
 502			Arguments
 503			---------
 504			`value`
 505			: string to be checked
 506
 507			Returns
 508			-------
 509			whether it is a bool
 510
 511			Examples
 512			--------
 513			```python
 514			parser = TypeParser()
 515			parser.is_bool("true")  # True
 516			parser.is_bool("")      # True
 517			parser.is_bool("abc")   # False
 518			```
 519		"""
 520		if self._trim:
 521			value = value.strip()
 522
 523		if not self._bool_case_sensitive:
 524			value = value.lower()
 525		if value in self._match_true_values:
 526			return True
 527		if value in self._match_false_values:
 528			return True
 529
 530		return False
 531
 532
 533	def is_int(self, value: str, *, allow_negative: bool=True, allow_sign: bool=True, allow_scientific: bool=True) -> bool:
 534		"""
 535			Check if a string represents an int
 536
 537			Arguments
 538			---------
 539			`value`
 540			: string to be checked
 541
 542			Keyword arguments
 543			-----------------
 544
 545			`allow_negative`
 546			: whether to accept negative values. Since negative values are always indicated with a negative sign, `allow_sign` must also be True (which is the default setting) for this to have any effect.
 547
 548			`allow_sign`
 549			: whether to accept values prepended with a sign character. If False, it implies that `allow_negative` is False also.
 550
 551			`allow_scientific`
 552			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
 553
 554			Returns
 555			-------
 556			whether it is an int
 557
 558			Examples
 559			--------
 560			```python
 561			parser = TypeParser()
 562			parser.is_int("0")    # True
 563			parser.is_int("-1")   # True
 564			parser.is_int("abc")  # False
 565			parser.is_int("")     # False
 566			```
 567		"""
 568		if self._trim:
 569			value = value.strip()
 570
 571		if len(value) == 0:
 572			return False
 573
 574		if allow_scientific:
 575			value, exp = _decompose_string_pair(value, self._scientific_char, self._int_case_sensitive)
 576			if exp is not None:
 577				return self.is_int(
 578					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
 579				) and self.is_int(
 580					exp, allow_sign=True, allow_negative=False, allow_scientific=False
 581				)
 582
 583		if value[0] in self._sign_chars:
 584			if len(value) == 1:
 585				return False
 586			if not allow_sign:
 587				return False
 588			if not allow_negative and value[0] in self._negative_chars:
 589				return False
 590			value = value[1:]
 591		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
 592			return False
 593
 594		prev_separated = False
 595		for c in value:
 596			if c in self._digit_separators:
 597				if prev_separated:
 598					return False
 599				prev_separated = True
 600			else:
 601				prev_separated = False
 602				if c not in self._digit_chars:
 603					return False
 604		return True
 605
 606
	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
		"""
			Check if a string represents a float (or equivalently, a Decimal)

			This function will also return True if the string represents an int. At most one leading sign character is accepted.

			Alias: `is_decimal()`

			Arguments
			---------
			`value`
			: string to be checked

			Keyword arguments
			-----------------

			`allow_scientific`
			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.

			`allow_inf`
			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended with a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.

			`allow_nan`
			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpreted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.

			Returns
			-------
			whether it is a float or Decimal

			Examples
			--------
			```python
			parser = TypeParser()
			parser.is_float("1.")       # True
			parser.is_float("12.3e-2")  # True
			parser.is_float("+-1")      # False
			parser.is_float("abc")      # False
			parser.is_float("")         # False
			```
		"""
		if self._trim:
			value = value.strip()

		# Consume the single permitted leading sign; everything after it must be unsigned
		if len(value) > 0 and value[0] in self._sign_chars:
			value = value[1:]

		if self._float_case_sensitive:
			special_value = value
		else:
			special_value = value.lower()
		if allow_inf and special_value in self._match_inf_values:
			return True
		if allow_nan and special_value in self._match_nan_values:
			return True

		if len(value) == 0:
			return False

		if allow_scientific:
			value, exp = _decompose_string_pair(value, self._scientific_char, self._float_case_sensitive)
			if exp is not None:
				# The recursive call below would strip a sign again, so reject a mantissa
				# that still starts with one (e.g. "--1e5") before recursing
				mantissa = value.strip() if self._trim else value
				if mantissa[:1] in self._sign_chars:
					return False
				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)

		value, frac = _decompose_string_pair(value, self._float_separator, self._float_case_sensitive)
		if frac is not None:
			if value == "" and frac == "":
				return False
			# Either side of the separator may be empty ("1." and ".5"), but not both
			return (
				self.is_int(value, allow_sign=False, allow_negative=False, allow_scientific=False) or value == ""
			) and (
				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
			)

		# Any sign has already been consumed above, so a further sign here would be a second one
		return self.is_int(value, allow_sign=False, allow_negative=False, allow_scientific=False)
 680
 681
 682	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
 683		"""
 684			Alias of `is_float()`
 685		"""
 686		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)
 687
 688
 689	def parse_none(self, value: str) -> None:
 690		"""
 691			Parse a string and return it as the value None if possible
 692
 693			Only strings that match the values in <code><var>parser</var>.none_values</code> will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on <code><var>parser</var>.none_case_sensitive</code>, which is False by default.
 694
 695			Arguments
 696			---------
 697			`value`
 698			: string to be parsed
 699
 700			Returns
 701			-------
 702			parsed None value
 703
 704			Raises
 705			------
 706			`ValueError` if `value` cannot be parsed
 707
 708			Examples
 709			--------
 710			```python
 711			parser = TypeParser()
 712			parser.parse_none("")     # None
 713			parser.parse_none("abc")  # raises ValueError
 714			```
 715		"""
 716		if self.is_none(value):
 717			return None
 718		else:
 719			raise ValueError(f"not a none value: {value}")
 720
 721
 722	def parse_bool(self, value: str) -> bool:
 723		"""
 724			Parse a string and return it as a bool if possible
 725
 726			Only strings that match the values in <code><var>parser</var>.true_values</code> and <code><var>parser</var>.false_values</code> will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on <code><var>parser</var>.bool_case_sensitive</code>, which is False by default.
 727
 728			Arguments
 729			---------
 730			`value`
 731			: string to be parsed
 732
 733			Returns
 734			-------
 735			parsed bool value
 736
 737			Raises
 738			------
 739			`ValueError` if `value` cannot be parsed
 740
 741			Examples
 742			--------
 743			```python
 744			parser = TypeParser()
 745			parser.parse_bool("true")   # True
 746			parser.parse_bool("FALSE")  # False
 747			```
 748		"""
 749		if self._trim:
 750			value = value.strip()
 751
 752		if self._bool_case_sensitive:
 753			special_value = value
 754		else:
 755			special_value = value.lower()
 756
 757		if special_value in self._match_true_values:
 758			return True
 759		if special_value in self._match_false_values:
 760			return False
 761
 762		raise ValueError(f"not a boolean: {value}")
 763
 764
 765	def parse_int(self, value: str, *, allow_negative: bool=True, allow_sign: bool=True, allow_scientific: bool=True) -> int:
 766		"""
 767			Parse a string and return it as an int if possible
 768
 769			If the string represents a bool, it will be converted to `1` for True and `0` for False.
 770
 771			Arguments
 772			---------
 773			`value`
 774			: string to be parsed
 775
 776			Keyword arguments
 777			-----------------
 778
 779			`allow_negative`
 780			: whether to accept negative values. Since negative values are always indicated with a negative sign, `allow_sign` must also be True (which is the default setting) for this to have any effect.
 781
 782			`allow_sign`
 783			: whether to accept values prepended with a sign character. If False, it implies that `allow_negative` is False also.
 784
 785			`allow_scientific`
 786			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
 787
 788			Returns
 789			-------
 790			parsed int value
 791
 792			Raises
 793			------
 794			`ValueError` if `value` cannot be parsed
 795
 796			Examples
 797			--------
 798			```python
 799			parser = TypeParser()
 800			parser.parse_int("0")    # 0
 801			parser.parse_int("-1")   # -1
 802			parser.parse_int("2e3")  # 2000
 803			```
 804		"""
 805		if self._trim:
 806			value = value.strip()
 807
 808		if self.is_int(value, allow_negative=allow_negative, allow_sign=allow_sign, allow_scientific=allow_scientific):
 809			if allow_scientific:
 810				value, exp = _decompose_string_pair(value, self._scientific_char, self._int_case_sensitive)
 811				if exp is not None:
 812					if value[0] in (self._negative_chars - {self._negative_char}):
 813						value = self._negative_char + value[1:]
 814					return int(value) * (10 ** int(exp))
 815
 816			if value[0] in (self._negative_chars - {self._negative_char}):
 817				value = self._negative_char + value[1:]
 818			return int(value)
 819
 820		elif self.is_bool(value):
 821			return int(self.parse_bool(value))
 822		else:
 823			raise ValueError(f"not an integer: {value}")
 824
 825
	def _parse_floatlike(self,
		value: str,
		converter: Callable[[Union[str, bool]], _FloatLike],
		inf_value: _FloatLike,
		nan_value: _FloatLike,
		*,
		allow_scientific: bool=True,
		allow_inf: bool=True,
		allow_nan: bool=True
	) -> _FloatLike:
		"""
			Shared implementation for parsing a string into a float-like type

			Arguments
			---------
			`value`
			: string to be parsed

			`converter`
			: callable that converts a numeric string (or a bool) to the target type

			`inf_value`
			: value of the target type to return for positive infinity; it is multiplied by -1 for negative infinity

			`nan_value`
			: value of the target type to return for NaN

			Keyword arguments
			-----------------
			`allow_scientific`, `allow_inf`, `allow_nan`
			: passed on to `is_float()`; the latter two also control matching against the configured infinity and NaN strings

			Returns
			-------
			parsed value of the target type

			Raises
			------
			`ValueError` if `value` cannot be parsed
		"""
		if self._trim:
			value = value.strip()
		if self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan):
			if self._float_case_sensitive:
				special_value = value
			else:
				special_value = value.lower()
			if allow_inf and special_value in self._match_inf_values:
				return inf_value
			if allow_nan and special_value in self._match_nan_values:
				return nan_value

			if len(value) > 0 and value[0] in self._sign_chars:
				# Retry the special-value match without the leading sign
				positive_part = value[1:]
				if self._float_case_sensitive:
					special_value = positive_part
				else:
					special_value = positive_part.lower()
				if allow_inf and special_value in self._match_inf_values:
					if value[0] in self._negative_chars:
						return -1 * inf_value
					else:
						return inf_value
				if allow_nan and special_value in self._match_nan_values:
					return nan_value

			# The converter only understands the ASCII minus. Normalise every alternative negative
			# sign, including one in the exponent (e.g. "1e−5"), not just a leading one.
			for alt_negative in self._negative_chars - {self._negative_char}:
				value = value.replace(alt_negative, self._negative_char)
			return converter(value)
		elif self.is_bool(value):
			return converter(self.parse_bool(value))
		else:
			# Name the concrete target type (e.g. float or Decimal) taken from the supplied infinity value
			raise ValueError(f"not a {type(inf_value).__name__}: {value}")
 869
 870
	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
		"""
			Convert a string into a (non-exact) float

			A string that represents a bool becomes `1.` for True and `0.` for False. A string that represents an int is returned as a float as well.

			`parse_decimal()` is the exact counterpart of this method, returning a Decimal instead.

			Arguments
			---------
			`value`
			: string to be parsed

			Keyword arguments
			-----------------

			`allow_scientific`
			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.

			`allow_inf`
			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended with a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.

			`allow_nan`
			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpreted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.

			Returns
			-------
			parsed float value

			Raises
			------
			`ValueError` if `value` cannot be parsed

			Examples
			--------
			```python
			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
			parser.parse_float("1.")       # 1.
			parser.parse_float("1.23e2")   # 123.
			parser.parse_float("1.23e-2")  # 0.0123
			parser.parse_float("inf")      # math.inf
			```
		"""
		options = {
			"allow_scientific": allow_scientific,
			"allow_inf": allow_inf,
			"allow_nan": allow_nan,
		}
		return self._parse_floatlike(value, float, math.inf, math.nan, **options)
 919
 920
	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
		"""
			Convert a string into an exact Decimal

			A string that represents a bool becomes `Decimal(1)` for True and `Decimal(0)` for False. A string that represents an int is returned as a Decimal as well.

			`parse_float()` is the non-exact counterpart of this method, returning a float instead.

			Arguments
			---------
			`value`
			: string to be parsed

			Keyword arguments
			-----------------

			`allow_scientific`
			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.

			`allow_inf`
			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended with a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.

			`allow_nan`
			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpreted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.

			Returns
			-------
			parsed Decimal value

			Raises
			------
			`ValueError` if `value` cannot be parsed

			Examples
			--------
			```python
			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
			parser.parse_decimal("1.")       # Decimal(1)
			parser.parse_decimal("1.23e2")   # Decimal(123)
			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
			parser.parse_decimal("inf")      # Decimal(math.inf)
			```
		"""
		options = {
			"allow_scientific": allow_scientific,
			"allow_inf": allow_inf,
			"allow_nan": allow_nan,
		}
		infinity = Decimal(math.inf)
		not_a_number = Decimal(math.nan)
		return self._parse_floatlike(value, Decimal, infinity, not_a_number, **options)
 969
 970
	def infer(self, value: str) -> AnyValueType:
		"""
			Work out which type a single string most plausibly represents

			The checks are tried in this order: None, bool, int, then float (reported as Decimal when <code><var>parser</var>.use_decimal</code> is set). If none of them match and <code><var>parser</var>.list_delimiter</code> is not None and occurs in the string, the string is split on the delimiter and reported as a list of the reduced type of its items. Anything else is reported as the generic fallback type.

			Arguments
			---------
			`value`
			: the string for which the type should be inferred

			Returns
			-------
			inferred type

			Examples
			--------
			```python
			parser = TypeParser()
			parser.infer("true")  # bool
			parser.infer("2.0")   # float
			parser.infer("abc")   # str
			```
		"""
		# Scalar checks, from most specific to least specific
		if self.is_none(value):
			return NoneType
		elif self.is_bool(value):
			return bool
		elif self.is_int(value):
			return int
		elif self.is_float(value):
			return Decimal if self._use_decimal else float

		stripped = value.strip() if self._trim else value
		delimiter = self._list_delimiter

		# Without a usable delimiter there is nothing more specific to report
		if delimiter is None or delimiter not in stripped:
			return GenericValue

		items = stripped.split(delimiter)
		if self._trim:
			items = [item.strip() for item in items]
		element_type = reduce_types(self.infer(item) for item in items)
		return list[element_type]
1019
1020
1021	def infer_series(self, values: Iterable[str]) -> AnyValueType:
1022		"""
1023			Infer the underlying common type of a series of strings
1024
1025			If the values in the series do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
1026
1027			Arguments
1028			---------
1029			`values`
1030			: series of strings for which the type should be inferred
1031
1032			Returns
1033			-------
1034			inferred type
1035
1036			Examples
1037			--------
1038			```python
1039			parser = TypeParser()
1040			parser.infer_series(["1", "2", "3.4"])       # float
1041			parser.infer_series(["true", "false", "2"])  # int
1042			parser.infer_series(["1", "2.3", "abc"])     # str
1043			```
1044		"""
1045		return reduce_types(self.infer(value) for value in values)
1046
1047
1048	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
1049		"""
1050			Infer the underlying common type for each column of a table of strings
1051
1052			For each column, if the values do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1053
1054			Note that the individual inferred types of every value in the table must be able to fit into memory.
1055
1056			Arguments
1057			---------
1058			`rows`
1059			: table of strings for which the types should be inferred, in row-major order
1060
1061			Returns
1062			-------
1063			inferred types
1064
1065			Examples
1066			--------
1067			```python
1068			parser = TypeParser()
1069			parser.infer_table([
1070				["1",   "true",  "1"],
1071				["2",   "false", "2.3"],
1072				["3.4", "2",     "abc"],
1073			])
1074			# [float, int, str]
1075			```
1076		"""
1077		rows_iter = iter(rows)
1078		first_row = next(rows_iter, None)
1079		if first_row is None:
1080			return []
1081
1082		num_cols = len(first_row)
1083		if num_cols == 0:
1084			return []
1085
1086		table = _TypeTable([[self.infer(value)] for value in first_row])
1087		for row in rows_iter:
1088			table.add_row([self.infer(value) for value in row])
1089
1090		return [reduce_types(col) for col in table.cols]
1091
1092
1093	def convert(self, value: str, target_type: AnyValueType) -> AnyValue:
1094		"""
1095			Convert a string to the specified target type if possible
1096
1097			Valid values for `target_type` include any return value from `infer()`, `infer_series()` and `infer_table()`. To infer and convert the string automatically, use `parse()`, `parse_series()` or `parse_table()` instead.
1098
1099			Arguments
1100			---------
1101			`value`
1102			: the string to be converted
1103
1104			`target_type`
1105			: type to which the value should be converted
1106
1107			Returns
1108			-------
1109			converted value
1110
1111			Raises
1112			-------
1113			`ValueError`
1114			: if `value` cannot be converted to `target_type`
1115
1116			`TypeError`
1117			: if `target_type` is not a valid type
1118
1119			Examples
1120			--------
1121			```python
1122			parser = TypeParser()
1123			parser.convert("true", bool)  # True
1124			parser.convert("2", int)      # 2
1125			parser.convert("2", float)    # 2.
1126			```
1127		"""
1128		base, type_args = _decompose_type(target_type)
1129		if base == NoneType:
1130			return self.parse_none(value)
1131		elif base == bool:
1132			return self.parse_bool(value)
1133		elif base == int:
1134			return self.parse_int(value)
1135		elif base == Decimal:
1136			return self.parse_decimal(value)
1137		elif base == float:
1138			return self.parse_float(value)
1139		elif base == str:
1140			return value
1141		elif base == Nullable:
1142			if self.is_none(value):
1143				return None
1144			else:
1145				if type_args is not None and len(type_args) == 1 and type_args[0] != str:
1146					inner_type = type_args[0]
1147					return self.convert(value, inner_type)
1148				else:
1149					return value
1150		elif base == list:
1151			subvalues = value.split(self._list_delimiter)
1152			if self._trim:
1153				subvalues = [subvalue.strip() for subvalue in subvalues]
1154			if type_args is not None and len(type_args) == 1 and type_args[0] != str:
1155				subtype = type_args[0]
1156				return [self.convert(subvalue, subtype) for subvalue in subvalues]
1157			else:
1158				return subvalues
1159		else:
1160			raise TypeError(f"cannot convert to type: {target_type}")
1161
1162
1163	def parse(self, value: str) -> AnyValue:
1164		"""
1165			Parse a string and convert it to its underlying type
1166
1167			Arguments
1168			---------
1169			`value`
1170			: the string to be parsed
1171
1172			Returns
1173			-------
1174			converted value
1175
1176			Examples
1177			--------
1178			```python
1179			parser = TypeParser()
1180			parser.parse("true")  # True
1181			parser.parse("2.0")   # 2.
1182			parser.parse("abc")   # "abc"
1183			```
1184		"""
1185		return self.convert(value, self.infer(value))
1186
1187
1188	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
1189		"""
1190			Parse a series of strings and convert them to their underlying common type
1191
1192			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
1193
1194			Arguments
1195			---------
1196			`values`
1197			: series of strings to be parsed
1198
1199			Returns
1200			-------
1201			converted values
1202
1203			Examples
1204			--------
1205			```python
1206			parser = TypeParser()
1207			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
1208			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
1209			parser.parse_series(["true", "false", ""])  # [True, False, None]
1210			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
1211			```
1212		"""
1213		inferred = self.infer_series(values)
1214		return [self.convert(value, inferred) for value in values]
1215
1216
1217	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
1218		"""
1219			Parse a table of strings and convert them to the underlying common type of each column
1220
1221			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1222
1223			Note that the type to which the values should be converted is determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.
1224
1225			This is a function that computes the entire table and returns it all at once. The generator function `iterate_table()` behaves analogously, except that it computes and yields each row one at a time instead.
1226
1227			Arguments
1228			---------
1229			`rows`
1230			: table of strings to be parsed, in row-major order
1231
1232			`iterator`
1233			: whether the parsed values should be yielded as an iterator. If False, which is the default, the entire table is computed and returned as a list of lists. If True, this function behaves as a generator, and the rows of the table are computed and yielded one at a time. However, note that even when set to True, the type inference requires that inferred type of each individual value must all be able to fit into memory at once.
1234
1235			Returns
1236			-------
1237			converted table of values, in row-major order
1238
1239			Examples
1240			--------
1241			```python
1242			parser = TypeParser()
1243			table = parser.parse_table([
1244				["1", "5",   "true",  "1"],
1245				["2", "6.7", "false", "2.3"],
1246				["3", "8.0", "",      "abc"],
1247			]):
1248			assert table == [
1249				[1, 5.,  True,  "1"],
1250				[2, 6.7, False, "2.3"],
1251				[3, 8.,  None,  "abc"],
1252			]
1253			```
1254		"""
1255		return [converted_row for converted_row in self.iterate_table(rows)]
1256
1257
1258	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
1259		"""
1260			Parse a table of strings for the underlying common type of each column, then convert and yield each row
1261
1262			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1263
1264			This is a generator function that computes and yields each row one at a time. However, note that in order to determine the types to which each column should be converted, the individual inferred types of every value in the table must still be able to fit into memory.
1265
1266			The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists instead.
1267
1268			Arguments
1269			---------
1270			`rows`
1271			: table of strings to be parsed, in row-major order
1272
1273			Yields
1274			------
1275			each row of converted table values
1276
1277			Examples
1278			--------
1279			```python
1280			parser = TypeParser()
1281			table = parser.iterate_table([
1282				["1",   "true",  "1"],
1283				["2",   "false", "2.3"],
1284				["3.4", "2",     "abc"],
1285			]):
1286			assert next(table) == [1.,  1, "1"]
1287			assert next(table) == [2.,  0, "2.3"]
1288			assert next(table) == [3.4, 2, "abc"]
1289			```
1290		"""
1291		inferred_types = self.infer_table(rows)
1292
1293		for row in rows:
1294			yield [self.convert(value, inferred) for value, inferred in zip(row, inferred_types)]

A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.

The behaviour of the parser and the type inference can be configured either in the constructor or using mutable properties of a parser instance. See the constructor documentation for the list of available options.

TypeParser( *, trim: bool = True, use_decimal: bool = False, list_delimiter: Optional[str] = None, none_values: Iterable[str] = [''], none_case_sensitive: bool = False, true_values: Iterable[str] = ['true'], false_values: Iterable[str] = ['false'], bool_case_sensitive: bool = False, int_case_sensitive: bool = False, inf_values: Iterable[str] = [], nan_values: Iterable[str] = [], float_case_sensitive: bool = False, case_sensitive: Optional[bool] = None) View Source

	def __init__(self,
		*,
		trim: bool=True,
		use_decimal: bool=False,
		list_delimiter: Optional[str]=None,
		none_values: Iterable[str]=[""],
		none_case_sensitive: bool=False,
		true_values: Iterable[str]=["true"],
		false_values: Iterable[str]=["false"],
		bool_case_sensitive: bool=False,
		int_case_sensitive: bool=False,
		inf_values: Iterable[str]=[],
		nan_values: Iterable[str]=[],
		float_case_sensitive: bool=False,
		case_sensitive: Optional[bool]=None,
	):
		"""
			Create a parser with the given settings

			Every setting can be supplied here as a keyword argument, or changed afterwards through the property of the same name on the parser instance. For example,

			```python
			parser = TypeParser(list_delimiter=",")
			assert parser.list_delimiter == ","
			parser.list_delimiter = ";"
			assert parser.list_delimiter == ";"
			```

			Keyword arguments
			-----------------
			`trim`
			: whether leading and trailing whitespace should be stripped from strings

			`use_decimal`
			: whether non-integer numeric values should be inferred to be Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer the type (`infer()`, `infer_series()`, `infer_table()`), and does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).

			`list_delimiter`
			: the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead. Note that this setting is unaffected by <code><var>parser</var>.trim</code> and <code><var>parser</var>.case_sensitive</code>, and will always be used verbatim.

			`none_values`
			: list of strings that represent the value `None`

			`none_case_sensitive`
			: whether matches against `none_values` should be made in a case-sensitive manner

			`true_values`
			: list of strings that represent the bool value `True`

			`false_values`
			: list of strings that represent the bool value `False`

			`bool_case_sensitive`
			: whether matches against `true_values` and `false_values` should be made in a case-sensitive manner

			`int_case_sensitive`
			: whether checks for int should be done in a case-sensitive manner. This only applies to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.

			`inf_values`
			: list of strings that represent the float or Decimal value of infinity. Each of the strings can also be prepended with a negative sign to represent negative infinity.

			`nan_values`
			: list of strings that represent a float or Decimal that is NaN (not a number)

			`float_case_sensitive`
			: whether checks for float or Decimal should be done in a case-sensitive manner. This applies to matches against `inf_values` and `nan_values`, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.

			`case_sensitive`
			: whether all matches should be made in a case-sensitive manner. Sets all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive`, `float_case_sensitive` to the same value, discarding any individual settings.

			Raises
			------
			`ValueError` if any of the options would lead to ambiguities during parsing
		"""

		# Internal state starts from neutral placeholders; the configured values are applied further down through the public attributes
		self._trim: bool = False
		self._use_decimal: bool = False
		self._list_delimiter: Optional[str] = None
		self._match_none_values: set[str] = set()
		self._original_none_values: set[str] = set()
		self._none_case_sensitive: bool = False
		self._match_true_values: set[str] = set()
		self._original_true_values: set[str] = set()
		self._match_false_values: set[str] = set()
		self._original_false_values: set[str] = set()
		self._bool_case_sensitive: bool = False
		self._int_case_sensitive: bool = False
		self._match_inf_values: set[str] = set()
		self._original_inf_values: set[str] = set()
		self._match_nan_values: set[str] = set()
		self._original_nan_values: set[str] = set()
		self._float_case_sensitive: bool = False

		# Fixed character classes that cannot be configured
		self._negative_char = "-"
		self._negative_chars = {self._negative_char, "−"}
		self._sign_chars = self._negative_chars | {"+"}
		# Explicit ASCII digits only, because isdigit("²") == True, but int("²") is invalid
		self._digit_chars = set("0123456789")
		self._digit_separators = {"_"}
		self._scientific_char = "e"
		self._float_separator = "."
		self._reserved_chars = (
			self._sign_chars
			| self._digit_chars
			| self._digit_separators
			| {self._scientific_char, self._float_separator}
		)

		# Apply the caller's settings through the public attributes
		self.trim = trim
		self.use_decimal = use_decimal
		self.list_delimiter = list_delimiter

		# The individual case flags go first; `case_sensitive` is assigned after them
		self.none_case_sensitive = none_case_sensitive
		self.bool_case_sensitive = bool_case_sensitive
		self.int_case_sensitive = int_case_sensitive
		self.float_case_sensitive = float_case_sensitive
		self.case_sensitive = case_sensitive

		self.none_values = none_values

		self.true_values = true_values
		self.false_values = false_values

		self.inf_values = inf_values
		self.nan_values = nan_values

		# Validate every configured special value, one category at a time
		delimiter_values = [] if self._list_delimiter is None else [self._list_delimiter]
		categories = [
			(_SpecialValue.LIST, delimiter_values),
			(_SpecialValue.NONE, self._match_none_values),
			(_SpecialValue.TRUE, self._match_true_values),
			(_SpecialValue.FALSE, self._match_false_values),
			(_SpecialValue.INF, self._match_inf_values),
			(_SpecialValue.NAN, self._match_nan_values),
		]
		for category, candidates in categories:
			for candidate in candidates:
				self._validate_special(category, candidate)

Initialise a new parser

The behaviour of the parser and the type inference can be configured either in the constructor or using mutable properties of a parser instance. For example,

parser = TypeParser(list_delimiter=",")
assert parser.list_delimiter == ","
parser.list_delimiter = ";"
assert parser.list_delimiter == ";"

Keyword arguments

trim : whether leading and trailing whitespace should be stripped from strings

use_decimal : whether non-integer numeric values should be inferred to be Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer the type (infer(), infer_series(), infer_table()), and does not affect methods where the type is explicitly specified (is_float(), is_decimal(), parse_float(), parse_decimal()).

list_delimiter : the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead. Note that this setting is unaffected by parser.trim and parser.case_sensitive, and will always be used verbatim.

none_values : list of strings that represent the value None

none_case_sensitive : whether matches against none_values should be made in a case-sensitive manner

true_values : list of strings that represent the bool value True

false_values : list of strings that represent the bool value False

bool_case_sensitive : whether matches against true_values and false_values should be made in a case-sensitive manner

int_case_sensitive : whether checks for int should be done in a case-sensitive manner. This only applies to values given in scientific notation, where the mantissa and exponent are usually separated by e.

inf_values : list of strings that represent the float or Decimal value of infinity. Each of the strings can also be prepended with a negative sign to represent negative infinity.

nan_values : list of strings that represent a float or Decimal that is NaN (not a number)

float_case_sensitive : whether checks for float or Decimal should be done in a case-sensitive manner. This applies to matches against inf_values and nan_values, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by e.

case_sensitive : whether all matches should be made in a case-sensitive manner. Sets all of none_case_sensitive, bool_case_sensitive, int_case_sensitive, float_case_sensitive to the same value, discarding any individual settings.

Raises

ValueError if any of the options would lead to ambiguities during parsing

def is_none(self, value: str) -> bool: View Source

462	def is_none(self, value: str) -> bool:
463		"""
464			Check if a string represents the value None
465
466			Only strings that match the values in <code><var>parser</var>.none_values</code> will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on <code><var>parser</var>.none_case_sensitive</code>, which is False by default.
467
468			Arguments
469			---------
470			`value`
471			: string to be checked
472
473			Returns
474			-------
475			whether it is None
476
477			Examples
478			--------
479			```python
480			parser = TypeParser()
481			parser.is_none("")     # True
482			parser.is_none("abc")  # False
483			```
484		"""
485		if self._trim:
486			value = value.strip()
487		if not self._bool_case_sensitive:
488			value = value.lower()
489
490		if value in self._match_none_values:
491			return True
492		else:
493			return False

Check if a string represents the value None

Only strings that match the values in parser.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on parser.none_case_sensitive, which is False by default.

Arguments

value : string to be checked

Returns

whether it is None

Examples

parser = TypeParser()
parser.is_none("")     # True
parser.is_none("abc")  # False

def is_bool(self, value: str) -> bool: View Source

496	def is_bool(self, value: str) -> bool:
497		"""
498			Check if a string represents a bool
499
500			Only strings that match the values in <code><var>parser</var>.true_values</code> and <code><var>parser</var>.false_values</code> will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on <code><var>parser</var>.bool_case_sensitive</code>, which is False by default.
501
502			Arguments
503			---------
504			`value`
505			: string to be checked
506
507			Returns
508			-------
509			whether it is a bool
510
511			Examples
512			--------
513			```python
514			parser = TypeParser()
515			parser.is_bool("true")  # True
516			parser.is_bool("")      # True
517			parser.is_bool("abc")   # False
518			```
519		"""
520		if self._trim:
521			value = value.strip()
522
523		if not self._bool_case_sensitive:
524			value = value.lower()
525		if value in self._match_true_values:
526			return True
527		if value in self._match_false_values:
528			return True
529
530		return False

Check if a string represents a bool

Only strings that match the values in parser.true_values and parser.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on parser.bool_case_sensitive, which is False by default.

Arguments

value : string to be checked

Returns

whether it is a bool

Examples

parser = TypeParser()
parser.is_bool("true")  # True
parser.is_bool("")      # False
parser.is_bool("abc")   # False

def is_int( self, value: str, *, allow_negative: bool = True, allow_sign: bool = True, allow_scientific: bool = True) -> bool: View Source

533	def is_int(self, value: str, *, allow_negative: bool=True, allow_sign: bool=True, allow_scientific: bool=True) -> bool:
534		"""
535			Check if a string represents an int
536
537			Arguments
538			---------
539			`value`
540			: string to be checked
541
542			Keyword arguments
543			-----------------
544
545			`allow_negative`
546			: whether to accept negative values. Since negative values are always indicated with a negative sign, `allow_sign` must also be True (which is the default setting) for this to have any effect.
547
548			`allow_sign`
549			: whether to accept values prepended with a sign character. If False, it implies that `allow_negative` is False also.
550
551			`allow_scientific`
552			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
553
554			Returns
555			-------
556			whether it is an int
557
558			Examples
559			--------
560			```python
561			parser = TypeParser()
562			parser.is_int("0")    # True
563			parser.is_int("-1")   # True
564			parser.is_int("abc")  # False
565			parser.is_int("")     # False
566			```
567		"""
568		if self._trim:
569			value = value.strip()
570
571		if len(value) == 0:
572			return False
573
574		if allow_scientific:
575			value, exp = _decompose_string_pair(value, self._scientific_char, self._int_case_sensitive)
576			if exp is not None:
577				return self.is_int(
578					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
579				) and self.is_int(
580					exp, allow_sign=True, allow_negative=False, allow_scientific=False
581				)
582
583		if value[0] in self._sign_chars:
584			if len(value) == 1:
585				return False
586			if not allow_sign:
587				return False
588			if not allow_negative and value[0] in self._negative_chars:
589				return False
590			value = value[1:]
591		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
592			return False
593
594		prev_separated = False
595		for c in value:
596			if c in self._digit_separators:
597				if prev_separated:
598					return False
599				prev_separated = True
600			else:
601				prev_separated = False
602				if c not in self._digit_chars:
603					return False
604		return True

Check if a string represents an int

Arguments

value : string to be checked

Keyword arguments

allow_negative : whether to accept negative values. Since negative values are always indicated with a negative sign, allow_sign must also be True (which is the default setting) for this to have any effect.

allow_sign : whether to accept values prepended with a sign character. If False, it implies that allow_negative is False also.

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that M must be an integer and X must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.

Returns

whether it is an int

Examples

parser = TypeParser()
parser.is_int("0")    # True
parser.is_int("-1")   # True
parser.is_int("abc")  # False
parser.is_int("")     # False

def is_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool: View Source

607	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
608		"""
609			Check if a string represents a float (or equivalently, a Decimal)
610
611			This function will also return True if the string represents an int.
612
613			Alias: `is_decimal()`
614
615			Arguments
616			---------
617			`value`
618			: string to be checked
619
620			Keyword arguments
621			-----------------
622
623			`allow_scientific`
624			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
625
626			`allow_inf`
627			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended with a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
628
629			`allow_nan`
630			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpeted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
631
632			Returns
633			-------
634			whether it is a float or Decimal
635
636			Examples
637			--------
638			```python
639			parser = TypeParser()
640			parser.is_float("1.")       # True
641			parser.is_float("12.3e-2")  # True
642			parser.is_float("abc")      # False
643			parser.is_float("")         # False
644			```
645		"""
646		if self._trim:
647			value = value.strip()
648
649		if len(value) > 0 and value[0] in self._sign_chars:
650			value = value[1:]
651
652		if self._float_case_sensitive:
653			special_value = value
654		else:
655			special_value = value.lower()
656		if allow_inf and special_value in self._match_inf_values:
657			return True
658		if allow_nan and special_value in self._match_nan_values:
659			return True
660
661		if len(value) == 0:
662			return False
663
664		if allow_scientific:
665			value, exp = _decompose_string_pair(value, self._scientific_char, self._float_case_sensitive)
666			if exp is not None:
667				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
668
669		value, frac = _decompose_string_pair(value, self._float_separator, self._float_case_sensitive)
670		if frac is not None:
671			if value == "" and frac == "":
672				return False
673			return (
674				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
675			) and (
676				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
677			)
678
679		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)

Check if a string represents a float (or equivalently, a Decimal)

This function will also return True if the string represents an int.

Alias: is_decimal()

Arguments

value : string to be checked

Keyword arguments

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in parser.inf_values (empty set by default) are interpreted as infinity, or as negative infinity if prepended with a negative sign. The case sensitivity of this matching depends on parser.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in parser.nan_values (empty set by default) are interpreted as NaN. The case sensitivity of this matching also depends on parser.float_case_sensitive, which is False by default.

Returns

whether it is a float or Decimal

Examples

parser = TypeParser()
parser.is_float("1.")       # True
parser.is_float("12.3e-2")  # True
parser.is_float("abc")      # False
parser.is_float("")         # False

def is_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool: View Source

682	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
683		"""
684			Alias of `is_float()`
685		"""
686		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)

Alias of is_float()

def parse_none(self, value: str) -> None: View Source

689	def parse_none(self, value: str) -> None:
690		"""
691			Parse a string and return it as the value None if possible
692
693			Only strings that match the values in <code><var>parser</var>.none_values</code> will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on <code><var>parser</var>.none_case_sensitive</code>, which is False by default.
694
695			Arguments
696			---------
697			`value`
698			: string to be parsed
699
700			Returns
701			-------
702			parsed None value
703
704			Raises
705			------
706			`ValueError` if `value` cannot be parsed
707
708			Examples
709			--------
710			```python
711			parser = TypeParser()
712			parser.parse_none("")     # None
713			parser.parse_none("abc")  # raises ValueError
714			```
715		"""
716		if self.is_none(value):
717			return None
718		else:
719			raise ValueError(f"not a none value: {value}")

Parse a string and return it as the value None if possible

Arguments

value : string to be parsed

Returns

parsed None value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_none("")     # None
parser.parse_none("abc")  # raises ValueError

def parse_bool(self, value: str) -> bool: View Source

722	def parse_bool(self, value: str) -> bool:
723		"""
724			Parse a string and return it as a bool if possible
725
726			Only strings that match the values in <code><var>parser</var>.true_values</code> and <code><var>parser</var>.false_values</code> will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on <code><var>parser</var>.bool_case_sensitive</code>, which is False by default.
727
728			Arguments
729			---------
730			`value`
731			: string to be parsed
732
733			Returns
734			-------
735			parsed bool value
736
737			Raises
738			------
739			`ValueError` if `value` cannot be parsed
740
741			Examples
742			--------
743			```python
744			parser = TypeParser()
745			parser.parse_bool("true")   # True
746			parser.parse_bool("FALSE")  # False
747			```
748		"""
749		if self._trim:
750			value = value.strip()
751
752		if self._bool_case_sensitive:
753			special_value = value
754		else:
755			special_value = value.lower()
756
757		if special_value in self._match_true_values:
758			return True
759		if special_value in self._match_false_values:
760			return False
761
762		raise ValueError(f"not a boolean: {value}")

Parse a string and return it as a bool if possible

Arguments

value : string to be parsed

Returns

parsed bool value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_bool("true")   # True
parser.parse_bool("FALSE")  # False

def parse_int( self, value: str, *, allow_negative: bool = True, allow_sign: bool = True, allow_scientific: bool = True) -> int: View Source

765	def parse_int(self, value: str, *, allow_negative: bool=True, allow_sign: bool=True, allow_scientific: bool=True) -> int:
766		"""
767			Parse a string and return it as an int if possible
768
769			If the string represents a bool, it will be converted to `1` for True and `0` for False.
770
771			Arguments
772			---------
773			`value`
774			: string to be parsed
775
776			Keyword arguments
777			-----------------
778
779			`allow_negative`
780			: whether to accept negative values. Since negative values are always indicated with a negative sign, `allow_sign` must also be True (which is the default setting) for this to have any effect.
781
782			`allow_sign`
783			: whether to accept values prepended with a sign character. If False, it implies that `allow_negative` is False also.
784
785			`allow_scientific`
786			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>M</var> must be an integer and <var>X</var> must be a non-negative integer, even in cases where the expression would evaluate mathematically to an integer.
787
788			Returns
789			-------
790			parsed int value
791
792			Raises
793			------
794			`ValueError` if `value` cannot be parsed
795
796			Examples
797			--------
798			```python
799			parser = TypeParser()
800			parser.parse_int("0")    # 0
801			parser.parse_int("-1")   # -1
802			parser.parse_int("2e3")  # 2000
803			```
804		"""
805		if self._trim:
806			value = value.strip()
807
808		if self.is_int(value, allow_negative=allow_negative, allow_sign=allow_sign, allow_scientific=allow_scientific):
809			if allow_scientific:
810				value, exp = _decompose_string_pair(value, self._scientific_char, self._int_case_sensitive)
811				if exp is not None:
812					if value[0] in (self._negative_chars - {self._negative_char}):
813						value = self._negative_char + value[1:]
814					return int(value) * (10 ** int(exp))
815
816			if value[0] in (self._negative_chars - {self._negative_char}):
817				value = self._negative_char + value[1:]
818			return int(value)
819
820		elif self.is_bool(value):
821			return int(self.parse_bool(value))
822		else:
823			raise ValueError(f"not an integer: {value}")

Parse a string and return it as an int if possible

If the string represents a bool, it will be converted to 1 for True and 0 for False.

Arguments

value : string to be parsed

Keyword arguments

allow_sign : whether to accept values prepended with a sign character. If False, it implies that allow_negative is False also.

Returns

parsed int value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_int("0")    # 0
parser.parse_int("-1")   # -1
parser.parse_int("2e3")  # 2000

def parse_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> float: View Source

871	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
872		"""
873			Parse a string and return it as a (non-exact) float if possible
874
875			If the string represents a bool, it will be converted to `1.` for True and `0.` for False. If the string represents an int, it will be converted to a float also.
876
877			Behaves analogously to `parse_decimal()`, except that that returns an exact Decimal instead.
878
879			Arguments
880			---------
881			`value`
882			: string to be parsed
883
884			Keyword arguments
885			-----------------
886
887			`allow_scientific`
888			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
889
890			`allow_inf`
891			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended with a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
892
893			`allow_nan`
894			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpeted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
895
896			Returns
897			-------
898			parsed float value
899
900			Raises
901			------
902			`ValueError` if `value` cannot be parsed
903
904			Examples
905			--------
906			```python
907			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
908			parser.parse_float("1.")       # 1.
909			parser.parse_float("1.23e2")   # 123.
910			parser.parse_float("1.23e-2")  # 0.0123
911			parser.parse_float("inf")      # math.inf
912			```
913		"""
914		return self._parse_floatlike(value, float, math.inf, math.nan,
915			allow_scientific=allow_scientific,
916			allow_inf=allow_inf,
917			allow_nan=allow_nan,
918		)

Parse a string and return it as a (non-exact) float if possible

If the string represents a bool, it will be converted to 1. for True and 0. for False. If the string represents an int, it will be converted to a float also.

Behaves analogously to parse_decimal(), except that the latter returns an exact Decimal instead.

Arguments

value : string to be parsed

Keyword arguments

Returns

parsed float value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_float("1.")       # 1.
parser.parse_float("1.23e2")   # 123.
parser.parse_float("1.23e-2")  # 0.0123
parser.parse_float("inf")      # math.inf

def parse_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> decimal.Decimal: View Source

921	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
922		"""
923			Parse a string and return it as an exact Decimal if possible
924
925			If the string represents a bool, it will be converted to `Decimal(1)` for True and `Decimal(0)` for False. If the string represents an int, it will be converted to a Decimal also.
926
927			Behaves analogously to `parse_float()`, except that that returns a non-exact float instead.
928
929			Arguments
930			---------
931			`value`
932			: string to be parsed
933
934			Keyword arguments
935			-----------------
936
937			`allow_scientific`
938			: whether to accept scientific notation. If True, strings of the form <code>"<var>M</var>e<var>X</var>"</code> will be interpreted as the expression <code><var>M</var> * (10 ** <var>X</var>)</code>, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
939
940			`allow_inf`
941			: whether to accept positive and negative infinity values. If True, strings that match the values in <code><var>parser</var>.inf_values</code> (empty set by default) are interpreted as infinity, or as negative infinity if prepended with a negative sign. The case sensitivity of this matching depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
942
943			`allow_nan`
944			: whether to accept NaN (not a number) representations. If True, strings that match the values in <code><var>parser</var>.nan_values</code> (empty set by default) are interpeted as NaN. The case sensitivity of this matching also depends on <code><var>parser</var>.float_case_sensitive</code>, which is False by default.
945
946			Returns
947			-------
948			parsed Decimal value
949
950			Raises
951			------
952			`ValueError` if `value` cannot be parsed
953
954			Examples
955			--------
956			```python
957			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
958			parser.parse_decimal("1.")       # Decimal(1)
959			parser.parse_decimal("1.23e2")   # Decimal(123)
960			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
961			parser.parse_decimal("inf")      # Decimal(math.inf)
962			```
963		"""
964		return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan),
965			allow_scientific=allow_scientific,
966			allow_inf=allow_inf,
967			allow_nan=allow_nan,
968		)

Parse a string and return it as an exact Decimal if possible

If the string represents a bool, it will be converted to Decimal(1) for True and Decimal(0) for False. If the string represents an int, it will be converted to a Decimal also.

Behaves analogously to parse_float(), except that the latter returns a non-exact float instead.

Arguments

value : string to be parsed

Keyword arguments

Returns

parsed Decimal value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_decimal("1.")       # Decimal(1)
parser.parse_decimal("1.23e2")   # Decimal(123)
parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
parser.parse_decimal("inf")      # Decimal(math.inf)

def infer( self, value: str) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]: View Source

 971	def infer(self, value: str) -> AnyValueType:
 972		"""
 973			Infer the underlying type of a string
 974
 975			Also check for inline lists if <code><var>parser</var>.list_delimiter</code> is not None.
 976
 977			Arguments
 978			---------
 979			`value`
 980			: the string for which the type should be inferred
 981
 982			Returns
 983			-------
 984			inferred type
 985
 986			Examples
 987			--------
 988			```python
 989			parser = TypeParser()
 990			parser.infer("true")  # bool
 991			parser.infer("2.0")   # float
 992			parser.infer("abc")   # str
 993			```
 994		"""
 995		if self.is_none(value):
 996			return NoneType
 997		if self.is_bool(value):
 998			return bool
 999		if self.is_int(value):
1000			return int
1001		if self.is_float(value):
1002			if self._use_decimal:
1003				return Decimal
1004			else:
1005				return float
1006
1007		if self._trim:
1008			value = value.strip()
1009
1010		if self._list_delimiter is not None and self._list_delimiter in value:
1011			subvalues = value.split(self._list_delimiter)
1012			if self._trim:
1013				subvalues = [subvalue.strip() for subvalue in subvalues]
1014			reduced_type = reduce_types(self.infer(subvalue) for subvalue in subvalues)
1015			r = list[reduced_type]
1016			return r
1017
1018		return GenericValue

Infer the underlying type of a string

Also check for inline lists if parser.list_delimiter is not None.

Arguments

value : the string for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer("true")  # bool
parser.infer("2.0")   # float
parser.infer("abc")   # str

def infer_series( self, values: Iterable[str]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]: View Source

1021	def infer_series(self, values: Iterable[str]) -> AnyValueType:
1022		"""
1023			Infer the underlying common type of a series of strings
1024
1025			If the values in the series do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
1026
1027			Arguments
1028			---------
1029			`values`
1030			: series of strings for which the type should be inferred
1031
1032			Returns
1033			-------
1034			inferred type
1035
1036			Examples
1037			--------
1038			```python
1039			parser = TypeParser()
1040			parser.infer_series(["1", "2", "3.4"])       # float
1041			parser.infer_series(["true", "false", "2"])  # int
1042			parser.infer_series(["1", "2.3", "abc"])     # str
1043			```
1044		"""
1045		return reduce_types(self.infer(value) for value in values)

Infer the underlying common type of a series of strings

If the values in the series do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Arguments

values : series of strings for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer_series(["1", "2", "3.4"])       # float
parser.infer_series(["true", "false", "2"])  # int
parser.infer_series(["1", "2.3", "abc"])     # str

def infer_table( self, rows: Iterable[Sequence[str]]) -> list[typing.Type[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]]: View Source

1048	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
1049		"""
1050			Infer the underlying common type for each column of a table of strings
1051
1052			For each column, if the values do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1053
1054			Note that the individual inferred types of every value in the table must be able to fit into memory.
1055
1056			Arguments
1057			---------
1058			`rows`
1059			: table of strings for which the types should be inferred, in row-major order
1060
1061			Returns
1062			-------
1063			inferred types
1064
1065			Examples
1066			--------
1067			```python
1068			parser = TypeParser()
1069			parser.infer_table([
1070				["1",   "true",  "1"],
1071				["2",   "false", "2.3"],
1072				["3.4", "2",     "abc"],
1073			])
1074			# [float, int, str]
1075			```
1076		"""
1077		rows_iter = iter(rows)
1078		first_row = next(rows_iter, None)
1079		if first_row is None:
1080			return []
1081
1082		num_cols = len(first_row)
1083		if num_cols == 0:
1084			return []
1085
1086		table = _TypeTable([[self.infer(value)] for value in first_row])
1087		for row in rows_iter:
1088			table.add_row([self.infer(value) for value in row])
1089
1090		return [reduce_types(col) for col in table.cols]

Infer the underlying common type for each column of a table of strings

For each column, if the values do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the individual inferred types of every value in the table must be able to fit into memory.

Arguments

rows : table of strings for which the types should be inferred, in row-major order

Returns

inferred types

Examples

parser = TypeParser()
parser.infer_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
# [float, int, str]

def convert( self, value: str, target_type: Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]) -> Union[str, int, float, decimal.Decimal, bool, NoneType, list]: View Source

1093	def convert(self, value: str, target_type: AnyValueType) -> AnyValue:
1094		"""
1095			Convert a string to the specified target type if possible
1096
1097			Valid values for `target_type` include any return value from `infer()`, `infer_series()` and `infer_table()`. To infer and convert the string automatically, use `parse()`, `parse_series()` or `parse_table()` instead.
1098
1099			Arguments
1100			---------
1101			`value`
1102			: the string to be converted
1103
1104			`target_type`
1105			: type to which the value should be converted
1106
1107			Returns
1108			-------
1109			converted value
1110
1111			Raises
1112			-------
1113			`ValueError`
1114			: if `value` cannot be converted to `target_type`
1115
1116			`TypeError`
1117			: if `target_type` is not a valid type
1118
1119			Examples
1120			--------
1121			```python
1122			parser = TypeParser()
1123			parser.convert("true", bool)  # True
1124			parser.convert("2", int)      # 2
1125			parser.convert("2", float)    # 2.
1126			```
1127		"""
1128		base, type_args = _decompose_type(target_type)
1129		if base == NoneType:
1130			return self.parse_none(value)
1131		elif base == bool:
1132			return self.parse_bool(value)
1133		elif base == int:
1134			return self.parse_int(value)
1135		elif base == Decimal:
1136			return self.parse_decimal(value)
1137		elif base == float:
1138			return self.parse_float(value)
1139		elif base == str:
1140			return value
1141		elif base == Nullable:
1142			if self.is_none(value):
1143				return None
1144			else:
1145				if type_args is not None and len(type_args) == 1 and type_args[0] != str:
1146					inner_type = type_args[0]
1147					return self.convert(value, inner_type)
1148				else:
1149					return value
1150		elif base == list:
1151			subvalues = value.split(self._list_delimiter)
1152			if self._trim:
1153				subvalues = [subvalue.strip() for subvalue in subvalues]
1154			if type_args is not None and len(type_args) == 1 and type_args[0] != str:
1155				subtype = type_args[0]
1156				return [self.convert(subvalue, subtype) for subvalue in subvalues]
1157			else:
1158				return subvalues
1159		else:
1160			raise TypeError(f"cannot convert to type: {target_type}")

Convert a string to the specified target type if possible

Valid values for target_type include any return value from infer(), infer_series() and infer_table(). To infer and convert the string automatically, use parse(), parse_series() or parse_table() instead.

Arguments

value : the string to be converted

target_type : type to which the value should be converted

Returns

converted value

Raises

ValueError : if value cannot be converted to target_type

TypeError : if target_type is not a valid type

Examples

parser = TypeParser()
parser.convert("true", bool)  # True
parser.convert("2", int)      # 2
parser.convert("2", float)    # 2.

def parse( self, value: str) -> Union[str, int, float, decimal.Decimal, bool, NoneType, list]: View Source

1163	def parse(self, value: str) -> AnyValue:
1164		"""
1165			Parse a string and convert it to its underlying type
1166
1167			Arguments
1168			---------
1169			`value`
1170			: the string to be parsed
1171
1172			Returns
1173			-------
1174			converted value
1175
1176			Examples
1177			--------
1178			```python
1179			parser = TypeParser()
1180			parser.parse("true")  # True
1181			parser.parse("2.0")   # 2.
1182			parser.parse("abc")   # "abc"
1183			```
1184		"""
1185		return self.convert(value, self.infer(value))

Parse a string and convert it to its underlying type

Arguments

value : the string to be parsed

Returns

converted value

Examples

parser = TypeParser()
parser.parse("true")  # True
parser.parse("2.0")   # 2.
parser.parse("abc")   # "abc"

def parse_series( self, values: Iterable[str]) -> list[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, list]]: View Source

1188	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
1189		"""
1190			Parse a series of strings and convert them to their underlying common type
1191
1192			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
1193
1194			Arguments
1195			---------
1196			`values`
1197			: series of strings to be parsed
1198
1199			Returns
1200			-------
1201			converted values
1202
1203			Examples
1204			--------
1205			```python
1206			parser = TypeParser()
1207			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
1208			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
1209			parser.parse_series(["true", "false", ""])  # [True, False, None]
1210			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
1211			```
1212		"""
1213		inferred = self.infer_series(values)
1214		return [self.convert(value, inferred) for value in values]

Parse a series of strings and convert them to their underlying common type

If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Arguments

values : series of strings to be parsed

Returns

converted values

Examples

parser = TypeParser()
parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
parser.parse_series(["true", "false", ""])  # [True, False, None]
parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]

def parse_table( self, rows: Iterable[Sequence[str]]) -> list[list[typing.Union[str, int, float, decimal.Decimal, bool, NoneType, list]]]: View Source

1217	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
1218		"""
1219			Parse a table of strings and convert them to the underlying common type of each column
1220
1221			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1222
1223			Note that the type to which the values should be converted is determined by `infer_table()`, and so the individual inferred types of every value in the table must be able to fit into memory.
1224
1225			This is a function that computes the entire table and returns it all at once. The generator function `iterate_table()` behaves analogously, except that it computes and yields each row one at a time instead.
1226
1227			Arguments
1228			---------
1229			`rows`
1230			: table of strings to be parsed, in row-major order
1231
1232			`iterator`
1233			: whether the parsed values should be yielded as an iterator. If False, which is the default, the entire table is computed and returned as a list of lists. If True, this function behaves as a generator, and the rows of the table are computed and yielded one at a time. However, note that even when set to True, the type inference requires that inferred type of each individual value must all be able to fit into memory at once.
1234
1235			Returns
1236			-------
1237			converted table of values, in row-major order
1238
1239			Examples
1240			--------
1241			```python
1242			parser = TypeParser()
1243			table = parser.parse_table([
1244				["1", "5",   "true",  "1"],
1245				["2", "6.7", "false", "2.3"],
1246				["3", "8.0", "",      "abc"],
1247			]):
1248			assert table == [
1249				[1, 5.,  True,  "1"],
1250				[2, 6.7, False, "2.3"],
1251				[3, 8.,  None,  "abc"],
1252			]
1253			```
1254		"""
1255		return [converted_row for converted_row in self.iterate_table(rows)]

Parse a table of strings and convert them to the underlying common type of each column

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the type to which the values should be converted is determined by infer_table(), and so the individual inferred types of every value in the table must be able to fit into memory.

This is a function that computes the entire table and returns it all at once. The generator function iterate_table() behaves analogously, except that it computes and yields each row one at a time instead.

Arguments

rows : table of strings to be parsed, in row-major order

iterator : whether the parsed values should be yielded as an iterator. If False, which is the default, the entire table is computed and returned as a list of lists. If True, this function behaves as a generator, and the rows of the table are computed and yielded one at a time. However, note that even when set to True, the type inference requires that inferred type of each individual value must all be able to fit into memory at once.

Returns

converted table of values, in row-major order

Examples

parser = TypeParser()
table = parser.parse_table([
	["1", "5",   "true",  "1"],
	["2", "6.7", "false", "2.3"],
	["3", "8.0", "",      "abc"],
])
assert table == [
	[1, 5.,  True,  "1"],
	[2, 6.7, False, "2.3"],
	[3, 8.,  None,  "abc"],
]

def iterate_table( self, rows: Iterable[Sequence[str]]) -> Iterator[list[Union[str, int, float, decimal.Decimal, bool, NoneType, list]]]: View Source

1258	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
1259		"""
1260			Parse a table of strings for the underlying common type of each column, then convert and yield each row
1261
1262			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
1263
1264			This is a generator function that computes and yields each row one at a time. However, note that in order to determine the types to which each column should be converted, the individual inferred types of every value in the table must still be able to fit into memory.
1265
1266			The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists instead.
1267
1268			Arguments
1269			---------
1270			`rows`
1271			: table of strings to be parsed, in row-major order
1272
1273			Yields
1274			------
1275			each row of converted table values
1276
1277			Examples
1278			--------
1279			```python
1280			parser = TypeParser()
1281			table = parser.iterate_table([
1282				["1",   "true",  "1"],
1283				["2",   "false", "2.3"],
1284				["3.4", "2",     "abc"],
1285			]):
1286			assert next(table) == [1.,  1, "1"]
1287			assert next(table) == [2.,  0, "2.3"]
1288			assert next(table) == [3.4, 2, "abc"]
1289			```
1290		"""
1291		inferred_types = self.infer_table(rows)
1292
1293		for row in rows:
1294			yield [self.convert(value, inferred) for value, inferred in zip(row, inferred_types)]

Parse a table of strings for the underlying common type of each column, then convert and yield each row

This is a generator function that computes and yields each row one at a time. However, note that in order to determine the types to which each column should be converted, the individual inferred types of every value in the table must still be able to fit into memory.

The function parse_table() behaves analogously, except that it computes the entire table and returns it as a list of lists instead.

Arguments

rows : table of strings to be parsed, in row-major order

Yields

each row of converted table values

Examples

parser = TypeParser()
table = parser.iterate_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
assert next(table) == [1.,  1, "1"]
assert next(table) == [2.,  0, "2.3"]
assert next(table) == [3.4, 2, "abc"]

def reduce_types( types: Iterable[Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, Nullable, list]]: View Source

def reduce_types(types: Iterable[AnyValueType]) -> AnyValueType:
	"""
		Collapse several types into one common type.

		When the input types are not all identical, the result is the narrowest possible type that encompasses every one of them.

		This is useful in situations such as parsing a CSV file, where each column should end up with a single consistent type even though its individual values could be read variously as ints or floats (or other types).

		Arguments
		---------
		`types`
		: types to be reduced

		Returns
		-------
		common reduced type

		Examples
		--------
		```python
		reduce_types([int, float])        # float
		reduce_types([bool, int])         # int
		reduce_types([int, float, str])   # str
		```
	"""
	common: Union[AnyValueType, None] = None
	for candidate in types:
		if common is None:
			# first type seen: it is the running result so far
			common = candidate
		elif candidate != common:
			common = _merge_types(common, candidate)

		if common == _TerminalValue:
			# the terminal type short-circuits the reduction; any remaining
			# input types are not examined
			return _TerminalValue

	# `common` is still None only when `types` was empty
	return GenericValue if common is None else common

Reduce multiple types into a single common type.

If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types.

This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).

Arguments

types : types to be reduced

Returns

common reduced type

Examples

reduce_types([int, float])        # float
reduce_types([bool, int])         # int
reduce_types([int, float, str])   # str

class Nullable(typing.Generic[~S]): View Source

class Nullable(Generic[S]):
	"""
		Dummy generic container marking a scalar (`S`) whose value may alternatively be None

		An annotation of `Nullable[S]` is treated as equivalent to `Union[S, types.NoneType]`: it accepts either a value of type `S` or the value `None`.

		This class should not be instantiated.
	"""

Dummy container type that represents a scalar (S) that could also be None

The type annotation Nullable[S] is treated as equivalent to Union[S, types.NoneType], which will accept either a value of type S or the value None.

This class should not be instantiated.