Source code for structa.conversions

# structa: an application for analyzing repetitive data structures
#
# Copyright (c) 2020-2021 Dave Jones <dave@waveform.org.uk>
#
# SPDX-License-Identifier: GPL-2.0-or-later

import re

from dateutil.parser import parse
from dateutil.relativedelta import relativedelta

from .collections import Counter, FrozenCounter


[docs]def try_conversion(sample, conversion, threshold=0): """ Given a :class:`~collections.Counter` *sample* of strings, call the specified *conversion* on each string returning the set of converted values. *conversion* must be a callable that accepts a single string parameter and returns the converted value. If the *conversion* fails it must raise a :exc:`ValueError` exception. If *threshold* is specified (defaults to 0), it defines the number of "bad" conversions (which result in :exc:`ValueError` being raised) that will be ignored. If *threshold* is exceeded, then :exc:`ValueError` will be raised (or rather passed through from the underlying *conversion*). Likewise, if *threshold* is not exceeded, but zero conversions are successful then :exc:`ValueError` will also be raised. """ assert isinstance(sample, (Counter, FrozenCounter)) assert sample result = Counter() if threshold: assert threshold > 0 for item, count in sample.items(): try: result[conversion(item)] += count except ValueError: # XXX and TypeError? threshold -= count if threshold < 0: raise if result: return result else: # If threshold permitted us to get to this point but we managed to # convert absolutely nothing, that's not success! raise ValueError('zero successful conversions') else: for item, count in sample.items(): result[conversion(item)] += count return result
[docs]def parse_bool(s, false='0', true='1'): """ Convert the string *s* (stripped and lower-cased) to a bool, if it matches either the *false* string (defaults to '0') or *true* (defaults to '1'). If it matches neither, raises a :exc:`ValueError`. """ try: return { false: False, true: True, }[s.strip().lower()] except KeyError: raise ValueError('not a valid bool {!r}'.format(s))
_SUFFIXES = [ # This ordering is important; the minutes regex must be checked *before* # the months regex as one is a legitimate subset of the other ('microseconds', 'm(icro)?s(ec(ond)?s?)?'), ('seconds', 's(ec(ond)?s?)?'), ('minutes', 'mi(n(ute)?s?)?'), ('hours', 'h((ou)?rs?)?'), ('days', 'd(ays?)?'), ('weeks', 'w((ee)?ks?)?'), ('months', 'm(on(th)?s?)?'), ('years', 'y((ea)?rs?)?'), ] _SPANS = [ (span, re.compile(r'^(?:(?P<num>[+-]?\d+)\s*{}\b)'.format(suffix))) for span, suffix in _SUFFIXES ]
[docs]def parse_duration(s): """ Convert the string *s* to a :class:`~dateutil.relativedelta.relativedelta`. The string must consist of white-space and/or comma separated values which are a number followed by a suffix indicating duration. For example: >>> parse_duration('1s') relativedelta(seconds=+1) >>> parse_duration('5 minutes, 30 seconds') relativedelta(minutes=+5, seconds=+30) >>> parse_duration('1 year') relativedelta(years=+1) Note that some suffixes like "m" can be ambiguous; using common abbreviations should avoid ambiguity: >>> parse_duration('1 m') relativedelta(months=+1) >>> parse_duration('1 min') relativedelta(minutes=+1) >>> parse_duration('1 mon') relativedelta(months=+1) The set of possible durations, and their recognized suffixes is as follows: * *Microseconds*: microseconds, microsecond, microsec, micros, micro, mseconds, msecond, msecs, msec, ms * *Seconds*: seconds, second, secs, sec, s * *Minutes*: minutes, minute, mins, min, mi * *Hours*: hours, hour, hrs, hr, h * *Days*: days, day, d * *Weeks*: weeks, week, wks, wk, w * *Months*: months, month, mons, mon, mths, mth, m * *Years*: years, year, yrs, yr, y If conversion fails, :exc:`ValueError` is raised. """ spans = {span: 0 for span, regex in _SPANS} t = s while True: t = t.lstrip(' \t\n,') if not t: return relativedelta(**spans) for span, regex in _SPANS: m = regex.search(t) if m: spans[span] += int(m.group('num')) # XXX This only truncates from the start; that in turn means # that things must be ordered year/month/day/hour/etc. Make # the algorithm order agnostic t = t[len(m.group(0)):] break else: raise ValueError('invalid duration {}'.format(s))
[docs]def parse_duration_or_timestamp(s): """ Convert the string *s* to a :class:`~datetime.datetime` or a :class:`~dateutil.relativedelta.relativedelta`. Duration conversion is attempted to and, if this fails, date-time conversion is attempted. A :exc:`ValueError` is raised if both conversions fail. """ try: return parse_duration(s) except ValueError: return parse(s)