# Source code for zulu.parser

"""The parser module."""

from datetime import datetime, timedelta
from itertools import groupby

from babel.dates import (
    LC_TIME,
    format_datetime as _format_datetime,
    format_timedelta as _format_timedelta,
)
from dateutil.tz import gettz, tzlocal, tzutc
import iso8601
import pytimeparse

from .helpers import NUMBER_TYPES


# Shared UTC tzinfo instance; used as the fallback timezone when none is supplied.
UTC = tzutc()
# Unix epoch as a timezone-aware datetime; the reference point for ``get_timestamp()``.
EPOCH = datetime(1970, 1, 1, tzinfo=UTC)

# Names of the two built-in parse formats. ``_parse_datetime_format()`` matches them
# case-insensitively and handles them specially instead of treating them as strptime
# directives or date patterns.
ISO8601 = "ISO8601"
TIMESTAMP = "timestamp"
# Formats tried, in order, by ``parse_datetime()`` when it is called with ``formats=None``.
DEFAULT_PARSE_DATETIME_FORMATS = (ISO8601, TIMESTAMP)


# Subset of Unicode date field patterns from:
# https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
# that are supported as an alternative to Python's strptime directives when parsing. This mapping
# is used by ``_date_pattern_to_directive()`` to convert each Unicode pattern token (the dict keys)
# to the best matched strptime directive (the values). Every value is a single directive string;
# padded and non-padded variants of a field map to the same directive. Tokens that are not listed
# here are passed through unchanged. Formatting with date patterns does not use this table:
# ``format_datetime()`` hands patterns to babel instead.
DATE_PATTERN_TO_DIRECTIVE = {
    "yyyy": "%Y",  # Year with century
    "yy": "%y",  # Year without century
    "YYYY": "%Y",  # Year with century
    "YY": "%y",  # Year without century
    "MMMM": "%B",  # Month's full name
    "MMM": "%b",  # Month's abbreviated name
    "MM": "%m",  # Month padded
    "M": "%m",  # Month not padded
    "DDD": "%j",  # Day of the year padded
    "DD": "%j",  # Day of the year padded
    "D": "%j",  # Day of the year not padded
    "dd": "%d",  # Day of the month padded
    "d": "%d",  # Day of the month not padded
    "EEEE": "%A",  # Weekday's full name
    "EEE": "%a",  # Weekday's abbreviated name
    "EE": "%a",  # Weekday's abbreviated name
    "E": "%a",  # Weekday's abbreviated name
    "eee": "%a",  # Weekday's abbreviated name
    "ee": "%w",  # Weekday as decimal padded
    "e": "%w",  # Weekday as decimal not padded
    "HH": "%H",  # Hour-24 padded
    "H": "%H",  # Hour-24 not padded
    "hh": "%I",  # Hour-12 padded
    "h": "%I",  # Hour-12 not padded
    "mm": "%M",  # Minute padded
    "m": "%M",  # Minute not padded
    "ss": "%S",  # Second padded
    "s": "%S",  # Second not padded
    "SSSSSS": "%f",  # Microsecond padded
    "SSSSS": "%f",  # Microsecond padded
    "SSSS": "%f",  # Microsecond padded
    "SSS": "%f",  # Microsecond padded
    "SS": "%f",  # Microsecond padded
    "S": "%f",  # Microsecond not padded
    "a": "%p",  # am or pm
    "z": "%z",  # UTC offset without separator
    "Z": "%z",  # UTC offset without separator
}

# Units accepted as the ``granularity`` argument of ``format_timedelta()``.
TIMEDELTA_GRANULARITIES = ("second", "minute", "hour", "day", "week", "month", "year")

# Styles accepted as the ``format`` argument of ``format_timedelta()``.
TIMEDELTA_FORMATS = ("long", "short", "narrow")


class ParseError(Exception):
    """
    Exception raised when an object cannot be parsed.

    Raised by ``parse_datetime()`` when a value matches none of the requested datetime formats
    (or carries an invalid timezone offset) and by ``parse_timedelta()`` when a string is not a
    recognized duration.
    """
def parse_datetime(obj, formats=None, default_tz=None):
    """
    Attempt to parse `obj` as a ``datetime`` using a list of `formats`.

    If no timezone information is found in `obj`, the parsed naive datetime is tagged with
    `default_tz`. Its wall-clock fields are left untouched (``datetime.replace`` is used, not a
    timezone conversion).

    Args:
        obj (str|number|datetime): Object to parse. Objects that already look like a datetime
            (see ``is_valid_datetime()``) are returned as-is.
        formats (str|list, optional): List of string formats to use when parsing. Each one is
            tried in order until one succeeds. Defaults to ``["ISO8601", "timestamp"]``.
        default_tz (None|str|tzinfo, optional): Default timezone to use when parsed datetime
            object does not contain a timezone. Defaults to ``UTC``.

    Returns:
        datetime

    Raises:
        ValueError: When `default_tz` is an unrecognized timezone.
        ParseError: When `obj` can't be parsed as a datetime.
    """
    if default_tz is None:
        default_tz = UTC

    # Validate the timezone up front so a bad `default_tz` is reported even when `obj` is
    # already a datetime and no parsing takes place.
    if not is_valid_timezone(default_tz):
        raise ValueError(f"Unrecognized timezone: {default_tz}")

    if is_valid_datetime(obj):
        return obj

    if formats is None:
        formats = DEFAULT_PARSE_DATETIME_FORMATS
    elif not isinstance(formats, (list, tuple)):
        formats = [formats]

    dt = _parse_datetime_formats(obj, formats)

    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=get_timezone(default_tz))

    if not has_valid_timezone(dt):  # pragma: no cover
        raise ParseError("Timezone offset must be strictly between -24/+24 hours")

    return dt


def _parse_datetime_formats(obj, formats):
    """
    Parse `obj` as datetime using list of `formats`.

    Formats are tried in order and the first successful result is returned. When every format
    fails, a ``ParseError`` listing each format together with its failure reason is raised.
    """
    failures = []

    for fmt in formats:
        try:
            return _parse_datetime_format(obj, fmt)
        except Exception as exc:
            # Any failure just means "this format does not apply"; remember why and move on.
            # Pairs are kept in a list (not a dict) so unhashable entries can't break reporting.
            failures.append((fmt, str(exc)))

    err = ", ".join(f'"{fmt}" ({reason})' for fmt, reason in failures)
    raise ParseError(f'Value "{obj}" does not match any format in [{err}]')


def _parse_datetime_format(obj, format):
    """
    Parse `obj` as datetime using `format`.

    `format` may be ``"ISO8601"`` or ``"timestamp"`` (both matched case-insensitively), a
    strptime format containing ``%`` directives, or a Unicode date pattern which is first
    converted to strptime directives.
    """
    if format.upper() == ISO8601:
        return iso8601.parse_date(obj, default_timezone=None)

    if format.lower() == TIMESTAMP:
        return datetime.fromtimestamp(obj, UTC)

    if "%" not in format:
        format = _date_pattern_to_directive(format)

    return datetime.strptime(obj, format)


def format_datetime(dt, format=None, tz=None, locale=LC_TIME):
    """
    Return string formatted datetime, `dt`, using format directives or pattern in `format`.

    If timezone, `tz`, is supplied, the datetime will be shifted to that timezone before being
    formatted.

    Args:
        dt (datetime): A datetime instance.
        format (str, optional): Datetime format string. Defaults to ``None`` which uses ISO-8601
            format.
        tz (None|str|tzinfo, optional): Timezone to shift `dt` to before formatting. Strings are
            resolved with ``get_timezone()``.
        locale (str|Locale, optional): A ``Locale`` object or locale identifier. Defaults to
            system default.

    Returns:
        str

    Raises:
        TypeError: When `dt` is not a datetime or `format` is neither a string nor ``None``.
        ValueError: When `tz` is an unrecognized timezone.
    """
    if not isinstance(dt, datetime):
        raise TypeError(
            f"zulu.parser.format()'s first argument must be a datetime, not {type(dt).__name__}"
        )  # pragma: no cover

    if format is not None and not isinstance(format, str):
        raise TypeError(
            f"zulu.parser.format()'s second argument must be a string or None,"
            f" not {type(format).__name__}"
        )  # pragma: no cover

    if not is_valid_timezone(tz):  # pragma: no cover
        raise ValueError(f"Unrecognized timezone: {tz}")

    if format is None:
        format = ISO8601

    if tz is not None:
        # Coerce first: ``datetime.astimezone`` only accepts tzinfo objects, while this function
        # also accepts timezone names such as "local" or "US/Eastern".
        dt = dt.astimezone(get_timezone(tz))

    if format == ISO8601:
        return dt.isoformat()

    if "%" in format:
        return dt.strftime(format)

    # Deviate from TR35 and treat "Y" as "y" when formatting. TR35 defines "Y" as 'Year in
    # "Week of Year" based calendars in which the year transition occurs on a week boundary'.
    # This has caused lots of confusion to users and is honestly not worth sticking with the
    # standard to accommodate.
    # Users should instead use %G, %V and a weekday directive (%A, %a, %w, or %u).
    format = format.replace("Y", "y")
    return _format_datetime(dt, format, locale=locale)


def _date_pattern_to_directive(format):
    """
    Convert date pattern format to strptime/strftime directives.

    Tokens without an entry in ``DATE_PATTERN_TO_DIRECTIVE`` (separators, unknown patterns) are
    copied through unchanged.
    """
    return "".join(
        DATE_PATTERN_TO_DIRECTIVE.get(token, token) for token in _tokenize_date_pattern(format)
    )


def _tokenize_date_pattern(format):
    """
    Return list of date pattern tokens.

    This groups tokens by repeating characters so that each set of repeating characters is a list
    item (e.g. ``'YY-MM-dd'`` becomes ``['YY', '-', 'MM', '-', 'dd']``).
    """
    return ["".join(run) for _, run in groupby(format)]


def parse_timedelta(obj):
    """
    Attempt to parse `obj` as a ``timedelta`` from a string formatted duration.

    Args:
        obj (str|number|timedelta): Object to parse. Numbers are interpreted as seconds and
            ``timedelta`` instances are returned as-is.

    Returns:
        timedelta

    Raises:
        TypeError: When `obj` is not a string, number, or timedelta.
        ParseError: When `obj` is a string that can't be parsed as a timedelta.
    """
    if isinstance(obj, timedelta):
        return obj

    if isinstance(obj, str):
        seconds = pytimeparse.parse(obj)
        # pytimeparse signals "no match" by returning None instead of raising.
        if seconds is None:
            raise ParseError(f'Value "{obj}" is not a recognized duration format')
    elif isinstance(obj, NUMBER_TYPES):
        seconds = obj
    else:
        raise TypeError(f"Expected string or number type, not {type(obj).__name__}")

    return timedelta(seconds=seconds)


def format_timedelta(
    delta,
    format="long",
    granularity="second",
    threshold=0.85,
    add_direction=False,
    locale=LC_TIME,
):
    """
    Return timedelta as a formatted string.

    Args:
        delta (timedelta): Time delta to format; passed through to babel's ``format_timedelta``.
        format (str, optional): Can be one of "long", "short", or "narrow". Defaults to
            `"long"`.
        granularity (str, optional): The smallest unit that should be displayed. The value can
            be one of "year", "month", "week", "day", "hour", "minute" or "second". Defaults to
            `"second"`.
        threshold (float, optional): Factor that determines at which point the presentation
            switches to the next higher unit. Defaults to `0.85`.
        add_direction (bool, optional): If ``True`` the return value will include directional
            information (e.g. `'1 hour ago'`, `'in 1 hour'`). Defaults to ``False``.
        locale (str|Locale, optional): A ``Locale`` object or locale identifier. Defaults to
            system default.

    Returns:
        str

    Raises:
        ValueError: When `granularity` or `format` is not one of the supported values.
    """
    if granularity not in TIMEDELTA_GRANULARITIES:
        units = ", ".join(f'"{unit}"' for unit in TIMEDELTA_GRANULARITIES)
        raise ValueError(f'Time delta granularity must be one of {units}, not "{granularity}"')

    if format not in TIMEDELTA_FORMATS:
        formats = ", ".join(f'"{name}"' for name in TIMEDELTA_FORMATS)
        raise ValueError(f'Time delta format must be one of {formats}, not "{format}"')

    return _format_timedelta(
        delta,
        granularity=granularity,
        threshold=threshold,
        add_direction=add_direction,
        format=format,
        locale=locale,
    )


def get_timezone(tz):
    """
    Coerce `tz` into a `tzinfo` compatible object.

    If ``tz is None``, ``UTC`` is returned. If ``tz == 'local'``, then the system's local
    timezone will be used. If `tz` is a string other than ``'local'``, it will be passed to
    ``dateutil.tz.gettz(tz)``. Otherwise, `tz` will be returned as-is.

    Raises:
        ValueError: When `tz` is a string that ``dateutil.tz.gettz`` does not recognize.
    """
    if tz is None:
        return UTC

    if tz == "local":
        return tzlocal()

    if isinstance(tz, str):
        resolved = gettz(tz)
        # gettz reports an unknown name by returning None.
        if resolved is None:
            raise ValueError(f"Unrecognized timezone string: {tz}")
        return resolved

    return tz


def get_timestamp(dt):
    """
    Return timestamp for datetime, `dt`.

    The result is the number of seconds between `dt` and the timezone-aware ``EPOCH``, so `dt`
    must itself be timezone-aware for the subtraction to be valid.
    """
    return (dt - EPOCH).total_seconds()


def is_valid_datetime(obj):
    """
    Return whether `obj` is an instance of ``datetime`` or contains date and time attributes.

    Returns:
        bool
    """
    if isinstance(obj, datetime):
        return True

    # Duck-typing fallback: accept anything exposing the full set of datetime fields.
    required_attrs = (
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "microsecond",
        "tzinfo",
    )
    return all(hasattr(obj, attr) for attr in required_attrs)


def is_valid_timezone(tz):
    """
    Return whether `tz` is a valid timezone.

    A timezone is considered valid when ``get_timezone(tz)`` completes without raising.

    Returns:
        bool
    """
    try:
        get_timezone(tz)
    except Exception:
        return False
    return True


def has_valid_timezone(dt):
    """
    Return whether `dt` has a valid timezone with a UTC offset strictly between -24/+24 hours.

    The check is performed by attempting to convert `dt` to UTC; any exception raised by that
    conversion is treated as an invalid timezone.

    Returns:
        bool
    """
    try:
        dt.astimezone(UTC)
    except Exception:  # pragma: no cover
        return False
    return True