# MoFin/venv/lib/python3.12/site-packages/exchange_calendars/utils/pandas_utils.py

import datetime
from zoneinfo import ZoneInfo

import numpy as np
import pandas as pd

from exchange_calendars.calendar_helpers import UTC


def days_at_time(
    dates: pd.DatetimeIndex,
    time: datetime.time | None,
    tz: ZoneInfo,
    day_offset: int,
) -> pd.DatetimeIndex:
    """Return UTC DatetimeIndex of given dates at a given time.

    Parameters
    ----------
    dates
        Dates or date (timezone naive with no time component).

    time
        The time to apply as an offset to each day in `dates`.

    tz
        The timezone in which to interpret `time`.

    day_offset
        Number of days by which to offset each date in `dates`.

    Returns
    -------
    pd.DatetimeIndex
        DatetimeIndex comprising Timestamp evaluted from `dates` and `time`
        with `dates` offset by `day_offset` and `time` interpreted as having
        timezone `tz`. DatetimeIndex has UTC timezone.

    Examples
    --------
    In the example below, the times switch from 13:45 to 12:45 UTC because
    March 13th is the daylight savings transition for America/New_York. All
    the times are still 8:45 when interpreted in America/New_York.

    >>> import pandas as pd; import datetime; import pprint
    >>> dts = pd.date_range('2016-03-12', '2016-03-14')
    >>> dts_845 = days_at_time(dts, datetime.time(8, 45), 'America/New_York', 0)
    >>> pprint.pprint([str(dt) for dt in dts_845])
    ['2016-03-12 13:45:00+00:00',
     '2016-03-13 12:45:00+00:00',
     '2016-03-14 12:45:00+00:00']
    """
    if time is None:
        return pd.DatetimeIndex([None for _ in dates]).tz_localize(UTC)

    if len(dates) == 0:
        return dates.tz_localize(UTC)

    # Offset days without tz to avoid timezone issues.
    delta = pd.Timedelta(
        days=day_offset,
        hours=time.hour,
        minutes=time.minute,
        seconds=time.second,
    )
    return (dates + delta).tz_localize(tz).tz_convert(UTC)


def vectorized_sunday_to_monday(dtix):
    """Roll every Sunday in an index forward to the following Monday.

    Vectorized counterpart of
    :func:`pandas.tseries.holiday.sunday_to_monday`.

    Parameters
    ----------
    dtix : pd.DatetimeIndex
        The index whose Sundays are to be shifted.

    Returns
    -------
    sundays_as_mondays : pd.DatetimeIndex
        New index holding the values of ``dtix`` with each Sunday replaced
        by the next day. ``dtix`` itself is not modified.
    """
    one_day = np.timedelta64(1, "D")
    raw = dtix.values
    # pandas numbers weekdays Monday=0 ... Sunday=6.
    is_sunday = dtix.weekday == 6
    return pd.DatetimeIndex(np.where(is_sunday, raw + one_day, raw))


def longest_run(ser: pd.Series) -> pd.Index:
    """Get the longest run of consecutive True values in a Series.

    Function can be used to find the longest run of values that meet a
    condition.

    Parameters
    ----------
    ser
        pd.Series of bool dtype.
            Index should reflect values against which a condition was
                assessed.
            Values should reflect whether corresponding index value
                met the condition.

    Returns
    -------
    pd.Index
        Slice of `ser` index that corresponds with the longest run of
            consecutive True values. Empty if `ser` has no True value.

    Examples
    --------
    >>> arr = np.arange(0, 88)
    >>> ser = pd.Series(arr, index=arr)
    >>> bv = (
    ...     ((ser >= 10) & (ser < 16))
    ...     | ((ser >= 30) & (ser <= 40))
    ...     | ((ser >= 55) & (ser < 61))
    ... )
    >>> longest_run(bv)
    Index([30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], dtype='int64')
    >>> pd.testing.assert_index_equal(longest_run(bv), ser.index[30:41])
    """
    # With no True value there is no run to report; return an empty slice
    # of the index rather than failing on an empty lookup further down.
    if not ser.any():
        return ser.index[:0]

    # The running count of False values only changes on a False, so all
    # members of one run of Trues share the same count. Keeping only the
    # True positions leaves that count as a per-run identifier.
    run_ids = (~ser).cumsum()[ser]
    run_lengths = run_ids.value_counts()  # size of each run
    # idxmax gives the first label holding the maximum count.
    longest_id = run_lengths.idxmax()
    return run_ids[run_ids == longest_id].index


def indexes_union(indexes: list[pd.Index]) -> pd.Index:
    """Return union of multiple pd.Index objects.

    Parameters
    ----------
    indexes
        Index objects to be joined. All indexes must be of same dtype.
        Must contain at least one index.

    Returns
    -------
    pd.Index
        Result of successively applying `union` to each index in
        `indexes`, starting from the first. If `indexes` holds a single
        index, that index is returned as is.

    Examples
    --------
    >>> index1 = pd.date_range('2021-05-01 12:20', periods=2, freq='1h')
    >>> index2 = pd.date_range('2021-05-02 17:10', periods=2, freq='22min')
    >>> index3 = pd.date_range('2021-05-03', periods=2, freq='1D')
    >>> indexes_union([index1, index2, index3])
    DatetimeIndex(['2021-05-01 12:20:00', '2021-05-01 13:20:00',
                   '2021-05-02 17:10:00', '2021-05-02 17:32:00',
                   '2021-05-03 00:00:00', '2021-05-04 00:00:00'],
                  dtype='datetime64[us]', freq=None)
    """
    merged, remainder = indexes[0], indexes[1:]
    # Fold each remaining index into the accumulated union, in order.
    for other in remainder:
        merged = merged.union(other)
    return merged