MoFin/venv/lib/python3.12/site-packages/yfinance/multi.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# yfinance - market data downloader
# https://github.com/ranaroussi/yfinance
#
# Copyright 2017-2019 Ran Aroussi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import print_function

import logging
import threading
import time as _time
import traceback
from typing import Union

import multitasking as _multitasking
import pandas as _pd
import numpy as _np
from ._http import new_session

from . import Ticker, utils
from .data import YfData
from .config import YfConfig
from .const import period_default


class _DownloadCtx:
    """Mutable scratch space owned by a single download() invocation.

    download() builds a new instance on every call, so two calls running
    at the same time never write into each other's containers.

    Attributes (all declared in ``__slots__``):
        dfs          -- dict of per-symbol results, filled by the workers
        errors       -- dict of per-symbol error messages
        tracebacks   -- dict of per-symbol formatted tracebacks
        isins        -- dict mapping a resolved ticker back to the ISIN it came from
        progress_bar -- progress bar object, or None when none was created
        lock         -- threading.Lock guarding writes to the dicts above
    """
    __slots__ = ('dfs', 'errors', 'tracebacks', 'isins', 'progress_bar', 'lock')

    def __init__(self):
        # Each mapping gets its own fresh dict; nothing is shared.
        for mapping_name in ('dfs', 'errors', 'tracebacks', 'isins'):
            setattr(self, mapping_name, {})
        self.lock = threading.Lock()
        self.progress_bar = None

@utils.log_indent_decorator
def download(tickers, start=None, end=None, actions=False, threads=True,
             ignore_tz=None, group_by='column', auto_adjust=True, back_adjust=False,
             repair=False, keepna=False, progress=True, period=period_default, interval="1d",
             prepost=False, rounding=False, timeout=10, session=None,
             multi_level_index=True) -> Union[_pd.DataFrame, None]:
    """
    Download price history for one or more Yahoo tickers.

    This is the public entry point: it creates a fresh per-call context and
    hands every argument unchanged to ``_download_impl``.

    :Parameters:
        tickers : str, list, set, tuple
            Tickers to download. A string may hold several tickers
            separated by commas and/or whitespace.
        period : str
            Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
            Default is ``period_default`` (imported from ``.const``).
            Use either period, or start and end.
        interval : str
            Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
            Intraday data cannot extend beyond the last 60 days.
        start : str
            Download start date string (YYYY-MM-DD) or _datetime, inclusive.
            Default is 99 years ago.
            E.g. for start="2020-01-01", the first data point will be on "2020-01-01"
        end : str
            Download end date string (YYYY-MM-DD) or _datetime, exclusive.
            Default is now.
            E.g. for end="2023-01-01", the last data point will be on "2022-12-31"
        group_by : str
            Group by 'ticker' or 'column' (default).
        prepost : bool
            Include pre- and post-market data in results?
            Default is False.
        auto_adjust : bool
            Adjust all OHLC automatically? Default is True.
        back_adjust : bool
            Forwarded unchanged to ``Ticker.history``. Default is False.
        repair : bool
            Detect currency unit 100x mixups and attempt repair.
            Default is False.
        keepna : bool
            Keep NaN rows returned by Yahoo?
            Default is False.
        actions : bool
            Download dividend + stock splits data. Default is False.
        threads : bool / int
            How many threads to use for mass downloading. True (default)
            picks min(number of tickers, 2 * CPU count); a falsy value
            downloads sequentially. Forced off when DEBUG logging is enabled.
        ignore_tz : bool
            When combining from different timezones, ignore that part of datetime.
            Default depends on interval: False when the interval string ends
            in 'm' or 'h', True otherwise.
        progress : bool
            Show a progress bar. Default is True. Forced off when DEBUG
            logging is enabled.
        rounding : bool
            Optional. Round values to 2 decimal places?
        timeout : None or float
            If not None, stops waiting for a response after the given number
            of seconds. (Can also be a fraction of a second, e.g. 0.01.)
        session : None or Session
            Optional. Pass your own session object to be used for all requests.
        multi_level_index : bool
            Optional. Always return a MultiIndex DataFrame? Default is True.
            When False and exactly one ticker was requested, the ticker
            level is dropped from the columns.

    :Returns:
        pandas.DataFrame
            The per-ticker frames joined side by side along the columns.
    """
    # One context per call, so simultaneous download() calls stay isolated.
    call_ctx = _DownloadCtx()
    options = dict(
        start=start, end=end, period=period, interval=interval,
        actions=actions, threads=threads, ignore_tz=ignore_tz,
        group_by=group_by, auto_adjust=auto_adjust, back_adjust=back_adjust,
        repair=repair, keepna=keepna, progress=progress, prepost=prepost,
        rounding=rounding, timeout=timeout, session=session,
        multi_level_index=multi_level_index,
    )
    return _download_impl(call_ctx, tickers, **options)


def _download_impl(ctx, tickers, start=None, end=None, actions=False, threads=True,
                   ignore_tz=None, group_by='column', auto_adjust=True, back_adjust=False,
                   repair=False, keepna=False, progress=True, period=period_default, interval="1d",
                   prepost=False, rounding=False, timeout=10, session=None,
                   multi_level_index=True):
    """Fetch history for every requested ticker and merge it into one frame.

    Steps: normalise ``tickers`` (split strings, resolve ISINs, upper-case,
    de-duplicate), download each symbol either through
    ``_download_one_threaded`` or sequentially through ``_download_one``,
    log any collected errors, align the per-symbol frames with
    ``reindex_dfs`` and concatenate them along the columns.

    :Parameters:
        ctx : _DownloadCtx
            Per-call state; its ``dfs``, ``errors``, ``tracebacks``,
            ``isins`` and ``progress_bar`` are filled in here and by the
            per-ticker workers.
        tickers : str, list, set, tuple
            Symbols or ISINs. A string is split on commas and whitespace.
        threads : bool / int
            True means min(len(tickers), 2 * CPU count) threads; an int is
            used as the thread cap; falsy means sequential download.
        ignore_tz : bool or None
            None resolves to False when ``interval`` ends in 'm' or 'h',
            True otherwise.
        group_by : str
            'column' puts the 'Price' level outermost; anything else keeps
            'Ticker' outermost.
        multi_level_index : bool
            When False and exactly one symbol remains, the ticker level is
            dropped from the columns.
        session : object or None
            Falls back to ``new_session()`` when falsy.

        The remaining arguments (start, end, actions, auto_adjust,
        back_adjust, repair, keepna, period, interval, prepost, rounding,
        timeout) are forwarded unchanged to the per-ticker download.

    :Returns:
        pandas.DataFrame with the combined data.
    """
    logger = utils.get_yf_logger()
    session = session or new_session()

    # Called only for its side effect; the returned object is discarded.
    # NOTE(review): presumably registers `session` on a shared YfData
    # instance - confirm against YfData.
    YfData(session=session)

    if logger.isEnabledFor(logging.DEBUG):
        if threads:
            # multi-threaded log messages would interleave; serialize.
            logger.debug('Disabling multithreading because DEBUG logging enabled')
            threads = False
        if progress:
            progress = False

    if ignore_tz is None:
        # Intraday intervals end in 'm' (minutes) or 'h' (hours).
        ignore_tz = interval[-1] not in ('m', 'h')

    if not isinstance(tickers, (list, set, tuple)):
        tickers = tickers.replace(',', ' ').split()

    # Swap ISINs for their ticker, remembering the mapping so the result
    # columns can be renamed back to what the caller asked for.
    resolved = []
    for ticker in tickers:
        if utils.is_isin(ticker):
            isin = ticker
            ticker = utils.get_ticker_by_isin(ticker)
            ctx.isins[ticker] = isin
        resolved.append(ticker)

    tickers = list({t.upper() for t in resolved})

    if progress:
        ctx.progress_bar = utils.ProgressBar(len(tickers), 'completed')

    if threads:
        if threads is True:
            threads = min(len(tickers), _multitasking.cpu_count() * 2)
        _multitasking.set_max_threads(threads)
        for i, ticker in enumerate(tickers):
            _download_one_threaded(ctx, ticker, period=period, interval=interval,
                                   start=start, end=end, prepost=prepost,
                                   actions=actions, auto_adjust=auto_adjust,
                                   back_adjust=back_adjust, repair=repair, keepna=keepna,
                                   progress=(progress and i > 0),
                                   rounding=rounding, timeout=timeout)
        # Workers report by writing into ctx.dfs; poll until every symbol
        # has an entry (a failed download stores an empty frame).
        while True:
            with ctx.lock:
                if len(ctx.dfs) >= len(tickers):
                    break
            _time.sleep(0.01)
    else:
        for ticker in tickers:
            _download_one(ctx, ticker, period=period, interval=interval,
                          start=start, end=end, prepost=prepost,
                          actions=actions, auto_adjust=auto_adjust,
                          back_adjust=back_adjust, repair=repair, keepna=keepna,
                          rounding=rounding, timeout=timeout)
            if progress:
                ctx.progress_bar.animate()

    if progress:
        ctx.progress_bar.completed()

    if ctx.errors:
        logger.error('\n%.f Failed download%s:' % (
            len(ctx.errors), 's' if len(ctx.errors) > 1 else ''))

        # Group symbols that share the same message so each distinct
        # error is logged once.
        errors = {}
        for ticker, err in ctx.errors.items():
            err = err.replace(f'${ticker}: ', '')
            errors.setdefault(err, []).append(ticker)
        for err, syms in errors.items():
            logger.error(f'{syms}: ' + err)

        tbs = {}
        for ticker, tb in ctx.tracebacks.items():
            tb = tb.replace(f'${ticker}: ', '')
            tbs.setdefault(tb, []).append(ticker)
        for tb, syms in tbs.items():
            logger.debug(f'{syms}: ' + tb)

    if ignore_tz:
        # reindex_dfs strips the timezone too, but only from frames where
        # `.empty` is False; this pass keys on row count alone, so it also
        # covers a frame that has rows but no columns.
        for df in ctx.dfs.values():
            if df is not None and df.shape[0] > 0:
                df.index = df.index.tz_localize(None)
    ctx.dfs = reindex_dfs(ctx.dfs, ignore_tz)

    # A single attempt: retrying the identical concat on failure cannot
    # succeed, so any exception is simply allowed to propagate.
    data = _pd.concat(ctx.dfs.values(), axis=1, sort=True,
                      keys=ctx.dfs.keys(), names=['Ticker', 'Price'])
    data.rename(columns=ctx.isins, inplace=True)

    if group_by == 'column' and isinstance(data.columns, _pd.MultiIndex):
        data.columns = data.columns.swaplevel(0, 1)
        data.sort_index(level=0, axis=1, inplace=True)

    if not multi_level_index and len(tickers) == 1:
        # The ticker level is level 0 when grouped by ticker, else level 1.
        data = data.droplevel(0 if group_by == 'ticker' else 1, axis=1).rename_axis(None, axis=1)

    return data

def reindex_dfs(dfs, ignore_tz):
    """Put every frame in ``dfs`` on one shared, timezone-consistent index.

    :Parameters:
        dfs : dict
            Maps a key to a DataFrame or None. The dict is updated in place
            and the index of each non-empty frame is reassigned in place.
        ignore_tz : bool
            True: drop the timezone from each non-empty frame's index.
            False: convert each non-empty frame's index to the timezone
            that occurs most often among them.

    :Returns:
        The same ``dfs`` dict. Every frame has been reindexed to the union
        of the non-empty frames' indexes (an empty DatetimeIndex when there
        are none). ``None`` entries are left as ``None`` instead of raising
        AttributeError on ``None.reindex``.
    """
    def _is_populated(df):
        return df is not None and not df.empty

    populated = [key for key, df in dfs.items() if _is_populated(df)]

    if ignore_tz:
        for key in populated:
            dfs[key].index = dfs[key].index.tz_localize(None)
    elif populated:
        # Align each df to most common timezone.
        # Compare strings since np.unique can't handle tz objects
        tz_names = [str(dfs[key].index.tz) for key in populated]
        unique_tzs, counts = _np.unique(tz_names, return_counts=True)
        tz_mode = unique_tzs[counts.argmax()]
        for key in populated:
            dfs[key].index = dfs[key].index.tz_convert(tz_mode)

    # Union of all populated indexes; empty frames contribute nothing.
    idx = None
    for key in populated:
        index = dfs[key].index
        idx = index if idx is None else idx.union(index)
    if idx is None:
        idx = _pd.DatetimeIndex([])

    # Only values of existing keys are replaced, so iterating is safe.
    for key, df in dfs.items():
        if df is not None:
            dfs[key] = df.reindex(idx)

    return dfs

@_multitasking.task
def _download_one_threaded(ctx, ticker, start=None, end=None,
                           auto_adjust=False, back_adjust=False, repair=False,
                           actions=False, progress=True, period=None,
                           interval="1d", prepost=False,
                           keepna=False, rounding=False, timeout=10):
    """Task wrapper around ``_download_one`` for the threaded code path.

    Runs the download for ``ticker`` (the result is stored in ``ctx`` by
    ``_download_one``; nothing is returned here) and then, when ``progress``
    is truthy, ticks ``ctx.progress_bar`` once.
    """
    _download_one(
        ctx, ticker,
        start=start, end=end,
        auto_adjust=auto_adjust, back_adjust=back_adjust, repair=repair,
        actions=actions, period=period, interval=interval,
        prepost=prepost, rounding=rounding,
        keepna=keepna, timeout=timeout,
    )
    if not progress:
        return
    ctx.progress_bar.animate()


def _download_one(ctx, ticker, start=None, end=None,
                  auto_adjust=False, back_adjust=False, repair=False,
                  actions=False, period=None, interval="1d",
                  prepost=False, rounding=False,
                  keepna=False, timeout=10):
    """Download history for one ticker and record the outcome in ``ctx``.

    :Parameters:
        ctx : _DownloadCtx
            Receives the result under the upper-cased ticker: the frame in
            ``ctx.dfs`` and, when applicable, a message in ``ctx.errors``
            and a formatted traceback in ``ctx.tracebacks``. All writes
            happen while holding ``ctx.lock``.
        ticker : str
            Symbol to fetch.

        All other arguments are forwarded unchanged to ``Ticker.history``.

    :Returns:
        Whatever ``Ticker.history`` returned, or None if it raised before
        returning. Exceptions derived from ``Exception`` are captured into
        ``ctx`` rather than propagated.
    """
    data = None
    sym = ticker.upper()

    # Exceptions must surface here so they can be captured per ticker, so
    # hiding is switched off for the duration of the call.
    # NOTE(review): this flag lives on the shared YfConfig object; when
    # several worker threads overlap, one thread can save the already
    # overridden value as its "backup" and restore that last. Fixing this
    # needs coordination outside this function.
    backup = YfConfig.network.hide_exceptions
    YfConfig.network.hide_exceptions = False
    try:
        tkr = Ticker(ticker)
        data = tkr.history(
            period=period, interval=interval,
            start=start, end=end, prepost=prepost,
            actions=actions, auto_adjust=auto_adjust,
            back_adjust=back_adjust, repair=repair,
            rounding=rounding, keepna=keepna, timeout=timeout
        )
        with ctx.lock:
            ctx.dfs[sym] = data
            # PriceHistory records soft errors (e.g. delisted, missing tz)
            # without raising; surface them so download() can log them.
            ph = tkr._price_history
            if ph is not None and ph._last_error is not None:
                ctx.errors[sym] = ph._last_error
    except Exception as e:
        with ctx.lock:
            # Store an empty frame so the caller's completion count, which
            # is based on len(ctx.dfs), still reaches the ticker total.
            ctx.dfs[sym] = utils.empty_df()
            ctx.errors[sym] = repr(e)
            ctx.tracebacks[sym] = traceback.format_exc()
    finally:
        # Restore even when a BaseException (e.g. KeyboardInterrupt)
        # escapes, so the override never outlives this call.
        YfConfig.network.hide_exceptions = backup

    return data