# -*- coding: utf-8 -*-
# Author: Jev Kuznetsov <jev.kuznetsov@gmail.com>
# License: BSD
"""
.. ipython:: python
:suppress:
import pandas as pd
np.set_printoptions(precision=2, suppress=True)
pd.options.display.max_rows= 6
Yahoo Finance
====================
This module enables easy access to data provided by Yahoo Finance.
.. note::
This service may stop without notice, Yahoo does not seem to like people accessing their
data automatically. Breaking the service already happened in early 2017. This module includes
a workaround that works ... for now.
Getting historic data
-----------------------
The module is usually imported as follows:
.. ipython:: python
from tradingWithPython import yahooFinance as yf
Single symbol
---------------------
Then, to get raw yahoo finance data for a symbol use :func:`~lib.yahooFinance.getSymbolData`
.. ipython:: python
df = yf.getSymbolData("SPY")
df.head()
We can also normalize OHLC with the *adj_close* data column. After normalization,
the *close* column will be equal to *adj_close* , so the latter is omitted from the result.
.. ipython:: python
df = yf.getSymbolData("SPY",adjust=True)
df.head()
Multiple symbols
-------------------------
:func:`~lib.yahooFinance.getHistoricData` will accept one or more symbols and download them
while displaying a progress bar.
.. ipython:: python
symbols = ['XLE','USO','SPY']
data = yf.getHistoricData(symbols)
The data will be a multi-index DataFrame:
.. ipython:: python
data.columns
To select a symbol, simply use
.. ipython:: python
data['SPY']
Or with cross-section (see `Advanced indexing <https://pandas.pydata.org/pandas-docs/stable/advanced.html>`_)
.. ipython:: python
data.xs('close',level=1,axis=1)
Functions
==========
.. autofunction:: tradingWithPython.lib.yahooFinance.getSymbolData
.. autofunction:: tradingWithPython.lib.yahooFinance.getHistoricData
"""
import urllib.request
import numpy as np
import requests # interaction with the web
import os # file system operations
import yaml # human-friendly data format
import re # regular expressions
import pandas as pd # pandas... the best time series library out there
import datetime as dt # date and time functions
import io
from tradingWithPython.lib.extra import ProgressBar
# strftime/strptime pattern for the 'timestamp' field stored with the cached token
dateTimeFormat = "%Y%m%d %H:%M:%S"
def parseStr(s):
    ''' Convert a raw text field to a float where possible, otherwise a string.

    Parameters
    ------------
    s : str
        raw field text, surrounding whitespace is ignored

    Returns
    ---------
    float or str
        * text starting with a double quote -> the text with the quotes stripped
        * ``N/A`` -> ``np.nan``
        * a number, optionally suffixed with ``M`` (1e6) or ``B`` (1e9) -> float
        * anything else, including empty or whitespace-only input -> ``s`` unchanged
    '''
    multipliers = {'M': 1e6, 'B': 1e9}
    f = s.strip()

    # startswith/endswith are safe on an empty string, where f[0] would raise IndexError
    if f.startswith('"'):
        return f.strip('"')
    if f == 'N/A':
        return np.nan

    try:
        if f.endswith(tuple(multipliers)):  # Million/Billion suffix
            return float(f[:-1]) * multipliers[f[-1]]
        return float(f)
    except ValueError:  # not a number, hand back the original string
        return s
#def getQuote(symbols): has been disabled by Yahoo :-(
def getHistoricData(symbols, **options):
    '''
    Get data from Yahoo finance and return a pandas DataFrame.

    A single symbol (str) is passed straight to :func:`getSymbolData`.
    For a list or tuple of symbols each one is downloaded in turn while a
    progress bar is shown, and the results are concatenated column-wise.

    Parameters
    ------------
    symbols : str, list or tuple
        Yahoo finance symbol or a sequence of symbols
    **options
        keyword arguments forwarded to :func:`getSymbolData`
        (``sDate``, ``adjust``, ``verbose``, ``dumpDest``).
        When several symbols are requested ``verbose`` defaults to False
        so the per-symbol message does not disturb the progress bar.

    Returns
    ---------
    DataFrame
        the :func:`getSymbolData` result for a single symbol, otherwise a
        DataFrame with two column levels named ``symbol`` and ``ohlcv``

    Raises
    --------
    AssertionError
        if ``symbols`` is not a str, list or tuple
    '''
    assert isinstance(symbols, (list, tuple, str)), 'Input must be a string symbol or a list of symbols'

    if isinstance(symbols, str):
        return getSymbolData(symbols, **options)

    # Only default verbose to False: passing it explicitly next to **options
    # raised "multiple values for keyword argument" when the caller set it too.
    perSymbolOptions = dict(options)
    perSymbolOptions.setdefault('verbose', False)

    data = {}
    print('Downloading data:')
    p = ProgressBar(len(symbols))
    for idx, symbol in enumerate(symbols, start=1):
        p.animate(idx)
        data[symbol] = getSymbolData(symbol, **perSymbolOptions)

    return pd.concat(data, axis=1, names=['symbol', 'ohlcv'])
def getSymbolData(symbol, sDate=(2000,1,1), adjust=False, verbose=True, dumpDest=None):
    """
    Download daily history for one symbol from Yahoo finance as a DataFrame.

    The request covers the period from ``sDate`` up to now and is
    authenticated with the crumb and cookie held in the module-level token.

    Parameters
    -----------
    symbol : str
        Yahoo finance symbol
    sDate : tuple , default (2000,1,1)
        start date (y,m,d), passed to ``datetime.datetime``
    adjust : bool , default False
        use adjusted close values to correct OHLC. adj_close will be omitted
    verbose : bool , default True
        print the number of rows received
    dumpDest : str, default None
        directory in which to dump the raw response as ``<symbol>_dump.csv``
        for debugging

    Returns
    ---------
    DataFrame
        indexed by the first csv column (parsed as dates), with column names
        lower-cased and spaces replaced by underscores, values rounded to
        2 decimals

    Raises
    --------
    requests.exceptions.HTTPError
        if Yahoo answers with an error status
    requests.exceptions.Timeout
        if Yahoo does not answer within the timeout
    """
    requestTimeout = 30  # seconds; without a timeout requests may block forever

    period1 = int(dt.datetime(*sDate).timestamp())  # convert to seconds since epoch
    period2 = int(dt.datetime.now().timestamp())

    # Let requests build the query string so that every value, the crumb in
    # particular, is percent-encoded instead of being pasted in verbatim.
    url = "https://query1.finance.yahoo.com/v7/finance/download/{0}".format(symbol)
    query = {
        'period1': period1,
        'period2': period2,
        'interval': '1d',
        'events': 'history',
        'crumb': _token['crumb'],
    }

    data = requests.get(url, params=query, cookies={'B': _token['cookie']}, timeout=requestTimeout)
    data.raise_for_status()  # raise error in case of bad request

    if dumpDest is not None:
        fName = symbol + '_dump.csv'
        with open(os.path.join(dumpDest, fName), 'w', encoding='utf-8') as fid:
            fid.write(data.text)

    buf = io.StringIO(data.text)  # create a buffer
    # Yahoo marks missing values with the literal string 'null'
    df = pd.read_csv(buf, index_col=0, parse_dates=True, na_values=['null'])

    # normalise column names, e.g. 'Adj Close' -> 'adj_close'
    df = df.rename(columns={c: c.lower().replace(' ', '_') for c in df.columns})

    if verbose:
        print(('Got %i days of data' % len(df)))

    if adjust:
        return _adjust(df, removeOrig=True).round(2)
    return df.round(2)
def _adjust(df, removeOrig=False):
    '''
    Adjust historic OHLC data based on the adj_close field.

    The ratio close/adj_close is used to scale open, high and low into
    new columns adj_open, adj_high and adj_low. The input frame is not
    modified; a new DataFrame is returned.

    Parameters
    ------------
    df : DataFrame
        must contain the columns open, high, low, close and adj_close
    removeOrig : bool , default False
        drop the unadjusted open/high/low/close columns and rename the
        adj_* columns to take their place

    Returns
    ---------
    DataFrame
    '''
    c = df['close'] / df['adj_close']

    # assign() builds a new frame, so the caller's DataFrame does not
    # silently gain adj_* columns as a side effect
    adjusted = df.assign(
        adj_open=df['open'] / c,
        adj_high=df['high'] / c,
        adj_low=df['low'] / c,
    )

    if removeOrig:
        adjusted = adjusted.drop(['open', 'close', 'high', 'low'], axis=1)
        renames = {
            'adj_open': 'open',
            'adj_close': 'close',
            'adj_high': 'high',
            'adj_low': 'low',
        }
        adjusted = adjusted.rename(columns=renames)

    return adjusted
def loadToken():
    """
    Get the Yahoo cookie and crumb, from the disk cache when it is fresh.

    The token is cached in ``~/twpData/yahoo_cookie.yml``. A new token is
    requested with :func:`getToken` (and written to that file) when the
    cache file is missing, cannot be parsed, has no usable timestamp, or is
    30 or more days old.

    Returns
    ---------
    dict
        the cached or freshly downloaded token data
    """
    refreshDays = 30  # refresh cookie every x days

    # set destination file
    dataDir = os.path.expanduser('~')+'/twpData'
    dataFile = os.path.join(dataDir, 'yahoo_cookie.yml')

    try:  # load file from disk
        with open(dataFile, 'r') as fid:
            # safe_load: the cache holds plain strings only, and yaml.load()
            # without an explicit Loader is rejected by recent PyYAML versions
            data = yaml.safe_load(fid)
        stamp = dt.datetime.strptime(data['timestamp'], dateTimeFormat)
        # explicit check instead of assert, which is stripped under python -O
        if (dt.datetime.now() - stamp).days >= refreshDays:
            raise ValueError('cookie too old')
    except (FileNotFoundError, KeyError, TypeError, ValueError, yaml.YAMLError):
        # missing, empty, corrupt or stale cache -> fetch a new token
        os.makedirs(dataDir, exist_ok=True)
        data = getToken(dataFile)

    return data
def getToken(fName = None):
    """
    Get cookie and crumb from Yahoo.

    Downloads the AAPL history page, takes the cookie named ``B`` from the
    response and extracts the crumb from the ``CrumbStore`` entry embedded
    in the page text.

    Parameters
    -----------
    fName : str, default None
        if given, the token is also written to this file as yaml

    Returns
    ---------
    dict
        with the keys ``crumb``, ``cookie`` and ``timestamp`` (time of the
        download, formatted with ``dateTimeFormat``)

    Raises
    --------
    requests.exceptions.HTTPError
        if the page could not be downloaded
    KeyError
        if the response carries no cookie named ``B``
    RuntimeError
        if no crumb could be found in the page
    """
    url = 'https://uk.finance.yahoo.com/quote/AAPL/history'  # url for a ticker symbol, with a download link

    r = requests.get(url, timeout=30)  # download page
    r.raise_for_status()  # check for successful download before using the response

    cookie = r.cookies['B']  # the cookie we're looking for is named 'B'

    # raw string: '\{' is not a valid escape in a normal string literal
    pattern = re.compile(r'"CrumbStore":\{"crumb":"(?P<crumb>[^"]+)"\}')
    crumbs = pattern.findall(r.text)
    if not crumbs:
        raise RuntimeError('could not find a CrumbStore crumb in the Yahoo page')
    crumb = crumbs[-1]  # the last occurrence wins

    data = {'crumb': crumb, 'cookie': cookie, 'timestamp': dt.datetime.now().strftime(dateTimeFormat)}

    if fName is not None:  # save to file
        with open(fName, 'w') as fid:
            yaml.dump(data, fid)

    return data
#-------------- get token
# NOTE: this runs at import time, so importing the module reads the cached
# token from disk or, when loadToken() decides it must, requests a new one.
_token = loadToken() # get token from disk or yahoo
#--------------tests------------
# to be executed with pytest
def test_getToken():
    ''' download a token and check that it carries all expected fields '''
    print('getting token')
    token = getToken()
    # without these checks the test only proved that getToken() did not raise
    for key in ('crumb', 'cookie', 'timestamp'):
        assert key in token
def test_initToken():
    ''' delete any cached token file, then check that loadToken() recreates it '''
    cacheFile = os.path.join(os.path.expanduser('~')+'/twpData', 'yahoo_cookie.yml')

    # start from a clean slate so loadToken() has to fetch a token
    if os.path.exists(cacheFile):
        os.remove(cacheFile)

    loadToken()

    assert os.path.exists(cacheFile)
def test_download():
    ''' SPY history from the default start date should hold more than 4000 rows '''
    spy = getSymbolData('SPY')
    rowCount = len(spy)
    assert rowCount > 4000