Unable to use requests_cache to cache the yfinance response
Describe bug
I want to cache the yfinance response, so I am using requests_cache and overriding the session, but I am getting "Too Many Requests. Rate limited. Try after a while."
# Cached session stored under 'yfinance.cache'; expire_after is 3600*2 seconds (2 hours).
# NOTE(review): `impersonate` is a curl_cffi session option — confirm requests_cache.CachedSession honours it.
requestCacheSession = requests_cache.CachedSession('yfinance.cache', expire_after=3600*2, impersonate="chrome")
# Pass the cached session to yfinance through the `session` argument.
stock = yf.Ticker(symbolNew, session=requestCacheSession)
Can anyone help
yfinance version - 0.2.59 python version - 3.10
Simple code that reproduces your problem
# Cached session stored under 'yfinance.cache'; expire_after is 3600*2 seconds (2 hours).
# NOTE(review): `impersonate` is a curl_cffi session option — confirm requests_cache.CachedSession honours it.
requestCacheSession = requests_cache.CachedSession('yfinance.cache', expire_after=3600*2, impersonate="chrome")
# Pass the cached session to yfinance through the `session` argument.
stock = yf.Ticker(symbolNew, session=requestCacheSession)
Debug log from yf.enable_debug_mode()
Too Many Requests. Rate limited. Try after a while.
Bad data proof
No response
yfinance version
0.2.59
Python version
No response
Operating system
No response
Because then you are overriding using curl_cffi session
requests_cache officially does not work with curl_cffi https://github.com/JWCook/requests-ratelimiter/issues/112
Will have to remove from docs, and maybe disable session argument
Hi, I solved it by manually specifying the headers; here are my pieces of code:
class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
    """Session built from the cache and rate-limiter mixins that sends a browser-like User-Agent."""

    def __init__(self, *args, **kwargs):
        """Forward every argument to the parent classes, then set the User-Agent header."""
        super().__init__(*args, **kwargs)
        browser_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        self.headers.update({'User-Agent': browser_agent})
...
def _get_internal_cache(self, max_requests: int = 2, in_seconds: int = 10):
    """Build a cached, rate-limited session for yfinance requests.

    Args:
        max_requests: Requests permitted per rate-limit window.
        in_seconds: Length of the window, in seconds.

    Returns:
        A new ``CachedLimiterSession`` whose file cache lives in
        ``<self.cache_path>/_yfinance_file_cache``.
    """
    request_rate = RequestRate(max_requests, Duration.SECOND * in_seconds)
    cache_dir = os.path.join(self.cache_path, "_yfinance_file_cache")
    return self.CachedLimiterSession(
        limiter=Limiter(request_rate),
        bucket_class=MemoryQueueBucket,
        backend=FileCache(cache_dir),
    )
...
ticker = yf.Ticker(symbol, session=self._get_internal_cache())
@cybtow Did you really solve it? _get_internal_cache isn't returning anything.
Yes, I solved it. Those are pieces of my code, so it is not complete; I tried to adapt it here. Let me copy and paste more — these pieces of code belong to an extensive class which I cannot paste here in full:
class MarketYahooFinance(Market, IMarket):
class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
    """Session built from the cache and rate-limiter mixins that sends a browser-like User-Agent."""

    def __init__(self, *args, **kwargs):
        """Forward every argument to the parent classes, then set the User-Agent header."""
        super().__init__(*args, **kwargs)
        browser_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        self.headers.update({'User-Agent': browser_agent})
def __init__(self, timezone: str = 'Europe/Madrid', isCache=True):
    """Initialise the base market, then create the session handed to yfinance.

    Args:
        timezone: Forwarded unchanged to the parent initialiser.
        isCache: Forwarded unchanged to the parent initialiser.
    """
    super().__init__(timezone, isCache)
    # Built once here; download_instrument_info passes it to yf.Ticker.
    cached_session = self._get_internal_cache()
    self.yfinance_session = cached_session
def _get_internal_cache(self, max_requests: int = 2, in_seconds: int = 10):
    """Create the cached, rate-limited session used for yfinance calls.

    Args:
        max_requests: Requests permitted per rate-limit window.
        in_seconds: Length of the window, in seconds.

    Returns:
        A new ``CachedLimiterSession`` whose file cache lives in
        ``<self.cache_path>/_yfinance_file_cache``.
    """
    # The session must be returned: __init__ stores this method's result in
    # self.yfinance_session, which would otherwise be None.
    return self.CachedLimiterSession(
        limiter=Limiter(RequestRate(max_requests, Duration.SECOND * in_seconds)),
        bucket_class=MemoryQueueBucket,
        backend=FileCache(os.path.join(self.cache_path, "_yfinance_file_cache")),
    )
....
def download_instrument_info(self, symbol: str) -> tuple:
try:
ticker = yf.Ticker(symbol, session=self.yfinance_session)
....
Based on https://ranaroussi.github.io/yfinance/advanced/caching.html, your code should be something like this (note, your code is using self.requestSession rather than requestCacheSession ):
# Option 1: create the cached session, then set the User-Agent by item assignment on its headers.
requestCacheSession = requests_cache.CachedSession('yfinance.cache', expire_after=3600*2, impersonate="chrome")
requestCacheSession.headers['User-agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
# Pass the configured session to yfinance.
stock = yf.Ticker(symbolNew, session=requestCacheSession)
or try this:
# Option 2: create the cached session, then set the User-Agent with headers.update().
requestCacheSession = requests_cache.CachedSession('yfinance.cache', expire_after=3600*2, impersonate="chrome")
# There is no `self` in this top-level snippet; the headers belong to the session object.
requestCacheSession.headers.update({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'})
# Pass the configured session to yfinance.
stock = yf.Ticker(symbolNew, session=requestCacheSession)
I just ran my script right now and it's not working again. Yesterday, it was doing that. I'll research a bit more if I find a solution.
Research what session is actually being used by yfinance, I think you will be surprised
I've tried some options; only two worked: one without specifying a session, and the other (the last one) specifying a custom session.
I think it would be nice if yfinance could be updated to work with other options too.
Here is my basic code, just tested using v0.2.59 of yfinance:
import yfinance as yf
# Test 1: no `session` argument, so yfinance uses its own session.
try:
    stock = yf.Ticker("AAPL")
    history = stock.history(period="5d")
    print(history) # it works
except Exception as e:
    print(e)
# #############################################################
import yfinance as yf
from requests import Session
from requests_cache import CacheMixin, FileCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter
class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
    """Cache + rate-limiter session that sends browser-like default headers."""

    def __init__(self, *args, **kwargs):
        """Initialise the parent classes, install the default headers, and assign a file cache."""
        super().__init__(*args, **kwargs)
        browser_headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "en-US,en;q=0.5",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
        }
        self.headers.update(browser_headers)
        # Assigned after super().__init__(), so this presumably supersedes any
        # `backend` passed to the constructor — TODO confirm.
        self.cache = FileCache("_yfinance_file_cache")

    def request(self, *args, timeout=15, **kwargs):
        """Delegate to the parent ``request``, defaulting ``timeout`` to 15."""
        return super().request(*args, timeout=timeout, **kwargs)
# Test 2: subclassed Session (cache + limiter mixins, custom headers) passed to yfinance.
# The limiter is configured as RequestRate(2, Duration.SECOND * 5).
session = CachedLimiterSession(
limiter=Limiter(RequestRate(2, Duration.SECOND * 5)),
bucket_class=MemoryQueueBucket,
backend=FileCache("_yfinance_file_cache"),
)
try:
stock = yf.Ticker("AAPL", session=session)
print(stock.history(period="5d")) # it DOESN'T work
except Exception as e:
print(e)
# NOTE(review): indentation was lost in this paste, so it is unclear whether exit(0)
# ran only inside the except branch or unconditionally after the try — confirm with the author.
exit(0)
# ###############################################################
import yfinance as yf
from requests import Session
from requests_cache import CacheMixin, FileCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter
class CachedLimiterSession2(CacheMixin, LimiterMixin, Session):
    """Bare cache + rate-limiter session with no extra headers or overrides."""
# Test 3: plain mixin session (no custom headers) passed to yfinance.
session2 = CachedLimiterSession2(
    limiter=Limiter(RequestRate(2, Duration.SECOND * 5)),
    bucket_class=MemoryQueueBucket,
    backend=FileCache("_yfinance_file_cache"),
)

try:
    stock = yf.Ticker("AAPL", session=session2)
    history = stock.history(period="5d")
    print(history) # it DOESN'T work
except Exception as e:
    print(e)
# ###############################################################
import yfinance as yf
import requests_cache
# Test 4: requests_cache.CachedSession passed directly to yfinance.
session3 = requests_cache.CachedSession(
    'yfinance.cache',
    expire_after=3600*2,
    impersonate="chrome",
)

try:
    stock = yf.Ticker("AAPL", session=session3)
    history = stock.history(period="5d")
    print(history) # it DOESN'T work
except Exception as e:
    print(e)
# ###############################################################
import yfinance as yf
from curl_cffi import requests
from requests_cache import install_cache
from requests_ratelimiter import LimiterSession, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter
# Test 5: curl_cffi session combined with install_cache and a LimiterSession wrapper.
session = requests.Session(impersonate="chrome")
install_cache(cache_name="_yfinance_file_cache", session=session)

limiter = Limiter(RequestRate(2, Duration.SECOND * 5))
limited_session = LimiterSession(session=session, limiter=limiter, bucket_class=MemoryQueueBucket)


def limited_request(*args, timeout=15, **kwargs):
    """Forward the call to the curl_cffi session, defaulting ``timeout`` to 15."""
    return session.request(*args, timeout=timeout, **kwargs)


# NOTE: this replaces the LimiterSession instance's `request` with a function that
# calls the curl_cffi session directly, so calls to `limited_session.request`
# go straight to `session.request` instead of LimiterSession's own implementation.
limited_session.request = limited_request

try:
    stock = yf.Ticker("AAPL", session=limited_session) # it works !!
    history = stock.history(period="5d")
    print(history)
except Exception as e:
    print(e)
# ###################################################################
@ValueRaider curious to know that session ;)
curious to know that session ;)
I can't tell you. You edit the source code to print.
stock = yf.Ticker("AAPL", session=limited_session) # it works !!
Does it actually enforce rate-limiting? I never got it working, and officially it will not work.
You're right, rate-limiting is not being taken into account at all. I checked with a list of tickers and all of them were fetched quickly without restrictions. Searching the Internet, I remember seeing pieces of code where that worked in the past. Do you know if there is currently some way or customization to make it work again?
And could you tell me the file and line where I have to print it, when you have a moment?
I'm new to this library and almost new to Python, so any information about whether this will be updated/fixed is appreciated.
Thank you in advance.
@cybtow I tried your code and I am getting the following error
INFO:requests_ratelimiter.requests_ratelimiter:Rate limit exceeded for https://query1.finance.yahoo.com/v1/test/getcrumb; filling limiter bucket
filling limiter bucket
[*********************100%***********************] 2 of 3 completed
flask-app-1 | ERROR:yfinance:
flask-app-1 | 3 Failed downloads:
flask-app-1 | ERROR: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')
Because then you are overriding using curl_cffi session
requests_cache officially does not work with curl_cffi JWCook/requests-ratelimiter#112
Will have to remove from docs, and maybe disable session argument
Please don't remove the session argument, I would say it's a pretty important feature.
You can mount your Requests Session object to use CFFI with curl-adapter
https://pypi.org/project/curl-adapter/1.0.0.post3/
This works, and you can pass the session object as your normal Requests Session.
import requests
from curl_adapter import CurlCffiAdapter
# Plain requests Session whose HTTP and HTTPS traffic is routed through curl-adapter.
session = requests.Session()
for prefix in ("http://", "https://"):
    # A separate adapter instance is mounted for each scheme.
    session.mount(prefix, CurlCffiAdapter())
Can we add caching logic in yfinance library and extend it via a functional parameter if users would like to cache the data for faster response time
@cybtow, from the tests, using v0.2.59 of yfinance, which piece of code worked for you?
Hi, I ran several tests; only these two worked for me: one without a session and the other with a custom session. Take into account that rate-limiting is not working.
import yfinance as yf
# Working case 1: no `session` argument, so yfinance uses its own session.
try:
    stock = yf.Ticker("AAPL")
    history = stock.history(period="5d")
    print(history) # it works
except Exception as e:
    print(e)
# #############################################################
import yfinance as yf
from curl_cffi import requests
from requests_cache import install_cache
from requests_ratelimiter import LimiterSession, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter
# Working case 2: curl_cffi session combined with install_cache and a LimiterSession wrapper.
session = requests.Session(impersonate="chrome")
install_cache(cache_name="_yfinance_file_cache", session=session)

limiter = Limiter(RequestRate(2, Duration.SECOND * 5))
limited_session = LimiterSession(session=session, limiter=limiter, bucket_class=MemoryQueueBucket)


def limited_request(*args, timeout=15, **kwargs):
    """Forward the call to the curl_cffi session, defaulting ``timeout`` to 15."""
    return session.request(*args, timeout=timeout, **kwargs)


# NOTE: this replaces the LimiterSession instance's `request` with a function that
# calls the curl_cffi session directly, so calls to `limited_session.request`
# go straight to `session.request` instead of LimiterSession's own implementation.
limited_session.request = limited_request

try:
    stock = yf.Ticker("AAPL", session=limited_session) # it works !!
    history = stock.history(period="5d")
    print(history)
except Exception as e:
    print(e)
# ###################################################################
@deeleeramone I just tried curl_adapter, obviously had to disable some things inside data.py. If it works that's great, can restore requests support, but I can't get it working:
EDIT: I got it working with 0.2.57. I'll restore requests support asap.
But how to combine with requests_cache or requests_ratelimiter?