qlib
qlib copied to clipboard
Error during sync China stock data from yahoo.
🐛 Bug Description
An error occurs while syncing China stock data from Yahoo.
To Reproduce
Steps to reproduce the behavior:
- Run "python scripts/data_collector/yahoo/collector.py update_data_to_bin --qlib_data_1d_dir <data_dir> --interval 1d --region CN"
- The following error occurred:
| INFO | collector:get_instrument_list:207 - get HS stock symbols......
Traceback (most recent call last):
  File "/home/cai/Desktop/myprogram/git/qlib/scripts/data_collector/yahoo/collector.py", line 1021, in <module>
    fire.Fire(Run)
  File "/home/cai/miniconda3/lib/python3.12/site-packages/fire/core.py", line 135, in Fire
    component_trace = _Fire(component, args, parsed_flag_args, context, name)
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/cai/miniconda3/lib/python3.12/site-packages/fire/core.py", line 468, in _Fire
    component, remaining_args = _CallAndUpdateTrace(
                                ^^^^^^^^^^^^^^^^^^^^
  File "/home/cai/miniconda3/lib/python3.12/site-packages/fire/core.py", line 684, in _CallAndUpdateTrace
    component = fn(*varargs, **kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/cai/Desktop/myprogram/git/qlib/scripts/data_collector/yahoo/collector.py", line 988, in update_data_to_bin
    self.download_data(delay=delay, start=trading_date, end=end_date, check_data_length=check_data_length)
  File "/home/cai/Desktop/myprogram/git/qlib/scripts/data_collector/yahoo/collector.py", line 802, in download_data
    super(Run, self).download_data(max_collector_count, delay, start, end, check_data_length, limit_nums)
  File "/home/cai/Desktop/myprogram/git/qlib/scripts/data_collector/base.py", line 402, in download_data
    _class(
  File "/home/cai/Desktop/myprogram/git/qlib/scripts/data_collector/yahoo/collector.py", line 86, in __init__
    super(YahooCollector, self).__init__(
  File "/home/cai/Desktop/myprogram/git/qlib/scripts/data_collector/base.py", line 80, in __init__
    self.instrument_list = sorted(set(self.get_instrument_list()))
                                      ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/cai/Desktop/myprogram/git/qlib/scripts/data_collector/yahoo/collector.py", line 208, in get_instrument_list
    symbols = get_hs_stock_symbols()
              ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/cai/Desktop/myprogram/git/qlib/scripts/data_collector/utils.py", line 235, in get_hs_stock_symbols
    symbols |= _get_symbol()
               ^^^^^^^^^^^^^
  File "/home/cai/Desktop/myprogram/git/qlib/scripts/data_collector/utils.py", line 219, in _get_symbol
    raise ValueError("The complete list of stocks is not available.")
ValueError: The complete list of stocks is not available.
Expected Behavior
Screenshot
Environment
Note: Users can run `cd scripts && python collect_info.py all` under the project directory to get system information
and paste it here directly.
- Qlib version: 0.9.6.99
- Python version: 3.12
- OS (Windows, Linux, MacOS): Ubuntu Linux
https://github.com/microsoft/qlib/blob/e7a1b5ea1ff4412792eeb606a639e0dde422053a/scripts/data_collector/utils.py#L205
This API only returns 100 items per request now. Here is a fix:
def _get_symbol():
    """
    Get the stock pool from a web page and process it into the format required by yahooquery.

    The listing endpoint is queried page by page (``pz`` items per page) until a
    page comes back with fewer items than the page size. Each page request uses
    a 2 second timeout and is attempted at most twice.

    Format of data retrieved from the web page: 600519, 000001
    The data format required by yahooquery: 600519.ss, 000001.sz

    Codes starting with "6" get the ".ss" suffix, codes starting with "0" or "3"
    get the ".sz" suffix, and any other code is dropped.

    Returns
    -------
        set: Returns the set of symbol codes.

    Raises
    ------
        requests.exceptions.HTTPError: a page could not be fetched after all attempts.
        ValueError: fewer than 3900 codes were collected in total.

    Examples
    -------
        {600000.ss, 600001.ss, 600002.ss, 600003.ss, ...}
    """
    base_url = "http://99.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": 1,  # Page number
        "pz": 100,  # Page size
        "po": 1,
        "np": 1,
        "fs": "m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048",
        "fields": "f12",
    }
    retry_count = 2  # Total attempts per page, including the first request
    _symbols = []

    while True:
        # Log the full URL before making the request
        full_url = requests.Request("GET", base_url, params=params).prepare().url
        logger.debug(f"Requesting URL: {full_url}")

        for attempt in range(retry_count):
            try:
                resp = requests.get(base_url, params=params, timeout=2)  # 2 second timeout
                resp.raise_for_status()
                break  # Request succeeded, leave the retry loop
            except (
                requests.exceptions.HTTPError,
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
            ) as e:
                if attempt == retry_count - 1:
                    # Out of attempts. Do not read `resp` here: on a connection
                    # error or timeout it is unbound (first page) or still holds
                    # the previous page's response. Use the response attached to
                    # the exception instead, which is None when no response arrived.
                    failed_resp = getattr(e, "response", None)
                    if failed_resp is not None:
                        reason = f"failed with status code {failed_resp.status_code}"
                    else:
                        reason = f"failed: {e}"
                    # HTTPError is kept as the raised type for every failure kind
                    # so existing callers see the same exception class as before.
                    raise requests.exceptions.HTTPError(f"Request to {base_url} {reason}") from e
                time.sleep(1)  # Wait 1 second before retrying

        try:
            data = resp.json()["data"]["diff"]
            _symbols.extend(_v["f12"] for _v in data)
        except Exception:
            logger.warning("An error occurred while extracting data from the response.")
            raise

        # If the number of returned items is less than the page size, we have reached the end
        if len(data) < params["pz"]:
            break

        # Move to the next page
        params["pn"] += 1
        time.sleep(0.05)  # Sleep for 50ms after each request

    if len(_symbols) < 3900:
        raise ValueError("The complete list of stocks is not available.")

    # Add suffix after the stock code to conform to yahooquery standard, otherwise the data will not be fetched.
    symbols = set()
    for code in _symbols:
        if code.startswith("6"):
            symbols.add(code + ".ss")
        elif code.startswith(("0", "3")):
            symbols.add(code + ".sz")
        # Codes with any other prefix are intentionally skipped.
    return symbols
Hi @awfssv, the static stock pool is currently unavailable; you can try pulling this PR and retrying.