pandas-datareader icon indicating copy to clipboard operation
pandas-datareader copied to clipboard

requests.exceptions.SSLError: HTTPSConnectionPool(host='stats.oecd.org', port=443): Max retries exceeded with url: /SDMX-JSON/data/PRICES_CPI/all/all (Caused by SSLError(SSLError(1, '[SSL: UNSAFE_LEGACY_RENEGOTIATION_DISABLED] unsafe legacy renegotiation disabled (_ssl.c:997)')))

Open Ignacio-Ibarra opened this issue 2 years ago • 0 comments

Using your package...I've tried to run my example.py

import pandas_datareader.data as web
import datetime

df = web.DataReader('PRICES_CPI', 'oecd')

print(df[['Argentina']])

I get...

requests.exceptions.SSLError: HTTPSConnectionPool(host='stats.oecd.org', port=443): Max retries exceeded with url: /SDMX-JSON/data/PRICES_CPI/all/all (Caused by SSLError(SSLError(1, '[SSL: UNSAFE_LEGACY_RENEGOTIATION_DISABLED] unsafe legacy renegotiation disabled (_ssl.c:997)')))

I found that this issue could be solved with this

import requests
import urllib3
import ssl
import json


class CustomHttpAdapter (requests.adapters.HTTPAdapter):
    # "Transport adapter" that allows us to use custom ssl_context.

    def __init__(self, ssl_context=None, **kwargs):
        self.ssl_context = ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, connections, maxsize, block=False):
        self.poolmanager = urllib3.poolmanager.PoolManager(
            num_pools=connections, maxsize=maxsize,
            block=block, ssl_context=self.ssl_context)

def get_legacy_session():
    ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
    ctx.options |= 0x4  # OP_LEGACY_SERVER_CONNECT
    session = requests.session()
    session.mount('https://', CustomHttpAdapter(ctx))
    return session


r = get_legacy_session().get('http://stats.oecd.org/SDMX-JSON/data/<dataset identifier>/<filter expression>/<agency name>[ ?<additional parameters>]')

status = r.status_code
print(status)

# 200

I try to change your code in you _utils.py like this:

import datetime as dt
from pandas import to_datetime
import requests
import urllib3
import ssl

class CustomHttpAdapter (requests.adapters.HTTPAdapter):
    # "Transport adapter" that allows us to use custom ssl_context.

    def __init__(self, ssl_context=None, **kwargs):
        self.ssl_context = ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, connections, maxsize, block=False):
        self.poolmanager = urllib3.poolmanager.PoolManager(
            num_pools=connections, maxsize=maxsize,
            block=block, ssl_context=self.ssl_context)

def get_legacy_session():
    ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
    ctx.options |= 0x4  # OP_LEGACY_SERVER_CONNECT
    session = requests.session()
    session.mount('https://', CustomHttpAdapter(ctx))
    return session



from pandas_datareader.compat import is_number


class SymbolWarning(UserWarning):
    pass


class RemoteDataError(IOError):
    pass


def _sanitize_dates(start, end):
    """
    Return (timestamp_start, timestamp_end) tuple
    if start is None - default is 5 years before the current date
    if end is None - default is today

    Parameters
    ----------
    start : str, int, date, datetime, Timestamp
        Desired start date
    end : str, int, date, datetime, Timestamp
        Desired end date
    """
    if is_number(start):
        # regard int as year
        start = dt.datetime(start, 1, 1)
    start = to_datetime(start)

    if is_number(end):
        end = dt.datetime(end, 1, 1)
    end = to_datetime(end)

    if start is None:
        # default to 5 years before today
        today = dt.date.today()
        start = today - dt.timedelta(days=365 * 5)
    if end is None:
        # default to today
        end = dt.date.today()
    try:
        start = to_datetime(start)
        end = to_datetime(end)
    except (TypeError, ValueError):
        raise ValueError("Invalid date format.")
    if start > end:
        raise ValueError("start must be an earlier date than end")
    return start, end


def _init_session(session):
    if session is None:
        # session = requests.Session()
        session = get_legacy_session() # HERE IS THE CHANGE I MADE
        # do not set requests max_retries here to support arbitrary pause
    else:
        if not isinstance(session, requests.Session):
            raise TypeError("session must be a request.Session")
    return session

But it freezes when I run my example.py file. I was developing a oecd api wrapper but I found your package amazing, because it solves the heavy task of parsing the ouptut of api response to a pandas dataframe. Any help?

Ignacio-Ibarra avatar Jun 06 '23 20:06 Ignacio-Ibarra