PyHive
PyHive copied to clipboard
Connection keeps giving me 'Failed to open transport'
Hi. I have a Hive connection where I insert tons of data and also run 'Insert Overwrite' queries. Here is my connection file:
from pyhive import hive
from app.settings import Settings
import logging
# Raise the 'pyhive' logger threshold to ERROR so that records below that
# severity emitted under this logger name are dropped.
logging.getLogger('pyhive').setLevel(logging.ERROR)
class Hive:
    """Thin wrapper around a single PyHive connection.

    The connection parameters are read from ``Settings.DATABASE['CLOUDERA']``.
    Calling the instance (``hive_obj()``) replaces the underlying connection
    with a fresh one and returns the same wrapper.
    """

    def __init__(self):
        """Open the initial connection."""
        self.db = self._connect()

    def __call__(self):
        """Re-open the connection and return ``self``.

        The previous connection, if any, is closed on a best-effort basis so
        that it is not left open when ``self.db`` is rebound.
        """
        previous = getattr(self, 'db', None)
        if previous is not None:
            try:
                previous.close()
            except Exception as err:
                # The old connection may already be unusable; failing to
                # close it must not prevent opening the replacement.
                logging.getLogger(__name__).warning(
                    "Could not close previous Hive connection: %s", err
                )
        self.db = self._connect()
        return self

    @staticmethod
    def _connect():
        """Build a new ``hive.Connection`` from the CLOUDERA settings."""
        settings = Settings.DATABASE.get('CLOUDERA')
        hive_settings = settings.get('HIVE')
        return hive.Connection(
            host=hive_settings.get('HOST'),
            port=hive_settings.get('PORT'),
            username=settings.get('USERNAME'),
            password=settings.get('PASSWORD'),
            database=settings.get('DATABASE'),
            auth='CUSTOM',
        )

    def _run(self, sql, fetch=False):
        """Execute ``sql`` on a fresh cursor and always release that cursor.

        Returns the fetched rows when ``fetch`` is true, otherwise ``True``.
        Any exception is logged at CRITICAL level and re-raised unchanged.
        """
        cursor = None
        try:
            cursor = self.db.cursor()
            cursor.execute(sql)
            return cursor.fetchall() if fetch else True
        except Exception as err:
            # Builtin print() has no ``level`` keyword; using it here raised
            # TypeError and hid the real error, so go through logging.
            logging.getLogger(__name__).critical("Hive query failed: %s", err)
            raise
        finally:
            # ``cursor`` stays None when self.db.cursor() itself failed;
            # guarding avoids masking that failure with an unbound-name error.
            if cursor is not None:
                cursor.close()

    def get(self, sql):
        """Run ``sql`` and return all rows from ``cursor.fetchall()``.

        Raises whatever the driver raised, after logging it.
        """
        return self._run(sql, fetch=True)

    def execute_query(self, sql):
        """Run ``sql`` without fetching rows; return ``True`` on success.

        Raises whatever the driver raised, after logging it.
        """
        return self._run(sql)

    def insert(self, table_name, data):
        """Insert pre-rendered value groups into ``table_name``.

        ``data`` is an iterable of strings that are joined with ``', '`` after
        ``VALUES``. Returns ``True`` on success; raises whatever the driver
        raised, after logging it.
        """
        # NOTE(review): the statement is assembled by string interpolation, so
        # ``table_name`` and ``data`` must come from trusted code only.
        sql = f"INSERT INTO {table_name} VALUES {', '.join(data)}"
        return self._run(sql)
During my data load processing, many queries are executed, but some of them fail with a 'Failed to open transport (tries_left=3)' error. Here is the error:
[30/10/2020 00:59:49] - ERROR - Failed to open transport (tries_left=3)
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/impala/hiveserver2.py", line 1009, in _execute
return func(request)
File "/usr/local/lib/python3.7/site-packages/thriftpy2/thrift.py", line 219, in _req
return self._recv(_api)
File "/usr/local/lib/python3.7/site-packages/thriftpy2/thrift.py", line 231, in _recv
fname, mtype, rseqid = self._iprot.read_message_begin()
File "/usr/local/lib/python3.7/site-packages/thriftpy2/protocol/binary.py", line 373, in read_message_begin
self.trans, strict=self.strict_read)
File "/usr/local/lib/python3.7/site-packages/thriftpy2/protocol/binary.py", line 165, in read_message_begin
sz = unpack_i32(inbuf.read(4))
File "/usr/local/lib/python3.7/site-packages/thriftpy2/transport/base.py", line 60, in read
return readall(self._read, sz)
File "/usr/local/lib/python3.7/site-packages/thriftpy2/transport/base.py", line 12, in readall
chunk = read_fn(sz - have)
File "/usr/local/lib/python3.7/site-packages/thriftpy2/transport/buffered/__init__.py", line 41, in _read
buf = self._trans.read(max(rest_len, self._buf_size))
File "/usr/local/lib/python3.7/site-packages/thriftpy2/transport/socket.py", line 132, in read
message='TSocket read 0 bytes')
thriftpy2.transport.base.TTransportException: TTransportException(type=4, message='TSocket read 0 bytes')
In this example, the third Hive query failed every time, but when executed by itself it completed with no error. I tried to re-create the connection using `__call__`, but that didn't work either...
Thanks in advance.
Oh, about my versions:
PyHive==0.6.2
thrift==0.13.0
thrift-sasl==0.4.2
thriftpy2==0.4.11
I'm using Python 3.7.6 on Oracle Linux 7.8.