pandas-to-postgres
pandas-to-postgres copied to clipboard
No option to append to tables instead of truncating them
Thanks for the project it helps with the slow process for Dataframe to_sql
and it's more straight to proceed with odoo or others library.
For now i have an issue that i am doing dataframe copy inside a for loop but it seems to overwrite table each time it push data.
connection_string = 'postgresql://postgres:test_password@localhost:5432/data'
sql_engine = create_engine(connection_string, echo=False, pool_size=10, max_overflow=-1)
Base = automap_base()
Base.prepare(sql_engine, reflect=True)
my_model = Base.metadata.tables['my_model']
for index, x in enumerate(flat_list_of_bucketes[10:]):
if len(pd.read_sql('SELECT * FROM imported_files WHERE file_path=%(file_path)s', con=sql_engine, params={'file_path': x['path']})) == 0:
exec_by = 'hostname: ' + platform.node() + ', python_version: '+ platform.python_version() + ', created_at: ' + dt.datetime.utcnow().strftime('%d-%M-%YT%H:%m')
now = dt.datetime.utcnow()
t = TicToc()
print('start reading the file :' + x['path'])
with t, fs.open(x['path']) as f:
df = pd.read_csv(f, compression='gzip', header=0, low_memory=False)
t.toc('reading data takes ', restart=True)
df['id'] = [uuid.uuid1() for _ in range(len(df.index))]
df['created_by'] = exec_by
df['created_at'] = now
t.toc('cleansing data takes: ', restart=True)
with sql_engine.connect() as c:
DataFrameCopy(df.copy(), conn=c, table_obj=my_model)
t.toc('data save to postgres in: ')
total_pushed_rows = total_pushed_rows + len(df)
print('Total pushed rows :' + str(total_pushed_rows))
df_meta_info = pd.DataFrame(data={'file_path': [x['path']],
'imported_at': [now],
'imported_by': [exec_by],
'meta_data':[str(x)]})
df_meta_info.to_sql('imported_files', con=sql_engine, if_exists='append', index=False)
t.toc('pushing metadata to postgres takes ', restart=True)
print('file was saved to the database :' + df_meta_info['file_path'][0])
print('total of % of processed files are :' + str(index / total_num_of_files) + '%')
Trying to figure out if i missing any options to append
similair to the to_sql or i need to manage the commit myself, but no clue for now any help on how to solve this?