kuzu
kuzu copied to clipboard
Issues importing from CSVs that contain strings with quotes
I'm having issues importing from CSV files that contain strings with embedded quotes. When I load the same CSV through pandas, it works as expected.
import kuzu
import csv
import pandas as pd
# Reproduction script: write a small CSV whose string fields contain quote
# characters, load it into a Kuzu node table with COPY ... FROM (LOAD ...),
# then print the table contents.

# Column names shared by the CSV header and the node table definition.
target_vertices_field_names = ["ID", "Name", "Quote"]
data = [
    {"ID": 1, "Name": "John Doe", "Quote": 'This is a "quote"'},
    {"ID": 2, "Name": "Jane Smith", "Quote": 'Another, "example" here'},
]

# doublequote=False makes the writer emit embedded quote characters as \"
# (prefixing them with escapechar). With the csv module's default
# doublequote=True the escapechar is never applied to quotes; they are
# written doubled ("") instead, which is the form that produced the
# "quote should be followed by ..." error quoted in this report.
# NOTE(review): assumes Kuzu's CSV reader treats backslash as its default
# ESCAPE character - confirm against the Kuzu CSV import documentation
# (alternatively keep doubled quotes and pass an explicit ESCAPE option).
with open("/tmp/output.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(
        csvfile,
        fieldnames=target_vertices_field_names,
        quotechar='"',
        escapechar="\\",
        doublequote=False,
    )
    writer.writeheader()
    writer.writerows(data)

# Echo the file so the exact text handed to Kuzu is visible.
with open("/tmp/output.csv", "r", encoding="utf-8") as file:
    print(file.read())

db = kuzu.Database("kuzu_test_db")
conn = kuzu.Connection(db)

# Table creation is best-effort so the script can be re-run against an
# existing database. Only RuntimeError is swallowed (the type Kuzu errors
# surface as in the messages quoted in this report) so that
# KeyboardInterrupt / SystemExit still propagate.
# NOTE(review): assumes the "table already exists" failure is also raised as
# RuntimeError - confirm.
try:
    conn.execute(
        "CREATE NODE TABLE Test("
        "ID INT64,"
        "Name STRING, "
        "Quote STRING, "
        "PRIMARY KEY (ID))"
    )
except RuntimeError:
    pass

# Insert only rows whose ID is not already present in the table. This is a
# plain string (no f-prefix) because nothing is interpolated; the braces
# belong to the query's subquery syntax.
response = conn.execute(
    "COPY Test FROM ("
    "LOAD WITH HEADERS (ID INT64, Name STRING, Quote STRING) "
    'FROM "/tmp/output.csv" (HEADER=true) '
    "WHERE NOT EXISTS {MATCH (t:Test) WHERE t.ID = ID} "
    "RETURN *)"
)
while response.has_next():
    print(f"Inserted Test {response.get_next()}")

# Alternative path kept for comparison: loading the same file through a
# pandas DataFrame is reported to work as expected.
# df = pd.read_csv('/tmp/output.csv')
# response = conn.execute(f'COPY Test FROM (LOAD WITH HEADERS (ID INT64, Name STRING, Quote STRING) FROM df WHERE NOT EXISTS {{MATCH (t:Test) WHERE t.ID = ID}} RETURN *)')
# while response.has_next():
#     print(f"Inserted Test {response.get_next()}")

# Dump every node to confirm what was actually stored.
response = conn.execute("MATCH (t:Test) RETURN *")
while response.has_next():
    print(response.get_next())
Depending on how I format the CSV, I get different errors:
# Variant: quote every field (csv.QUOTE_ALL). The output shown below has the
# embedded quote characters written doubled ("").
writer = csv.DictWriter(csvfile, fieldnames=data[0].keys(), quotechar='"', escapechar='\\', quoting=csv.QUOTE_ALL)
"ID","Name","Quote"
"1","John Doe","This is a ""quote"""
"2","Jane Smith","Another, ""example"" here"
RuntimeError: Copy exception: Error in file /tmp/output.csv on line 2: quote should be followed by end of file, end of value, end of row or another quote.
# Variant: no field quoting (csv.QUOTE_NONE). The output shown below has the
# embedded quotes and the embedded comma prefixed with the escapechar (\).
writer = csv.DictWriter(csvfile, fieldnames=data[0].keys(), quotechar='"', escapechar='\\', quoting=csv.QUOTE_NONE)
ID,Name,Quote
1,John Doe,This is a \"quote\"
2,Jane Smith,Another\, \"example\" here
RuntimeError: Copy exception: Error in file /tmp/output.csv, on line 3: expected 3 values per row, but got more.
Tested on version 0.4.1