`Attr`'s `fill` attribute doesn't behave as expected.
Hi,
I have encountered multiple bugs related to Attr's fill attribute. Here is a minimal code repro.
import tiledb as tdb
import numpy as np
print(f"attr1: {tdb.Attr("attr1", np.dtype('c8, c8'), 1 ).fill!r}") # wrong: no structured dtype
print(f"attr2: {tdb.Attr("attr2", np.dtype('c8, c8'), 1 + 2j ).fill!r}") # wrong: no structured dtype
print(f"attr3: {tdb.Attr("attr3", np.dtype('f4, f4'), 1).fill!r}") # wrong: dtype is complex
print(f"attr4: {tdb.Attr("attr4", np.dtype('f4, f4'), 1+2j).fill!r}") # wrong: dtype is complex
print(f"attr5: {tdb.Attr("attr5", np.dtype('f4, f4'), (1, 2)).fill!r}") # wrong: dtype is complex
try:
print(f"attr6: {tdb.Attr("attr6", np.dtype('f4, f4, f4'), 1 + 2j).fill!r}")
except TypeError as e:
print("attr6: This TypeError is expected!") # as expected
print(f"attr7: {tdb.Attr("attr7", np.dtype('f4, f4, f4'), 1).fill!r}") # as expected
print(f"attr8: {tdb.Attr("attr8", np.dtype('f4, f4, f4'), (1, 2, 3)).fill!r}") # as expected
print(f"attr9: {tdb.Attr("attr9", np.dtype('c8, c8, c8'), 1).fill!r}") # wrong: dtype is np.dtype('f4, f4, f4')
try:
print(f"attr10: {tdb.Attr("attr10", np.dtype('c8, c8, c8'), 1 + 2j).fill!r}")
except TypeError as e:
print("attr11: This TypeError is not expected. Errror message:", e)
print(f"attr11: {tdb.Attr("attr11", np.dtype('c8, c8, c8'), (1, 2, 3)).fill!r}") # wrong: dtype is np.dtype('f4, f4, f4')
try:
print(f"attr12: {tdb.Attr("attr12", np.dtype('c8, c8, c8'), (1 + 2j, 2 + 3j, 3 + 4j)).fill!r}")
except TypeError as e:
print(f"attr12: This TypeError is not expected. Errror message:", e)
I get
attr1: array([1.+0.j], dtype=complex64)
attr2: array([1.+2.j], dtype=complex64)
attr3: array([1.+0.j], dtype=complex64)
attr4: array([1.+2.j], dtype=complex64)
attr5: array([1.+2.j], dtype=complex64)
attr6: This TypeError is expected!
attr7: array([(1., 1., 1.)], dtype=[('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr8: array([(1., 2., 3.)], dtype=[('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr9: array([(1., 1., 1.)], dtype=[('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr11: This TypeError is not expected. Errror message: float() argument must be a string or a real number, not 'complex'
attr11: array([(1., 2., 3.)], dtype=[('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr12: This TypeError is not expected. Errror message: float() argument must be a string or a real number, not 'complex'
while I expected:
attr1: array([(1.+0.j, 1.+0.j)], dtype=[('f0', '<c8'), ('f1', '<c8')])
attr2: array([(1.+2.j, 1.+2.j)], dtype=[('f0', '<c8'), ('f1', '<c8')])
attr3: array([(1., 1.)], dtype=[('f0', '<f4'), ('f1', '<f4')])
attr4: TypeError!
attr5: array([(1., 2.)], dtype=[('f0', '<f4'), ('f1', '<f4')])
attr6: This TypeError is expected!
attr7: array([(1., 1., 1.)], dtype=[('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr8: array([(1., 2., 3.)], dtype=[('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr9: array([(1.+0.j, 1.+0.j, 1.+0.j)], dtype=[('f0', '<c8'), ('f1', '<c8'), ('f2', '<c8')])
attr10: array([(1.+2.j, 1.+2.j, 1.+2.j)], dtype=[('f0', '<c8'), ('f1', '<c8'), ('f2', '<c8')])
attr11: array([(1.+0.j, 2.+0.j, 3.+0.j)], dtype=[('f0', '<c8'), ('f1', '<c8'), ('f2', '<c8')])
attr12: array([(1.+2.j, 2.+3.j, 3.+4.j)], dtype=[('f0', '<c8'), ('f1', '<c8'), ('f2', '<c8')])
Thanks :)
By the way, I should add that `dtype` is wrong too:
print(f"attr1: {tdb.Attr("attr1", np.dtype('c8, c8'), 1 ).dtype!r}") # wrong: no structured dtype
print(f"attr2: {tdb.Attr("attr2", np.dtype('c8, c8'), 1 + 2j ).dtype!r}") # wrong: no structured dtype
print(f"attr3: {tdb.Attr("attr3", np.dtype('f4, f4'), 1).dtype!r}") # wrong: dtype is complex
print(f"attr4: {tdb.Attr("attr4", np.dtype('f4, f4'), 1+2j).dtype!r}") # wrong: dtype is complex
print(f"attr5: {tdb.Attr("attr5", np.dtype('f4, f4'), (1, 2)).dtype!r}") # wrong: dtype is complex
try:
print(f"attr6: {tdb.Attr("attr6", np.dtype('f4, f4, f4'), 1 + 2j).dtype!r}")
except TypeError as e:
print("attr6: This TypeError is expected!") # as expected
print(f"attr7: {tdb.Attr("attr7", np.dtype('f4, f4, f4'), 1).dtype!r}") # as expected
print(f"attr8: {tdb.Attr("attr8", np.dtype('f4, f4, f4'), (1, 2, 3)).dtype!r}") # as expected
print(f"attr9: {tdb.Attr("attr9", np.dtype('c8, c8, c8'), 1).dtype!r}") # wrong: dtype is np.dtype('f4, f4, f4')
try:
print(f"attr10: {tdb.Attr("attr10", np.dtype('c8, c8, c8'), 1 + 2j).dtype!r}")
except TypeError as e:
print("attr11: This TypeError is not expected. Errror message:", e)
print(f"attr11: {tdb.Attr("attr11", np.dtype('c8, c8, c8'), (1, 2, 3)).dtype!r}") # wrong: dtype is np.dtype('f4, f4, f4')
try:
print(f"attr12: {tdb.Attr("attr12", np.dtype('c8, c8, c8'), (1 + 2j, 2 + 3j, 3 + 4j)).dtype!r}")
except TypeError as e:
print(f"attr12: This TypeError is not expected. Errror message:", e)
and got
[1.+2.j 3.+4.j 5.+6.j] complex64
attr1: array([1.+0.j], dtype=complex64)
attr2: array([1.+2.j], dtype=complex64)
attr3: array([1.+0.j], dtype=complex64)
attr4: array([1.+2.j], dtype=complex64)
attr5: array([1.+2.j], dtype=complex64)
attr6: This TypeError is expected!
attr7: array([(1., 1., 1.)], dtype=[('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr8: array([(1., 2., 3.)], dtype=[('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr9: array([(1., 1., 1.)], dtype=[('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr11: This TypeError is not expected. Errror message: float() argument must be a string or a real number, not 'complex'
attr11: array([(1., 2., 3.)], dtype=[('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr12: This TypeError is not expected. Errror message: float() argument must be a string or a real number, not 'complex'
attr1: dtype('complex64')
attr2: dtype('complex64')
attr3: dtype('complex64')
attr4: dtype('complex64')
attr5: dtype('complex64')
attr6: This TypeError is expected!
attr7: dtype([('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr8: dtype([('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr9: dtype([('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr11: This TypeError is not expected. Errror message: float() argument must be a string or a real number, not 'complex'
attr11: dtype([('f0', '<f4'), ('f1', '<f4'), ('f2', '<f4')])
attr12: This TypeError is not expected. Errror message: float() argument must be a string or a real number, not 'complex'
After exploring a little more, I think I understand what is happening. TileDB arrays do not support complex numbers natively, but, for some arbitrary reason, it was decided that 2-cell float TileDB attributes would be converted into complex numbers. I believe support for complex numbers should be dropped completely; otherwise we end up with absurd situations like the one in this code repro:
# Repro: create a dense 1-D array with 1-, 2- and 3-cell float attributes,
# write to every attribute, then read back and print a short preview.
import tempfile
from pathlib import Path

import numpy as np
import tiledb

# ---------- Create ----------
tmpdir = Path(tempfile.mkdtemp(prefix="tiledb_demo_"))
array_uri = str(tmpdir / "example_array")

dim = tiledb.Dim(name="i", domain=(0, 9), tile=10, dtype=np.int32)
dom = tiledb.Domain(dim)

a_f4 = tiledb.Attr(name="a_f4", dtype=np.float32)
a_f4_2 = tiledb.Attr(name="a_f4_2", dtype=np.dtype("f4, f4"))
a_f4_3 = tiledb.Attr(name="a_f4_3", dtype=np.dtype("f4, f4, f4"))
a_f8 = tiledb.Attr(name="a_f8", dtype=np.float64)
a_f8_2 = tiledb.Attr(name="a_f8_2", dtype=np.dtype("f8, f8"))
a_f8_3 = tiledb.Attr(name="a_f8_3", dtype=np.dtype("f8, f8, f8"))

schema = tiledb.ArraySchema(
    domain=dom,
    attrs=[
        a_f4,
        a_f4_2,
        a_f4_3,
        a_f8,
        a_f8_2,
        a_f8_3,
    ],
    sparse=False,
)
tiledb.Array.create(array_uri, schema)

# ---------- Populate ----------
N = 10


def f_doubles_complex(n):
    """Return n complex numbers k + (k + 1)j for k = 0.0, 1.0, ..., n - 1."""
    return [complex(k, k + 1.0) for k in map(float, range(n))]


def f_triples_float(n):
    """Return n float triples (k, k + 1, k + 2) for k = 0.0, 1.0, ..., n - 1."""
    return [(k, k + 1.0, k + 2.0) for k in map(float, range(n))]


# The 2-cell attributes (declared as "f4, f4" / "f8, f8") are fed complex
# arrays here, while the 3-cell attributes are fed structured float arrays.
payload = {
    "a_f4": np.arange(N, dtype=np.float32),
    "a_f4_2": np.array(f_doubles_complex(N), dtype=np.complex64),
    "a_f4_3": np.array(f_triples_float(N), dtype="f4, f4, f4"),
    "a_f8": np.arange(N, dtype=np.float64),
    "a_f8_2": np.array(f_doubles_complex(N), dtype=np.complex128),
    "a_f8_3": np.array(f_triples_float(N), dtype="f8, f8, f8"),
}

with tiledb.open(array_uri, "w") as A:
    A[:] = payload

# ---------- Read back a quick preview ----------
with tiledb.open(array_uri, "r") as A:
    out = A[:]
    for name in [
        "a_f4",
        "a_f4_2",
        "a_f4_3",
        "a_f8",
        "a_f8_2",
        "a_f8_3",
    ]:
        arr = out[name]
        print(f"{name:7s} dtype={arr.dtype} sample={arr[:3]}")
Attempting to write a 2-cell attribute with dtype np.dtype('f4, f4') results in an error!!