JLD.jl

Issue loading CartesianIndex{N} object

Open: andrewsteck opened this issue 8 years ago • 2 comments

Edited to add: I'm on Julia 0.5.1, and I can supply versioninfo() if helpful.

Hi all,

For my application, I need to use a user-defined type that has a field of type CartesianIndex{3}. Using this and the accompanying CartesianRange iterator has made my life much easier, but now they are not cooperating nicely with JLD.

For example this does not work:

using JLD
type type1
  a::CartesianIndex{3}
end
p = (2, 3, 4)
my_object = type1(CartesianIndex(p))
save("test.jld", "my_object", my_object)
test = load("test.jld", "my_object")

It returns the error:

stored type Base.IteratorsMD.CartesianIndex{3} does not match currently loaded type in load at FileIO/src/loadsave.jl:45 in #load#13 at FileIO/src/loadsave.jl:45 in load at JLD/src/JLD.jl:1277 in #jldopen#11 at JLD/src/JLD.jl:256 in read at JLD/src/JLD.jl:357 in read at JLD/src/JLD.jl:381 in jldatatype at JLD/src/jld_types.jl:739 in jldatatype at JLD/src/jld_types.jl:749

I have found that dropping "{N}" fixes the issue with JLD:

using JLD
type type2
  a::CartesianIndex
end
p = (2, 3, 4)
my_object2 = type2(CartesianIndex(p))
save("test.jld", "my_object2", my_object2)
test = load("test.jld", "my_object2")

but unfortunately the less-specific type drastically slows down my use case.

Am I missing something here? Is there any way that I can save and load a custom type with CartesianIndex{N}? Thanks for your work and help!

andrewsteck avatar Jun 29 '17 19:06 andrewsteck

I thought I'd responded to this, but evidently I did not hit the "comment" button.

First, this seems to be fixed in JLD2. Second, I have to confess that @simonster may be the only one who really understands what might be happening here, so I may be totally off base. I've put a couple of hours into debugging this, and as far as I can tell this line seems to imply that because the CartesianIndex{3} field has a member_name of "I_", it doesn't trigger a descent into the next level when the file is loaded. Like I said, I could be way off base here (help!).

Here's the _types section from the test file:

   GROUP "_types" {
      DATATYPE "00000001" H5T_COMPOUND {
         H5T_REFERENCE { H5T_STD_REF_OBJECT } "I_";
      }
         ATTRIBUTE "julia type" {
            DATATYPE  H5T_STRING {
               STRSIZE 34;
               STRPAD H5T_STR_NULLTERM;
               CSET H5T_CSET_UTF8;
               CTYPE H5T_C_S1;
            }
            DATASPACE  SCALAR
            DATA {
            (0): "Base.IteratorsMD.CartesianIndex{3}"
            }
         }
      DATATYPE "00000002" H5T_COMPOUND {
         H5T_COMPOUND {
            H5T_REFERENCE { H5T_STD_REF_OBJECT } "I_";
         } "a_1";
      }
         ATTRIBUTE "julia type" {
            DATATYPE  H5T_STRING {
               STRSIZE 5;
               STRPAD H5T_STR_NULLTERM;
               CSET H5T_CSET_UTF8;
               CTYPE H5T_C_S1;
            }
            DATASPACE  SCALAR
            DATA {
            (0): "type1"
            }
         }
      DATATYPE "00000003" H5T_COMPOUND {
         H5T_STD_I64LE "1_";
         H5T_STD_I64LE "2_";
         H5T_STD_I64LE "3_";
      }
         ATTRIBUTE "julia type" {
            DATATYPE  H5T_STRING {
               STRSIZE 44;
               STRPAD H5T_STR_NULLTERM;
               CSET H5T_CSET_UTF8;
               CTYPE H5T_C_S1;
            }
            DATASPACE  SCALAR
            DATA {
            (0): "Core.Tuple{Core.Int64,Core.Int64,Core.Int64}"
            }
         }

In case it helps, I've currently got 5022d9bbf16f7e379c2e5e86c13406b17cceb893 checked out and for debugging I'm using this diff:

$ git diff
diff --git a/src/JLD.jl b/src/JLD.jl
index cf22dfe..27e3b31 100644
--- a/src/JLD.jl
+++ b/src/JLD.jl
@@ -3,6 +3,7 @@ __precompile__()
 module JLD
 using HDF5, FileIO, Compat
 using Compat.String
+using DebuggingUtilities
 
 import HDF5: close, dump, exists, file, getindex, setindex!, g_create, g_open, o_delete, name, names, read, write,
              HDF5ReferenceObj, HDF5BitsKind, ismmappable, readmmap
@@ -697,6 +698,7 @@ function write_compound(parent::Union{JldFile, JldGroup}, name::String,
                         s, wsession::JldWriteSession; kargs...)
     T = typeof(s)
     f = file(parent)
+    @showln T
     dtype = h5type(f, T, true)
     gen_h5convert(f, T)
 
diff --git a/src/jld_types.jl b/src/jld_types.jl
index 455e059..3f70166 100644
--- a/src/jld_types.jl
+++ b/src/jld_types.jl
@@ -38,6 +38,7 @@ function JldTypeInfo(parent::JldFile, types::TypesType, commit::Bool)
     offsets = Vector{Int}(length(types))
     offset = 0
     for i = 1:length(types)
+        @showln types[i] @which h5fieldtype(parent, types[i], commit)
         dtype = dtypes[i] = h5fieldtype(parent, types[i], commit)
         offsets[i] = offset
         offset += HDF5.h5t_get_size(dtype)
@@ -70,7 +71,9 @@ function commit_datatype(parent::JldFile, dtype::HDF5Datatype, T::ANY)
     a_write(dtype, name_type_attr, full_typename(parent, T))
 
     # Store in map
-    parent.jlh5type[T] = JldDatatype(dtype, id)
+    typ = JldDatatype(dtype, id)
+    @showln T=>typ
+    parent.jlh5type[T] = typ
 end
 
 # If parent is nothing, we are creating the datatype in memory for
@@ -311,6 +314,8 @@ else
 end
 
 function h5type(parent::JldFile, T::TupleType, commit::Bool)
+    @showln T parent.jlh5type
+    Base.show_backtrace(STDERR, backtrace())
     haskey(parent.jlh5type, T) && return parent.jlh5type[T]
     # Tuples should always be concretely typed, unless we're
     # reconstructing a tuple, in which case commit will be false
@@ -348,22 +353,27 @@ if INLINE_POINTER_IMMUTABLE
     h5fieldtype(parent::JldFile, T::ANY, commit::Bool) =
         isleaftype(T) && (!T.mutable || T.size == 0) ? h5type(parent, T, commit) : JLD_REF_TYPE
 else
-    h5fieldtype(parent::JldFile, T::ANY, commit::Bool) =
+    function h5fieldtype(parent::JldFile, T::ANY, commit::Bool)
+        @showln @which h5type(parent, T, commit)
         isleaftype(T) && (!T.mutable || T.size == 0) && datatype_pointerfree(T) ? h5type(parent, T, commit) : JLD_REF_TYPE
+    end
 end
 
 function h5type(parent::JldFile, T::ANY, commit::Bool)
     !isa(T, DataType) && unknown_type_err(T)
     T = T::DataType
 
+    @showln parent.jlh5type commit
     haskey(parent.jlh5type, T) && return parent.jlh5type[T]
     isleaftype(T) || error("unexpected non-leaf type ", T)
 
+    @showln T isopaque(T)
     if isopaque(T)
         # Empty type or non-basic bitstype
         id = HDF5.h5t_create(HDF5.H5T_OPAQUE, opaquesize(T))
     else
         # Compound type
+        @showln @which JldTypeInfo(parent, T.types, commit)
         typeinfo = JldTypeInfo(parent, T.types, commit)
         id = HDF5.h5t_create(HDF5.H5T_COMPOUND, typeinfo.size)
         for i = 1:length(typeinfo.offsets)
@@ -374,7 +384,9 @@ function h5type(parent::JldFile, T::ANY, commit::Bool)
 
     dtype = HDF5Datatype(id, parent.plain)
     if commit
+        @showln @which commit_datatype(parent, dtype, T)
         jlddtype = commit_datatype(parent, dtype, T)
+        @showln jlddtype
         if T.size == 0
             # to allow recovery of empty types, which HDF5 does not allow
             a_write(dtype, "empty", UInt8(1))
@@ -635,6 +647,7 @@ end
 
 # There is no point in specializing this
 function _gen_h5convert!(T::ANY)
+    @showln T
     parent = H5CONVERT_INFO[T]::JldFile
     dtype = parent.jlh5type[T].dtype
     istuple = isa(T, TupleType)
@@ -652,6 +665,7 @@ function _gen_h5convert!(T::ANY)
     else
         types = (T::DataType).types
     end
+    @showln istuple types
 
     getindex_fn = istuple ? (:getindex) : (:getfield)
     ex = Expr(:block)
@@ -678,6 +692,7 @@ function _gen_h5convert!(T::ANY)
         end
     end
     push!(args, nothing)
+    @showln T ex
     return ex
 end
 
@@ -726,6 +741,7 @@ function jldatatype(parent::JldFile, dtype::HDF5Datatype)
         end
 
         T = readas(T)
+        @showln typename T class_id
 
         if !(T in BUILTIN_TYPES)
             # Call jldatatype on dependent types to validate them and
@@ -734,8 +750,11 @@ function jldatatype(parent::JldFile, dtype::HDF5Datatype)
                 for i = 0:HDF5.h5t_get_nmembers(dtype.id)-1
                     member_name = HDF5.h5t_get_member_name(dtype.id, i)
                     idx = rsearchindex(member_name, "_")
+                    @showln i member_name idx sizeof(member_name)
                     if idx != sizeof(member_name)
+                        @showln string(pathtypes, '/', lpad(member_name[idx+1:end], 8, '0'))
                         member_dtype = HDF5.t_open(parent.plain, string(pathtypes, '/', lpad(member_name[idx+1:end], 8, '0')))
+                        @showln member_dtype @which jldatatype(parent, member_dtype)
                         jldatatype(parent, member_dtype)
                     end
                 end
@@ -745,13 +764,16 @@ function jldatatype(parent::JldFile, dtype::HDF5Datatype)
         end
 
         # Verify that types match
+        @showln T @which h5type(parent, T, false)
         newtype = h5type(parent, T, false).dtype
+        @showln newtype
         dtype == newtype || throw(TypeMismatchException(typename))
 
         # Store type in type index
         index = typeindex(parent, addr)
         parent.jlh5type[T] = JldDatatype(dtype, index)
         parent.h5jltype[addr] = T
+        @showln parent.jlh5type parent.h5jltype
         T
     else
         error("unrecognized HDF5 datatype class ", class_id)
@@ -782,10 +804,12 @@ function reconstruct_type(parent::JldFile, dtype::HDF5Datatype, savedname::Abstr
             membername = HDF5.h5t_get_member_name(dtype.id, i-1)
             idx = rsearchindex(membername, "_")
             fieldname = fieldnames[i] = Symbol(membername[1:idx-1])
+            @showln membername idx fieldname sizeof(membername)
 
             if idx != sizeof(membername)
                 # There is something past the underscore in the HDF5 field
                 # name, so the type is stored in file
+                @showln string(pathtypes, '/', lpad(membername[idx+1:end], 8, '0'))
                 memberdtype = HDF5.t_open(parent.plain, string(pathtypes, '/', lpad(membername[idx+1:end], 8, '0')))
                 fieldtypes[i] = jldatatype(parent, memberdtype)
             else
@@ -821,6 +845,7 @@ end
 # data in the file, and it only needs to be called once per type.
 # Revisit if this ever turns out to be a bottleneck.
 function typeindex(parent::JldFile, addr::HDF5.Haddr)
+    @showln addr
     gtypes = parent.plain[pathtypes]
     i = 1
     for x in gtypes
tim@diva:~/.julia/v0.6/JLD$

with DebuggingUtilities.

timholy avatar Jul 11 '17 22:07 timholy

Thanks for taking a look @timholy (and for all of the outstanding work on JLD and CartesianIndices). If this is fixed in JLD2, it probably doesn't need to be a high priority for now.

For my use case, I am working around this for now by having parallel types: one for saving/loading with JLD, and the other for running my simulations. I plan on moving to JLD2 when I migrate to 0.6.0.

andrewsteck avatar Jul 11 '17 23:07 andrewsteck