argschema icon indicating copy to clipboard operation
argschema copied to clipboard

smart_merge fails with NumpyArray

Open djkapner opened this issue 6 years ago • 7 comments

I'm getting an error on validation when I've added a NumpyArray to an existing schema.

  File "/allen/programs/celltypes/workgroups/em-connectomics/danielk/conda/rm_production_mod/lib/python2.7/site-packages/argschema/argschema_parser.py", line 171, in __init__
    args = utils.smart_merge(jsonargs, argsdict)
  File "/allen/programs/celltypes/workgroups/em-connectomics/danielk/conda/rm_production_mod/lib/python2.7/site-packages/argschema/utils.py", line 210, in smart_merge
    elif a[key] == b[key]:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

my schema has this new entry:

# Optional field holding one mask radius per image corner.
# `int` replaces the `np.int` alias (deprecated in NumPy 1.20, removed in
# 1.24); the alias was the builtin `int`, so the dtype is unchanged.
corner_mask_radii = NumpyArray(
    dtype=int,
    required=False,
    default=[0, 0, 0, 0],
    missing=[0, 0, 0, 0],
    description="radius of image mask corners, "
                "order (0, 0), (w, 0), (w, h), (0, h)")

https://github.com/AllenInstitute/argschema/blob/287454917aa5826292af9d80e234d70df351152e/argschema/utils.py#L210

djkapner avatar Aug 20 '18 18:08 djkapner

I got around my immediate problem by switching from NumpyArray to List. The underlying issue still stands, though.

djkapner avatar Aug 20 '18 19:08 djkapner

Can you remove the `missing=` argument from the field and try it again?

fcollman avatar Aug 20 '18 22:08 fcollman

I tried to reproduce the issue with the following script:

# `argschema` itself must be imported: the script below refers to
# argschema.ArgSchema and argschema.ArgSchemaParser by module name.
import argschema
from argschema.fields import NumpyArray
import numpy as np


class MySchema(argschema.ArgSchema):
    # Optional field holding one mask radius per image corner.
    # `int` replaces the `np.int` alias (deprecated in NumPy 1.20, removed in
    # 1.24); the alias was the builtin `int`, so the dtype is unchanged.
    corner_mask_radii = NumpyArray(
        dtype=int,
        required=False,
        default=[0, 0, 0, 0],
        missing=[0, 0, 0, 0],
        description="radius of image mask corners, "
                    "order (0, 0), (w, 0), (w, h), (0, h)")


# Case 1: the field is supplied on the command line, with empty input data.
mod = argschema.ArgSchemaParser(
    input_data={},
    schema_type=MySchema,
    args=['--corner_mask_radii', '[0,2,0,0]'])
print(mod.args['corner_mask_radii'])

# Case 2: neither input data nor command-line arguments supply the field.
mod = argschema.ArgSchemaParser(input_data={}, schema_type=MySchema, args=[])
print(mod.args['corner_mask_radii'])

...but I couldn't. Perhaps you can put together a reduced-complexity example to help us figure out what is going wrong?

fcollman avatar Aug 21 '18 01:08 fcollman

This works:

import argschema
from argschema.fields import NumpyArray
import numpy as np

# Input data for the parser: the field is given as a plain Python list.
example = {
        'corner_mask_radii': [0, 2, 0, 0]
        }

class MySchema(argschema.ArgSchema):
    # Optional field holding one mask radius per image corner.
    # `int` replaces the `np.int` alias (deprecated in NumPy 1.20, removed in
    # 1.24); the alias was the builtin `int`, so the dtype is unchanged.
    corner_mask_radii = NumpyArray(
        dtype=int,
        required=False,
        default=[0, 0, 0, 0],
        missing=[0, 0, 0, 0],
        description="radius of image mask corners, "
                    "order (0, 0), (w, 0), (w, h), (0, h)")
    
class MyOtherThing(argschema.ArgSchemaParser):
    # Inner parser: validated against MySchema, run() echoes the parsed field.
    default_schema = MySchema

    def run(self):
        radii = self.args['corner_mask_radii']
        print('MyOtherThing:', radii)

class MyThing(argschema.ArgSchemaParser):
    # Outer parser: echoes its own parsed field, then hands a copy of its
    # parsed args to a nested MyOtherThing as input data.
    default_schema = MySchema

    def run(self):
        radii = self.args['corner_mask_radii']
        print('MyThing:', radii)

        # No command-line args are passed to the nested parser in this variant.
        nested_input = dict(self.args)
        nested = MyOtherThing(input_data=nested_input)
        nested.run()

if __name__ == '__main__':
    # Build the outer parser from the plain-dict input and run it.
    mmod = MyThing(input_data=example)
    mmod.run()

output:

$ python test.py 
('MyThing:', array([0, 2, 0, 0]))
('MyOtherThing:', array([0, 2, 0, 0]))

This throws the error:

import argschema
from argschema.fields import NumpyArray
import numpy as np

# Input data for the parser: the field is given as a plain Python list.
example = {
        'corner_mask_radii': [0, 2, 0, 0]
        }

class MySchema(argschema.ArgSchema):
    # Optional field holding one mask radius per image corner.
    # `int` replaces the `np.int` alias (deprecated in NumPy 1.20, removed in
    # 1.24); the alias was the builtin `int`, so the dtype is unchanged.
    corner_mask_radii = NumpyArray(
        dtype=int,
        required=False,
        default=[0, 0, 0, 0],
        missing=[0, 0, 0, 0],
        description="radius of image mask corners, "
                    "order (0, 0), (w, 0), (w, h), (0, h)")
    
class MyOtherThing(argschema.ArgSchemaParser):
    # Inner parser: validated against MySchema, run() echoes the parsed field.
    default_schema = MySchema

    def run(self):
        radii = self.args['corner_mask_radii']
        print('MyOtherThing:', radii)

class MyThing(argschema.ArgSchemaParser):
    # Outer parser: echoes its own parsed field, then hands a copy of its
    # parsed args to a nested MyOtherThing as input data.
    default_schema = MySchema

    def run(self):
        radii = self.args['corner_mask_radii']
        print('MyThing:', radii)

        # Passing command-line args together with input_data is the call the
        # reported traceback points at: the ValueError comes from the `==`
        # comparison inside utils.smart_merge(jsonargs, argsdict).
        nested_input = dict(self.args)
        cli_args = ['--output_json', 'out.json']
        nested = MyOtherThing(input_data=nested_input, args=cli_args)
        nested.run()

if __name__ == '__main__':
    # Build the outer parser from the plain-dict input and run it.
    mmod = MyThing(input_data=example)
    mmod.run()

output:

$ python test.py 
('MyThing:', array([0, 2, 0, 0]))
Traceback (most recent call last):
  File "test.py", line 39, in <module>
    mmod.run()
  File "test.py", line 34, in run
    args=['--output_json', 'out.json'])
  File "/allen/programs/celltypes/workgroups/em-connectomics/danielk/conda/rm_production_mod/lib/python2.7/site-packages/argschema/argschema_parser.py", line 171, in __init__
    args = utils.smart_merge(jsonargs, argsdict)
  File "/allen/programs/celltypes/workgroups/em-connectomics/danielk/conda/rm_production_mod/lib/python2.7/site-packages/argschema/utils.py", line 210, in smart_merge
    elif a[key] == b[key]:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

djkapner avatar Aug 21 '18 16:08 djkapner

here's the working version with List:

import argschema
from argschema.fields import List, Int
import numpy as np

# Input data for the parser: the field is given as a plain Python list.
example = {
        'corner_mask_radii': [0, 2, 0, 0]
        }

class MySchema(argschema.ArgSchema):
    # Workaround variant: the corner radii are declared as a List of Int
    # instead of a NumpyArray.
    corner_mask_radii = List(
        Int,
        required=False,
        default=[0, 0, 0, 0],
        missing=[0, 0, 0, 0],
        description="radius of image mask corners, "
                    "order (0, 0), (w, 0), (w, h), (0, h)")
    
class MyOtherThing(argschema.ArgSchemaParser):
    # Inner parser: validated against MySchema, run() echoes the parsed field.
    default_schema = MySchema

    def run(self):
        radii = self.args['corner_mask_radii']
        print('MyOtherThing:', radii)

class MyThing(argschema.ArgSchemaParser):
    # Outer parser: echoes its own parsed field, then hands a copy of its
    # parsed args to a nested MyOtherThing as input data.
    default_schema = MySchema

    def run(self):
        radii = self.args['corner_mask_radii']
        print('MyThing:', radii)

        # Same nested call as the failing variant (input_data plus
        # command-line args), but the field is a List in this schema.
        nested_input = dict(self.args)
        cli_args = ['--output_json', 'out.json']
        nested = MyOtherThing(input_data=nested_input, args=cli_args)
        nested.run()

if __name__ == '__main__':
    # Build the outer parser from the plain-dict input and run it.
    mmod = MyThing(input_data=example)
    mmod.run()

output:

$ python test.py 
/allen/programs/celltypes/workgroups/em-connectomics/danielk/conda/rm_production_mod/lib/python2.7/site-packages/argschema/utils.py:342: FutureWarning: '--corner_mask_radii' is using old-style command-line syntax with each element as a separate argument. This will not be supported in argschema after 2.0. See http://argschema.readthedocs.io/en/master/user/intro.html#command-line-specification for details.
  warnings.warn(warn_msg, FutureWarning)
('MyThing:', [0, 2, 0, 0])
('MyOtherThing:', [0, 2, 0, 0])

djkapner avatar Aug 21 '18 16:08 djkapner

Removing the `missing=` argument does not solve the problem.

djkapner avatar Aug 21 '18 16:08 djkapner

Here is another, simpler way to reproduce the error:

import argschema
from argschema.fields import NumpyArray
import numpy as np

# Input data for the parser: the field is given as a plain Python list.
example = {
        'corner_mask_radii': [0, 2, 0, 0]
        }

class MySchema(argschema.ArgSchema):
    # Optional NumpyArray field declared with `default=` only (no `missing=`).
    # `int` replaces the `np.int` alias (deprecated in NumPy 1.20, removed in
    # 1.24); the alias was the builtin `int`, so the dtype is unchanged.
    corner_mask_radii = NumpyArray(
        dtype=int,
        required=False,
        default=[0, 0, 0, 0],
        description="radius of image mask corners, "
                    "order (0, 0), (w, 0), (w, h), (0, h)")

class MyThing(argschema.ArgSchemaParser):
    # Parser validated against MySchema; run() echoes the parsed field.
    default_schema = MySchema

    def run(self):
        radii = self.args['corner_mask_radii']
        print('MyThing:', radii)

if __name__ == '__main__':
    # First pass: parse the plain-list input; no command-line args are given.
    d = dict(example)
    mmod = MyThing(
           input_data=d)
    mmod.run()
    # Second pass: feed the first parser's parsed args back in as input_data,
    # this time together with command-line args. Per the report, this is the
    # construction that raises the ValueError (presumably because the field in
    # d2 is now a numpy array rather than a list -- TODO confirm).
    d2 = dict(mmod.args)
    mmod2 = MyThing(
            input_data=d2,
            args=['--output_json','out.json'])
    mmod2.run()

djkapner avatar Aug 21 '18 16:08 djkapner