json-schema-validator icon indicating copy to clipboard operation
json-schema-validator copied to clipboard

Error validating a JSON Schema against formal draft JSON Schema meta data

Open ericbroda opened this issue 4 years ago • 16 comments

My objective is to validate a JSON Schema against the JSON Schema meta-data (i.e., use the JSON Schema meta-data as the "grammar" to validate an actual JSON Schema).

The issue is that obvious errors are not caught:

  • changing the type "array" to "sdfsdfsd" - no error caught
  • changing a "$ref" to point to a non-existing definition - no error caught

I have provided several files below:

  • jsonschema.json: this is the combined meta-data files from json-schema.org
  • test.schema.json: this is the schema to be validated (against jsonschema.json)
  • code fragment to execute the validation.

(In the code fragment (later), this is the file "test.schema.json")

{
  "$id": "https://example.com/arrays.schema.json",
  "description": "A representation of a person, company, organization, or place",
  "type": "object",
  "required": [ "messages" ],
  "properties": {
    "messages": {
      "type": "array",
      "items": { "$ref": "#/$defs/message" }
    }
  },
  "$defs": {
    "message": {
      "type": "object",
      "required": [ "key", "data" ],
      "properties": {
        "key": {
          "$ref": "#/$defs/key"
        },
        "data": {
          "$ref": "#/$defs/data"
        }
      }
    },
    "key": {
      "type": "string"
    },
    "data": {
      "type": "object",
      "required": [ "veggieName", "veggieLike" ],
      "properties": {
        "veggieName": {
          "type": "string",
          "description": "The name of the vegetable."
        },
        "veggieLike": {
          "type": "boolean",
          "description": "Do I like this vegetable?"
        }
      }
    }
  }
}

The JSON Schema meta-data is a consolidation of meta-data files from JSON-Schema.org (https://json-schema.org/specification.html):

(In the code below, this is "jsonschema.json")

{
    "$id": "https://json-schema.org/draft/2020-12/meta/validation",
    "$vocabulary": {
        "https://json-schema.org/draft/2020-12/vocab/validation": true
    },
    "$dynamicAnchor": "meta",

    "title": "Validation vocabulary meta-schema",
    "type": ["object", "boolean"],
    "properties": {
        // validation.json
        "type": {
            "anyOf": [
                { "$ref": "#/$defs/simpleTypes" },
                {
                    "type": "array",
                    "items": { "$ref": "#/$defs/simpleTypes" },
                    "minItems": 1,
                    "uniqueItems": true
                }
            ]
        },
        "const": true,
        "enum": {
            "type": "array",
            "items": true
        },
        "multipleOf": {
            "type": "number",
            "exclusiveMinimum": 0
        },
        "maximum": {
            "type": "number"
        },
        "exclusiveMaximum": {
            "type": "number"
        },
        "minimum": {
            "type": "number"
        },
        "exclusiveMinimum": {
            "type": "number"
        },
        "maxLength": { "$ref": "#/$defs/nonNegativeInteger" },
        "minLength": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
        "pattern": {
            "type": "string",
            "format": "regex"
        },
        "maxItems": { "$ref": "#/$defs/nonNegativeInteger" },
        "minItems": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
        "uniqueItems": {
            "type": "boolean",
            "default": false
        },
        "maxContains": { "$ref": "#/$defs/nonNegativeInteger" },
        "minContains": {
            "$ref": "#/$defs/nonNegativeInteger",
            "default": 1
        },
        "maxProperties": { "$ref": "#/$defs/nonNegativeInteger" },
        "minProperties": { "$ref": "#/$defs/nonNegativeIntegerDefault0" },
        "required": { "$ref": "#/$defs/stringArray" },
        "dependentRequired": {
            "type": "object",
            "additionalProperties": {
                "$ref": "#/$defs/stringArray"
            }
        },
        // applicator.json
        "prefixItems": { "$ref": "#/$defs/schemaArray" },
        "items": { "$dynamicRef": "#meta" },
        "contains": { "$dynamicRef": "#meta" },
        "additionalProperties": { "$dynamicRef": "#meta" },
        "properties": {
            "type": "object",
            "additionalProperties": { "$dynamicRef": "#meta" },
            "default": {}
        },
        "patternProperties": {
            "type": "object",
            "additionalProperties": { "$dynamicRef": "#meta" },
            "propertyNames": { "format": "regex" },
            "default": {}
        },
        "dependentSchemas": {
            "type": "object",
            "additionalProperties": { "$dynamicRef": "#meta" },
            "default": {}
        },
        "propertyNames": { "$dynamicRef": "#meta" },
        "if": { "$dynamicRef": "#meta" },
        "then": { "$dynamicRef": "#meta" },
        "else": { "$dynamicRef": "#meta" },
        "allOf": { "$ref": "#/$defs/schemaArray" },
        "anyOf": { "$ref": "#/$defs/schemaArray" },
        "oneOf": { "$ref": "#/$defs/schemaArray" },
        "not": { "$dynamicRef": "#meta" },
        // content.json
        "contentEncoding": { "type": "string" },
        "contentMediaType": { "type": "string" },
        "contentSchema": { "$dynamicRef": "#meta" },
        // core.json
        "$id": {
            "$ref": "#/$defs/uriReferenceString",
            "$comment": "Non-empty fragments not allowed.",
            "pattern": "^[^#]*#?$"
        },
        "$schema": { "$ref": "#/$defs/uriString" },
        "$ref": { "$ref": "#/$defs/uriReferenceString" },
        "$anchor": { "$ref": "#/$defs/anchorString" },
        "$dynamicRef": { "$ref": "#/$defs/uriReferenceString" },
        "$dynamicAnchor": { "$ref": "#/$defs/anchorString" },
        "$vocabulary": {
            "type": "object",
            "propertyNames": { "$ref": "#/$defs/uriString" },
            "additionalProperties": {
                "type": "boolean"
            }
        },
        "$comment": {
            "type": "string"
        },
        "$defs": {
            "type": "object",
            "additionalProperties": { "$dynamicRef": "#meta" }
        },
        // meta-data.json
        "title": {
            "type": "string"
        },
        "description": {
            "type": "string"
        },
        "default": true,
        "deprecated": {
            "type": "boolean",
            "default": false
        },
        "readOnly": {
            "type": "boolean",
            "default": false
        },
        "writeOnly": {
            "type": "boolean",
            "default": false
        },
        "examples": {
            "type": "array",
            "items": true
        },
        // unevaluated.json
        "unevaluatedItems": { "$dynamicRef": "#meta" },
        "unevaluatedProperties": { "$dynamicRef": "#meta" },
        // format-annotation.json and format-assertion.json
        "format": { "type": "string" },
    },
    "$defs": {
        // validation.json
        "nonNegativeInteger": {
            "type": "integer",
            "minimum": 0
        },
        "nonNegativeIntegerDefault0": {
            "$ref": "#/$defs/nonNegativeInteger",
            "default": 0
        },
        "simpleTypes": {
            "enum": [
                "array",
                "boolean",
                "integer",
                "null",
                "number",
                "object",
                "string"
            ]
        },
        "stringArray": {
            "type": "array",
            "items": { "type": "string" },
            "uniqueItems": true,
            "default": []
        },
        // applicator.json
        "schemaArray": {
            "type": "array",
            "minItems": 1,
            "items": { "$dynamicRef": "#meta" }
        },
        // content.json (empty)
        // core.json
        "anchorString": {
            "type": "string",
            "pattern": "^[A-Za-z_][-A-Za-z0-9._]*$"
        },
        "uriString": {
            "type": "string",
            "format": "uri"
        },
        "uriReferenceString": {
            "type": "string",
            "format": "uri-reference"
        },
        // meta-data.json (empty)
        // unevaluated.json (empty)
        // format-annotation.json (empty)
        // format-assertion.json (empty)
    }
}

The code to perform this validation is as follows:

        : 
        private static Logger log = LoggerFactory.getLogger(some.class);
        :
        // The combined meta-schema file plays the role of the schema, and the
        // JSON Schema under test is treated as the instance document.
        String schemaPath = "jsonschema.json";
        String dataPath = "test.schema.json";

        ObjectMapper mapper = new ObjectMapper();

        try {
            String schemaStr = readFile(schemaPath);

            // NOTE(review): the factory is created for draft 2019-09, while
            // jsonschema.json declares a draft 2020-12 "$id" and uses
            // "$dynamicRef"/"$dynamicAnchor" - confirm this mismatch is intended.
            JsonSchemaFactory factory = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V201909);
            JsonSchema schema = factory.getSchema(schemaStr);

            // Parse the schema under test into a tree so it can be validated as data.
            String dataStr = readFile(dataPath);
            JsonNode dataNode = mapper.readTree(dataStr);

            // Log every reported validation message; an empty set logs nothing.
            Set<ValidationMessage> errors = schema.validate(dataNode);
            for (ValidationMessage error : errors) {
                log.error("CLI:certifySchema error message:{}, details:{}, arguments:{}, path:{}, code:{}, type:{}",
                    error.getMessage(), error.getDetails(), error.getArguments(), error.getPath(), error.getCode(), error.getType());
            }
        } catch (Exception e) {
            // Any failure (file access, parsing, schema loading) is only printed here.
            e.printStackTrace();
        }

        /**
         * Reads the text file at {@code path} and returns its contents.
         *
         * <p>The file is decoded as UTF-8 (the interchange encoding for JSON) instead of
         * the platform default charset, so non-ASCII characters are read the same way on
         * every machine. Each line, including the last one, is terminated with
         * {@link System#lineSeparator()} in the returned string.
         *
         * @param path location of the file to read
         * @return the file contents; an empty string when the file is empty
         * @throws java.io.FileNotFoundException if the file cannot be opened for reading
         * @throws java.io.IOException if reading the file fails
         */
        public String readFile(String path) throws java.io.FileNotFoundException, java.io.IOException {
            StringBuilder sb = new StringBuilder();
            // FileInputStream keeps the FileNotFoundException contract of the original FileReader.
            try (BufferedReader br = new BufferedReader(new java.io.InputStreamReader(
                    new java.io.FileInputStream(path), java.nio.charset.StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line).append(System.lineSeparator());
                }
            }
            return sb.toString();
        }

ericbroda avatar Oct 26 '21 21:10 ericbroda

@ericbroda I have created a test case to reproduce your issue successfully. Will debug into it to find out the root cause.

https://github.com/networknt/json-schema-validator/tree/issue475

stevehu avatar Oct 26 '21 22:10 stevehu

I also added v7 test case and it works. I guess the problem is that the $vocabulary keyword is not supported in the 2019-09 meta schema and the schema is not loaded completely. Or maybe due to the remote reference for the vocabulary.

stevehu avatar Oct 26 '21 22:10 stevehu

Two additional suggestions:

  1. Could we maintain the JSON Schema meta-data files available as local files (as a "resource") and cached to ensure high performance
  2. Could we add a new method to validate a schema (instead of validating data against a schema)... this is a gap in almost all JSON schema tools and would make it much more intuitive to validate a JSON Schema

ericbroda avatar Oct 27 '21 17:10 ericbroda

A few more additional requests (these are "nice to have" and not urgent):

  1. When an error is found, we put out a ValidationMessage but it does not contain the line number in the schema to be validated where the error occurred - can the line number of the error be added (I realize the line can be inferred by ValidationMessage.path)
  2. Is it possible to flag unrecognized tags/items in the JSON schema to be validated? I know they are not strictly errors, but perhaps flag them as "warnings" (and put these warnings in the ValidationMessage)

ericbroda avatar Oct 27 '21 18:10 ericbroda

One more item which is very important: currently the code considers a JSON Schema to be valid even if there is an obvious error in the "$ref" value/pointer. Consider the following example below (note that the properties.messages.items.$ref points to a non-existent definition)... Can this be verified also?

{
  "$id": "https://example.com/arrays.schema.json",
  "description": "A representation of a person, company, organization, or place",
  "type": "object",
  "required": [ "messages" ],
  "properties": {
    "messages": {
      "type": "array",
      "items": { "$ref": "#/$defs/THIS-AN-INCORRECT-REFERENCE-AND-SHOULD-CAUSE-AN-ERROR" }
    }
  },
  "$defs": {
    "message": {
      "type": "object",
      "required": [ "key", "data" ],
      "properties": {
        "key": {
          "$ref": "#/$defs/key"
        },
        "data": {
          "$ref": "#/$defs/data"
        }
      }
    },
    "key": {
      "type": "string"
    },
    "data": {
      "type": "object",
      "required": [ "veggieName", "veggieLike" ],
      "properties": {
        "veggieName": {
          "type": "string",
          "description": "The name of the vegetable."
        },
        "veggieLike": {
          "type": "boolean",
          "description": "Do I like this vegetable?"
        }
      }
    }
  }
}

ericbroda avatar Oct 27 '21 18:10 ericbroda

A few more "errors" (or maybe "warnings") to try to validate:

  • invalid "$ref" to address when references do not resolve
  • invalid "required" item to address when a "required" field is not defined in the schema (warning?)
  • when an "array" type is missing an "items" field (warning?)
  • when an invalid or unsupported schema ("schema" field) is defined in the JSON schema
  • when an unrecognized tag (for example, "properties" or "type" or "$defs" is spelled incorrectly) is found (warning?)

ericbroda avatar Oct 29 '21 14:10 ericbroda

Is there any progress on this issue? I would really like to have a way to validate the schema itself, either by default as part of the getSchema API or with some config passed to it to indicate that the JSON schema should be validated when it is read. Also, manually downloading the meta-schemas and validating against them doesn't seem to be a long-term approach anyway if the schema standards change with the next versions.

it would really help to have this feature.

Otherwise, I would like to know what can be done with following things today:

  1. I am using 1.0.57 schema validator
  2. I am using draft 7 schemas
  3. I would like to validate the schemas with meta schemas for draft 7. (https://json-schema.org/specification-links.html#draft-7) is meta schema specified in the specification enough to validate json schema itself?

It would be helpful to get info on which meta-schema to use to validate draft 7 JSON schemas, and on whether the validate API works for validating a JSON schema against a meta-schema and catches errors such as a misplaced required or additionalProperties, an illegal $ref, etc.

palwe-prafulla avatar Nov 12 '21 19:11 palwe-prafulla

Not really a fix for this issue, but for 2019-09 draft I've combined all the individual meta-schema into a single file: https://gist.github.com/gareth-robinson/815239b36e397e484b77141b912d1b28, the same way the draft 7 schema was and that seems to work ok for validating a schema for me (though I've only done some basic testing) @palwe-prafulla, the meta-schema for draft 7 is also a single file so you should be able to use it to validate the fields in a schema. Note though if you have $ref fields in the schema you're validating this library won't recursively pull and validate those, since validating a schema requires more than validating plain JSON (as Eric has called out in his comments)

gareth-robinson avatar Nov 23 '21 09:11 gareth-robinson

@gareth-robinson I think your approach will work. We can put the combined meta schema into the resources folder so that it can be loaded.

stevehu avatar Nov 24 '21 20:11 stevehu

Hi Guys, thanks for the brilliant library and lots of effort to add new features.

We have plan to use validating a JSON Schema against formal draft JSON Schema meta data (2019-09) in our project. We were trying to find a library for this validation but most of them are complex and not customizable. It would be great to have JSON Schema meta data validation as built-in outstanding option of the library.

Quick question, since the branch https://github.com/networknt/json-schema-validator/tree/issue475 was created to fix some issues in the library. As far as I understand JSON Schema meta data validation (2019-09) cannot be used due to these issues.

Do you have plan to merge this branch and the fixes in master soon, for instance, in March or April this year?

Thank you.

dmitrydrugi avatar Feb 05 '22 09:02 dmitrydrugi

We still have some unsupported 2019-09 keywords and that is why we cannot use the meta schema that comes with it. The best option as described above is to manually resolve all the references and create a final meta schema to use from our resources folder. I would be happy to accept the PR if someone is taking the effort. Thanks.

stevehu avatar Feb 22 '22 01:02 stevehu

The bundled 2019-09 draft meta schema: https://gist.github.com/gareth-robinson/815239b36e397e484b77141b912d1b28, in https://github.com/networknt/json-schema-validator/issues/475#issuecomment-976306236 was very helpful for validating json schema against meta schema. Could someone provide a similar json for 2020-12 draft ?

Otherwise, whenever I do verification against the meta-schema, it gives me UnknownHostException www.https://json-schema.org, probably because we can't access URLs/URIs in our use case.

rishabh413 avatar Jun 18 '23 20:06 rishabh413

@rishabh413 Try adding a URITranslator that redirects the public resources to the embedded copies. We have embedded copies of Drafts 4, 6, 7, 2019-09 and 2020-12.

// Start from the stock draft 2020-12 factory. The enum is SpecVersion.VersionFlag
// (VersionFlag is nested inside SpecVersion), not the other way around.
JsonSchemaFactory base = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V202012);
// Rewrite https://json-schema.org URIs to the "resource:" prefix so the meta-schemas
// are loaded from the embedded copies instead of being fetched over the network.
JsonSchemaFactory factory = JsonSchemaFactory
    .builder(base)
    .addUriTranslator(URITranslator.prefix("https://json-schema.org", "resource:"))
    .build();
JsonSchema schema = factory.getSchema(/* my schema */);

fdutton avatar Jul 02 '23 23:07 fdutton

thanks @fdutton. It worked.

However, it's not giving any validation errors for below data which is invalid as per draft 2020-12 :

{  
  "type": "object",  
  "properties": {    
    "key": { 
      "title" : "My key", 
      "type": "blabla" 
    } 
  }
}

I am on release 1.0.84.

rishabh413 avatar Jul 03 '23 14:07 rishabh413

@rishabh413 We have several issues related to validating the schema itself for 2020-12 and 2019-09 due to the introduction of the vocabulary keyword (look for the issues tagged vocabulary). I just committed a change that partially fixes this for 2019-09 but have yet to fix 2020-12.

fdutton avatar Jul 03 '23 14:07 fdutton

Thanks! Will wait for a fix for 2020-12.

rishabh413 avatar Jul 03 '23 16:07 rishabh413

Thanks @justin-tay for fixing this in https://github.com/networknt/json-schema-validator/pull/931

https://github.com/networknt/json-schema-validator/issues/475#issuecomment-1618381541 now gives me below errors:

  • Json schema: $.properties.key.type: does not have a value in the enumeration [array, boolean, integer, null, number, object, string]
  • Json schema: $.properties.key.type: string found, array expected

I think second error is not correct. PTAL

rishabh413 avatar Mar 11 '24 01:03 rishabh413

The error is correct. If the failure is in an anyOf, the error messages from all of its subschemas are returned, since the anyOf only fails when none of them match.

The following is the error message with the hierarchical output format.

{
  "valid" : false,
  "evaluationPath" : "",
  "schemaLocation" : "https://json-schema.org/draft/2020-12/schema#",
  "instanceLocation" : "",
  "details" : [ {
    "valid" : false,
    "evaluationPath" : "/allOf/1/$ref/properties/properties/additionalProperties/$dynamicRef/allOf/3/$ref/properties/type/anyOf/0/$ref",
    "schemaLocation" : "https://json-schema.org/draft/2020-12/meta/validation#/$defs/simpleTypes",
    "instanceLocation" : "/properties/key/type",
    "errors" : {
      "enum" : "does not have a value in the enumeration [array, boolean, integer, null, number, object, string]"
    }
  }, {
    "valid" : false,
    "evaluationPath" : "/allOf/1/$ref/properties/properties/additionalProperties/$dynamicRef/allOf/3/$ref/properties/type/anyOf/1",
    "schemaLocation" : "https://json-schema.org/draft/2020-12/meta/validation#/properties/type/anyOf/1",
    "instanceLocation" : "/properties/key/type",
    "errors" : {
      "type" : "string found, array expected"
    }
  } ]
}

The following is an excerpt of the schema that fails.

{
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "https://json-schema.org/draft/2020-12/meta/validation",
    "$dynamicAnchor": "meta",

    "title": "Validation vocabulary meta-schema",
    "type": ["object", "boolean"],
    "properties": {
        "type": {
            "anyOf": [
                { "$ref": "#/$defs/simpleTypes" },
                {
                    "type": "array",
                    "items": { "$ref": "#/$defs/simpleTypes" },
                    "minItems": 1,
                    "uniqueItems": true
                }
            ]
        },
...
}

The second error corresponds to

                {
                    "type": "array",
                    "items": { "$ref": "#/$defs/simpleTypes" },
                    "minItems": 1,
                    "uniqueItems": true
                }

justin-tay avatar Mar 11 '24 02:03 justin-tay

Agreed! Thanks for the explanation

This issue can be closed from my side.

rishabh413 avatar Mar 11 '24 02:03 rishabh413

@rishabh413 Thanks a lot for your confirmation. Closing this issue.

stevehu avatar Mar 11 '24 02:03 stevehu