build_regex_from_schema does not throw an error for invalid JSON schema
Describe the issue as clearly as possible:
I operate some vLLM deployments, and we've run into some requests that hog CPU resources and don't release them. We've identified that these requests are attempting to use outlines for guided decoding with a complex JSON schema, and it appears that the RegexGuide.from_regex call never finishes for these requests.
I've attached a small reproduction script with a redacted version of the schema from these requests. I haven't dug into the outlines code to try to figure out why this is happening. I hope it's fixed by outlines-core 0.2.x but I see that hasn't been integrated into a release here yet.
edit: The problem seems to be due to an invalid schema that build_regex_from_schema is not rejecting
Steps/code to reproduce the bug:
import outlines
from outlines_core.fsm.json_schema import build_regex_from_schema
from outlines.fsm.guide import RegexGuide
import json
from transformers import AutoTokenizer
tk = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
outlines_tokenizer = outlines.models.TransformerTokenizer(
tk
)
redacted_guided_json = {
"properties": {
"redacted_52": {
"default": [],
"description": "REDACTED",
"items": {
"discriminator": {
"mapping": {
"redacted_191": "#/$defs/Object11",
"redacted_4": "#/$defs/Object10",
"redacted_40": "#/$defs/Object2",
"redacted_142": "#/$defs/Object3",
"redacted_192": "#/$defs/Object14",
"redacted_152": "#/$defs/Object9",
"redacted_167": "#/$defs/Object8",
"redacted_213": "#/$defs/Object4",
"redacted_91": "#/$defs/Object7",
"redacted_224": "#/$defs/Object6",
"redacted_43": "#/$defs/Object5",
"redacted_115": "#/$defs/Object12",
"redacted_93": "#/$defs/Object1",
"redacted_90": "#/$defs/Object13"
},
"propertyName": "redacted_10"
},
"oneOf": [
{
"properties": {
"redacted_10": {
"const": "redacted_93",
"default": "redacted_93",
"description": "REDACTED",
"enum": [
"redacted_93"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object1",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_40",
"default": "redacted_40",
"description": "REDACTED",
"enum": [
"redacted_40"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object2",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_142",
"default": "redacted_142",
"description": "REDACTED",
"enum": [
"redacted_142"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object3",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_213",
"default": "redacted_213",
"description": "REDACTED",
"enum": [
"redacted_213"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object4",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_43",
"default": "redacted_43",
"description": "REDACTED",
"enum": [
"redacted_43"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object5",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_224",
"default": "redacted_224",
"description": "REDACTED",
"enum": [
"redacted_224"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object6",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_91",
"default": "redacted_91",
"description": "REDACTED",
"enum": [
"redacted_91"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object7",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_167",
"default": "redacted_167",
"description": "REDACTED",
"enum": [
"redacted_167"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object8",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_152",
"default": "redacted_152",
"description": "REDACTED",
"enum": [
"redacted_152"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object9",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_4",
"default": "redacted_4",
"description": "REDACTED",
"enum": [
"redacted_4"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object10",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_191",
"default": "redacted_191",
"description": "REDACTED",
"enum": [
"redacted_191"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object11",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_115",
"default": "redacted_115",
"description": "REDACTED",
"enum": [
"redacted_115"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object12",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_90",
"default": "redacted_90",
"description": "REDACTED",
"enum": [
"redacted_90"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object13",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_192",
"default": "redacted_192",
"description": "REDACTED",
"enum": [
"redacted_192"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object14",
"type": "object"
}
]
},
"required": True,
"type": "array"
},
"redacted_41": {
"description": "REDACTED",
"pattern": "^\\d{4}-\\d{2}-\\d{2}$",
"required": True,
"type": "string"
},
"redacted_204": {
"anyOf": [
{
"items": {
"description": "REDACTED",
"properties": {
"redacted_191": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED",
"title": "Object15"
},
"redacted_4": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED",
"title": "Object16"
},
"redacted_142": {
"anyOf": [
{
"description": "REDACTED",
"enum": [
"(not set)",
"redacted_111",
"redacted_211",
"redacted_161",
"redacted_171",
"redacted_42",
"redacted_141",
"redacted_168",
"redacted_202",
"redacted_242",
"redacted_220",
"redacted_172",
"redacted_221",
"redacted_145",
"redacted_190",
"redacted_123",
"redacted_114",
"redacted_55",
"redacted_210",
"redacted_147",
"redacted_229",
"redacted_21",
"redacted_179",
"redacted_112",
"redacted_206",
"redacted_125",
"redacted_205",
"redacted_99",
"redacted_185",
"redacted_102",
"redacted_182",
"redacted_136",
"redacted_29",
"redacted_129",
"redacted_15",
"redacted_78",
"redacted_163",
"redacted_214",
"redacted_238",
"redacted_53",
"redacted_120",
"redacted_173",
"redacted_9",
"redacted_95",
"redacted_151",
"redacted_134",
"redacted_219",
"redacted_223",
"redacted_235",
"redacted_107",
"redacted_25",
"redacted_97",
"redacted_2",
"redacted_156",
"redacted_227",
"redacted_83",
"redacted_75",
"redacted_105",
"redacted_72",
"redacted_183",
"redacted_130",
"redacted_39",
"redacted_174",
"redacted_23",
"redacted_193",
"redacted_36",
"redacted_113",
"redacted_117",
"redacted_154",
"redacted_20",
"redacted_148",
"redacted_66",
"redacted_94",
"redacted_0",
"redacted_189",
"redacted_77",
"redacted_26",
"redacted_54",
"redacted_118",
"redacted_150",
"redacted_121",
"redacted_51",
"redacted_143",
"redacted_245",
"redacted_109",
"redacted_60",
"redacted_6",
"redacted_119",
"redacted_71",
"redacted_203",
"redacted_230",
"redacted_131",
"redacted_63",
"redacted_57",
"redacted_64",
"redacted_82",
"redacted_155",
"redacted_212",
"redacted_184",
"redacted_86",
"redacted_231",
"redacted_16",
"redacted_137",
"redacted_169",
"redacted_17",
"redacted_176",
"redacted_38",
"redacted_218",
"redacted_196",
"redacted_122",
"redacted_100",
"redacted_158",
"redacted_124",
"redacted_197",
"redacted_133",
"redacted_61",
"redacted_228",
"redacted_24",
"redacted_68",
"redacted_84",
"redacted_170",
"redacted_108",
"redacted_70",
"redacted_241",
"redacted_103",
"redacted_232",
"redacted_104",
"redacted_76",
"redacted_67",
"redacted_222",
"redacted_79",
"redacted_110",
"redacted_46",
"redacted_7",
"redacted_217",
"redacted_233"
],
"title": "Object17",
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_192": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED",
"title": "Object18"
},
"redacted_152": {
"anyOf": [
{
"description": "REDACTED",
"enum": [
"(not set)",
"redacted_181",
"redacted_177",
"redacted_101",
"redacted_200",
"redacted_92",
"redacted_14",
"redacted_56",
"redacted_138",
"redacted_157",
"redacted_187",
"redacted_3",
"redacted_62",
"redacted_243",
],
"title": "Object19",
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_167": {
"anyOf": [
{
"description": "REDACTED",
"enum": [
"redacted_12",
"redacted_201",
"redacted_199",
"redacted_225",
"(not set)"
],
"title": "Object20",
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_213": {
"anyOf": [
{
"description": "REDACTED",
"enum": [
"redacted_12",
"redacted_234",
"redacted_27",
"redacted_81",
"redacted_96",
"redacted_207",
"redacted_236",
"redacted_140",
"redacted_31",
"redacted_180",
"redacted_32",
"redacted_162",
"redacted_58",
"redacted_98",
"redacted_69",
"redacted_166",
"redacted_87",
"redacted_106",
"redacted_74",
"redacted_194",
"redacted_28",
"redacted_30",
"redacted_209",
"redacted_132",
"redacted_160",
"redacted_146",
"redacted_19",
"redacted_65",
"redacted_44",
"redacted_48",
"redacted_116",
"redacted_215",
"redacted_126",
"redacted_37",
"redacted_45",
"redacted_33"
],
"title": "Object21",
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_91": {
"anyOf": [
{
"description": "REDACTED",
"enum": [
"redacted_144",
"redacted_135",
"redacted_13",
"redacted_164",
"redacted_73",
"redacted_50",
"redacted_195",
"redacted_216",
"redacted_186"
],
"title": "Object22",
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_224": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED",
"title": "Object23"
},
"redacted_43": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED",
"title": "Object24"
}
},
"title": "Object25",
"type": "object"
},
"type": "array"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_127": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_178": {
"default": [
"redacted_244"
],
"description": "REDACTED",
"items": {
"description": "REDACTED",
"enum": [
"redacted_244",
"redacted_188",
"redacted_80",
"redacted_226",
"redacted_1"
],
"type": "string"
},
"required": True,
"type": "array"
},
"redacted_175": {
"anyOf": [
{
"items": {
"enum": [
"redacted_59",
"redacted_22",
"redacted_128",
"redacted_153",
"redacted_85",
"redacted_18",
"redacted_49",
"redacted_88",
"redacted_5",
"redacted_8"
],
"title": "Object26",
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_165": {
"description": "REDACTED",
"pattern": "^\\d{4}-\\d{2}-\\d{2}$",
"required": True,
"type": "string"
}
},
"required": [
"redacted_165",
"redacted_41"
],
"type": "object"
}
regex = build_regex_from_schema(json.dumps(redacted_guided_json))
guide = RegexGuide.from_regex(regex, outlines_tokenizer)
Expected result:
build_regex_from_schema should raise an error indicating what's wrong with the schema
Error message:
There is no error; the script just keeps running. I've seen this go for over an hour.
Outlines/Python version information:
Version information
Context for the issue:
Having a vector for a user to supply a request that will take up resources indefinitely is no fun :(
Because there's no async API for building these guides, it is also hard to manage a timeout and cancellation.
There are a few issues here.
First, your schema is not correct, as you use required: True in the object rather than setting it at the top level using a list of the required property names:
redacted_guided_json = {
"type": "object",
"required": [
"redacted_165",
"redacted_41",
"redacted_52",
"redacted_178"
],
# . . .
I'll stick the full correct schema down below.
The second issue is that this should be throwing an error. I'm still waiting for it to compile. build_regex_from_schema should be throwing an error because of the incorrect schema, but it is not for some reason.
I was able to get this to compile in 80-90ms using our internal API. Email me at [email protected] if you want to chat about it.
EDIT: Compilation finished after about 65 minutes.
The corrected JSON schema
redacted_guided_json = {
"type": "object",
"required": [
"redacted_165",
"redacted_41",
"redacted_52",
"redacted_178"
],
"properties": {
"redacted_52": {
"default": [],
"description": "REDACTED",
"items": {
"discriminator": {
"mapping": {
"redacted_191": "#/$defs/Object11",
"redacted_4": "#/$defs/Object10",
"redacted_40": "#/$defs/Object2",
"redacted_142": "#/$defs/Object3",
"redacted_192": "#/$defs/Object14",
"redacted_152": "#/$defs/Object9",
"redacted_167": "#/$defs/Object8",
"redacted_213": "#/$defs/Object4",
"redacted_91": "#/$defs/Object7",
"redacted_224": "#/$defs/Object6",
"redacted_43": "#/$defs/Object5",
"redacted_115": "#/$defs/Object12",
"redacted_93": "#/$defs/Object1",
"redacted_90": "#/$defs/Object13"
},
"propertyName": "redacted_10"
},
"oneOf": [
{
"properties": {
"redacted_10": {
"const": "redacted_93",
"default": "redacted_93",
"description": "REDACTED",
"enum": [
"redacted_93"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object1",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_40",
"default": "redacted_40",
"description": "REDACTED",
"enum": [
"redacted_40"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object2",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_142",
"default": "redacted_142",
"description": "REDACTED",
"enum": [
"redacted_142"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object3",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_213",
"default": "redacted_213",
"description": "REDACTED",
"enum": [
"redacted_213"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object4",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_43",
"default": "redacted_43",
"description": "REDACTED",
"enum": [
"redacted_43"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object5",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_224",
"default": "redacted_224",
"description": "REDACTED",
"enum": [
"redacted_224"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object6",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_91",
"default": "redacted_91",
"description": "REDACTED",
"enum": [
"redacted_91"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object7",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_167",
"default": "redacted_167",
"description": "REDACTED",
"enum": [
"redacted_167"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object8",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_152",
"default": "redacted_152",
"description": "REDACTED",
"enum": [
"redacted_152"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object9",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_4",
"default": "redacted_4",
"description": "REDACTED",
"enum": [
"redacted_4"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object10",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_191",
"default": "redacted_191",
"description": "REDACTED",
"enum": [
"redacted_191"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object11",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_115",
"default": "redacted_115",
"description": "REDACTED",
"enum": [
"redacted_115"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object12",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_90",
"default": "redacted_90",
"description": "REDACTED",
"enum": [
"redacted_90"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object13",
"type": "object"
},
{
"properties": {
"redacted_10": {
"const": "redacted_192",
"default": "redacted_192",
"description": "REDACTED",
"enum": [
"redacted_192"
],
"title": "Object0",
"type": "string"
}
},
"title": "Object14",
"type": "object"
}
]
},
"type": "array"
},
"redacted_41": {
"description": "REDACTED",
"pattern": "^\\d{4}-\\d{2}-\\d{2}$",
"type": "string"
},
"redacted_204": {
"anyOf": [
{
"items": {
"description": "REDACTED",
"properties": {
"redacted_191": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED",
"title": "Object15"
},
"redacted_4": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED",
"title": "Object16"
},
"redacted_142": {
"anyOf": [
{
"description": "REDACTED",
"enum": [
"(not set)",
"redacted_111",
"redacted_211",
"redacted_161",
"redacted_171",
"redacted_42",
"redacted_141",
"redacted_168",
"redacted_202",
"redacted_242",
"redacted_220",
"redacted_172",
"redacted_221",
"redacted_145",
"redacted_190",
"redacted_123",
"redacted_114",
"redacted_55",
"redacted_210",
"redacted_147",
"redacted_229",
"redacted_21",
"redacted_179",
"redacted_112",
"redacted_206",
"redacted_125",
"redacted_205",
"redacted_99",
"redacted_185",
"redacted_102",
"redacted_182",
"redacted_136",
"redacted_29",
"redacted_129",
"redacted_15",
"redacted_78",
"redacted_163",
"redacted_214",
"redacted_238",
"redacted_53",
"redacted_120",
"redacted_173",
"redacted_9",
"redacted_95",
"redacted_151",
"redacted_134",
"redacted_219",
"redacted_223",
"redacted_235",
"redacted_107",
"redacted_25",
"redacted_97",
"redacted_2",
"redacted_156",
"redacted_227",
"redacted_83",
"redacted_75",
"redacted_105",
"redacted_72",
"redacted_183",
"redacted_130",
"redacted_39",
"redacted_174",
"redacted_23",
"redacted_193",
"redacted_36",
"redacted_113",
"redacted_117",
"redacted_154",
"redacted_20",
"redacted_148",
"redacted_66",
"redacted_94",
"redacted_0",
"redacted_189",
"redacted_77",
"redacted_26",
"redacted_54",
"redacted_118",
"redacted_150",
"redacted_121",
"redacted_51",
"redacted_143",
"redacted_245",
"redacted_109",
"redacted_60",
"redacted_6",
"redacted_119",
"redacted_71",
"redacted_203",
"redacted_230",
"redacted_131",
"redacted_63",
"redacted_57",
"redacted_64",
"redacted_82",
"redacted_155",
"redacted_212",
"redacted_184",
"redacted_86",
"redacted_231",
"redacted_16",
"redacted_137",
"redacted_169",
"redacted_17",
"redacted_176",
"redacted_38",
"redacted_218",
"redacted_196",
"redacted_122",
"redacted_100",
"redacted_158",
"redacted_124",
"redacted_197",
"redacted_133",
"redacted_61",
"redacted_228",
"redacted_24",
"redacted_68",
"redacted_84",
"redacted_170",
"redacted_108",
"redacted_70",
"redacted_241",
"redacted_103",
"redacted_232",
"redacted_104",
"redacted_76",
"redacted_67",
"redacted_222",
"redacted_79",
"redacted_110",
"redacted_46",
"redacted_7",
"redacted_217",
"redacted_233"
],
"title": "Object17",
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_192": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED",
"title": "Object18"
},
"redacted_152": {
"anyOf": [
{
"description": "REDACTED",
"enum": [
"(not set)",
"redacted_181",
"redacted_177",
"redacted_101",
"redacted_200",
"redacted_92",
"redacted_14",
"redacted_56",
"redacted_138",
"redacted_157",
"redacted_187",
"redacted_3",
"redacted_62",
"redacted_243",
],
"title": "Object19",
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_167": {
"anyOf": [
{
"description": "REDACTED",
"enum": [
"redacted_12",
"redacted_201",
"redacted_199",
"redacted_225",
"(not set)"
],
"title": "Object20",
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_213": {
"anyOf": [
{
"description": "REDACTED",
"enum": [
"redacted_12",
"redacted_234",
"redacted_27",
"redacted_81",
"redacted_96",
"redacted_207",
"redacted_236",
"redacted_140",
"redacted_31",
"redacted_180",
"redacted_32",
"redacted_162",
"redacted_58",
"redacted_98",
"redacted_69",
"redacted_166",
"redacted_87",
"redacted_106",
"redacted_74",
"redacted_194",
"redacted_28",
"redacted_30",
"redacted_209",
"redacted_132",
"redacted_160",
"redacted_146",
"redacted_19",
"redacted_65",
"redacted_44",
"redacted_48",
"redacted_116",
"redacted_215",
"redacted_126",
"redacted_37",
"redacted_45",
"redacted_33"
],
"title": "Object21",
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_91": {
"anyOf": [
{
"description": "REDACTED",
"enum": [
"redacted_144",
"redacted_135",
"redacted_13",
"redacted_164",
"redacted_73",
"redacted_50",
"redacted_195",
"redacted_216",
"redacted_186"
],
"title": "Object22",
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_224": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED",
"title": "Object23"
},
"redacted_43": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED",
"title": "Object24"
}
},
"title": "Object25",
"type": "object"
},
"type": "array"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_127": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_178": {
"default": [
"redacted_244"
],
"description": "REDACTED",
"items": {
"description": "REDACTED",
"enum": [
"redacted_244",
"redacted_188",
"redacted_80",
"redacted_226",
"redacted_1"
],
"type": "string"
},
"type": "array"
},
"redacted_175": {
"anyOf": [
{
"items": {
"enum": [
"redacted_59",
"redacted_22",
"redacted_128",
"redacted_153",
"redacted_85",
"redacted_18",
"redacted_49",
"redacted_88",
"redacted_5",
"redacted_8"
],
"title": "Object26",
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"default": None,
"description": "REDACTED"
},
"redacted_165": {
"description": "REDACTED",
"pattern": "^\\d{4}-\\d{2}-\\d{2}$",
"type": "string"
}
}
}
First, your schema is not correct, as you use required: True in the object rather than setting it at the top level
Hah, yeah, I'm no JSON Schema expert and was just assuming this was valid because outlines wasn't throwing an error 🤦. Thanks for pointing that out!
It would be great to have that validation be done in build_regex_from_schema, but in the meantime I can get back to our product / the user with this info as well
Of course! Happy to help. We might wish to change the title of this issue to something like
build_regex_from_schema does not throw an error for invalid JSON schema
done!
Moving this issue to outlines-core as this is where schema validation is handled.