SHARK
Hires fix does not work.
I tried using the hires fix, but it does not work. Here is the error that I get:
Traceback (most recent call last):
File "gradio\routes.py", line 488, in run_predict
File "gradio\blocks.py", line 1431, in process_api
File "gradio\blocks.py", line 1117, in call_function
File "gradio\utils.py", line 350, in async_iteration
File "gradio\utils.py", line 343, in __anext__
File "anyio\to_thread.py", line 33, in run_sync
File "anyio\_backends\_asyncio.py", line 2101, in run_sync_in_worker_thread
File "anyio\_backends\_asyncio.py", line 828, in run
File "gradio\utils.py", line 326, in run_sync_iterator_async
File "gradio\utils.py", line 695, in gen_wrapper
File "ui\txt2img_ui.py", line 195, in txt2img_inf
File "apps\stable_diffusion\src\pipelines\pipeline_shark_stable_diffusion_txt2img.py", line 134, in generate_images
File "apps\stable_diffusion\src\pipelines\pipeline_shark_stable_diffusion_utils.py", line 235, in produce_img_latents
File "apps\stable_diffusion\src\pipelines\pipeline_shark_stable_diffusion_utils.py", line 114, in load_unet
File "apps\stable_diffusion\src\models\model_wrappers.py", line 855, in unet
File "apps\stable_diffusion\src\models\model_wrappers.py", line 818, in unet
File "apps\stable_diffusion\src\models\model_wrappers.py", line 778, in compile_unet_variants
File "apps\stable_diffusion\src\models\model_wrappers.py", line 619, in get_unet
File "apps\stable_diffusion\src\utils\utils.py", line 178, in compile_through_fx
File "apps\stable_diffusion\src\utils\utils.py", line 80, in _compile_module
File "shark\shark_inference.py", line 198, in save_module
return export_iree_module_to_vmfb(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "shark\iree_utils\compile_utils.py", line 491, in export_iree_module_to_vmfb
flatbuffer_blob = compile_module_to_flatbuffer(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "shark\iree_utils\compile_utils.py", line 317, in compile_module_to_flatbuffer
flatbuffer_blob = ireec.compile_str(
^^^^^^^^^^^^^^^^^^
File "iree\compiler\tools\core.py", line 300, in compile_str
File "iree\compiler\tools\binaries.py", line 199, in invoke_immediate
SystemExit: Error invoking IREE compiler tool iree-compile.exe
Diagnostics:
<eval_with_key>.13 from torch\fx\experimental\proxy_tensor.py:477 in wrapped:39:15: error: failed to legalize operation 'arith.constant'
<eval_with_key>.13 from torch\fx\experimental\proxy_tensor.py:477 in wrapped:39:15: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, DotProduct, DotProductInputAll, DotProductInput4x8BitPacked, DotProductInput4x8Bit, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_integer_dot_product, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 65536, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 1024], subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}>
<eval_with_key>.13 from torch\fx\experimental\proxy_tensor.py:477 in wrapped:39:15: error: failed to serialize executables
<eval_with_key>.13 from torch\fx\experimental\proxy_tensor.py:477 in wrapped:41:15: error: failed to legalize operation 'arith.constant'
<eval_with_key>.13 from torch\fx\experimental\proxy_tensor.py:477 in wrapped:41:15: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"vulkan", "vulkan-spirv-fb", {spirv.target_env = #spirv.target_env<#spirv.vce<v1.6, [Shader, Float64, Float16, Int64, Int16, Int8, StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16, StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8, GroupNonUniform, GroupNonUniformVote, GroupNonUniformArithmetic, GroupNonUniformBallot, GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformClustered, GroupNonUniformQuad, VariablePointers, VariablePointersStorageBuffer, DotProduct, DotProductInputAll, DotProductInput4x8BitPacked, DotProductInput4x8Bit, CooperativeMatrixNV], [SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_integer_dot_product, SPV_KHR_storage_buffer_storage_class, SPV_KHR_variable_pointers, SPV_NV_cooperative_matrix]>, api=Vulkan, AMD:DiscreteGPU, #spirv.resource_limits<max_compute_shared_memory_size = 65536, max_compute_workgroup_invocations = 1024, max_compute_workgroup_size = [1024, 1024, 1024], subgroup_size = 64, min_subgroup_size = 32, max_subgroup_size = 64, cooperative_matrix_properties_nv = [#spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = i8, b_type = i8, c_type = i32, result_type = i32, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f16, result_type = f16, scope = <Subgroup>>, #spirv.coop_matrix_props<m_size = 16, n_size = 16, k_size = 16, a_type = f16, b_type = f16, c_type = f32, result_type = f32, scope = <Subgroup>>]>>}>
<eval_with_key>.13 from torch\fx\experimental\proxy_tensor.py:477 in wrapped:41:15: error: failed to serialize executables
Invoked with: iree-compile.exe C:\Users\xxxx\AppData\Local\Temp_MEI300922\iree\compiler\tools.._mlir_libs\iree-compile.exe - --iree-input-type=tm_tensor --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host --iree-vulkan-target-env=#vk.target_env<v1.3, r(120), [VK_KHR_16bit_storage, VK_KHR_8bit_storage, VK_KHR_shader_float16_int8, VK_KHR_spirv_1_4, VK_KHR_storage_buffer_storage_class, VK_KHR_variable_pointers, VK_EXT_subgroup_size_control, VK_NV_cooperative_matrix], AMD:DiscreteGPU, #vk.caps< maxComputeSharedMemorySize = 65536, maxComputeWorkGroupInvocations = 1024, maxComputeWorkGroupSize = dense<[1024, 1024, 1024]>: vector<3xi32>, subgroupSize = 64, subgroupFeatures = 255: i32, minSubgroupSize = 32, maxSubgroupSize = 64, shaderFloat16 = unit, shaderFloat64 = unit, shaderInt8 = unit, shaderInt16 = unit, shaderInt64 = unit, storageBuffer16BitAccess = unit, storagePushConstant16 = unit, uniformAndStorageBuffer16BitAccess = unit, storageBuffer8BitAccess = unit, storagePushConstant8 = unit, uniformAndStorageBuffer8BitAccess = unit, variablePointers = unit, variablePointersStorageBuffer = unit, cooperativeMatrixPropertiesNV = [#vk.coop_matrix_props<mSize = 16, nSize = 16, kSize = 16, aType = f16, bType = f16, cType = f16, resultType = f16, scope = #vk.scope<Subgroup>>], shaderIntegerDotProduct = unit >> --iree-stream-resource-max-allocation-size=4294967295 --iree-vm-bytecode-module-strip-source-map=true --iree-util-zero-fill-elided-attrs --iree-opt-strip-assertions=true --verify=false -iree-vulkan-target-triple=rdna3-7900-windows
Need more information? Set IREE_SAVE_TEMPS=/some/dir in your environment to save all artifacts and reproducers.
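If anyone wants to capture those reproducers, here is a minimal sketch of setting that variable before launching the UI. The dump directory and the launch command are placeholders, not the project's actual entry point; the only real piece is the IREE_SAVE_TEMPS variable named in the error message above.

import os
import subprocess

# Placeholder paths: use any writable scratch directory and whatever command
# you normally use to start SHARK (packaged .exe or a source checkout).
env = dict(os.environ, IREE_SAVE_TEMPS=r"C:\temp\iree-dumps")
subprocess.run(["shark_sd.exe"], env=env, check=True)

# iree-compile inherits IREE_SAVE_TEMPS from this environment and writes its
# intermediate artifacts and reproducers into the directory set above.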
This compile error has been fixed upstream in SRT, but I ran into the following runtime error with the euler_scale_model_input submodel:
Traceback (most recent call last):
File "gradio\queueing.py", line 388, in call_prediction
File "gradio\route_utils.py", line 219, in call_process_api
File "gradio\blocks.py", line 1437, in process_api
File "gradio\blocks.py", line 1123, in call_function
File "gradio\utils.py", line 503, in async_iteration
File "gradio\utils.py", line 496, in __anext__
File "anyio\to_thread.py", line 33, in run_sync
File "anyio\_backends\_asyncio.py", line 877, in run_sync_in_worker_thread
File "anyio\_backends\_asyncio.py", line 807, in run
File "gradio\utils.py", line 479, in run_sync_iterator_async
File "gradio\utils.py", line 629, in gen_wrapper
File "ui\txt2img_ui.py", line 266, in txt2img_inf
File "apps\stable_diffusion\src\pipelines\pipeline_shark_stable_diffusion_img2img.py", line 216, in generate_images
File "apps\stable_diffusion\src\pipelines\pipeline_shark_stable_diffusion_utils.py", line 241, in produce_img_latents
File "apps\stable_diffusion\src\schedulers\shark_eulerdiscrete.py", line 135, in scale_model_input
File "shark\shark_inference.py", line 142, in __call__
return self.shark_runner.run(function_name, inputs, send_to_host)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "shark\shark_runner.py", line 101, in run
return get_results(
^^^^^^^^^^^^
File "shark\iree_utils\compile_utils.py", line 538, in get_results
result = compiled_vm[function_name](*device_inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "iree\runtime\function.py", line 137, in __call__
File "iree\runtime\function.py", line 162, in _invoke
ValueError: Error invoking function: C:\actions-runner\w\SRT\SRT\c\runtime\src\iree\modules\hal\utils\buffer_diagnostics.c:225: INVALID_ARGUMENT; input 0 shape dimension 2 mismatch; expected 64 but have 96; expected shape `1x4x64x64`, actual shape `1x4x96x96`; while invoking native function hal.buffer_view.assert; while calling import;
[ 1] native hal.buffer_view.assert:0 -
[ 0] bytecode module@0:236 -
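For what it's worth, the mismatch lines up with Stable Diffusion's 8x VAE downsampling: the scheduler submodel seems to have been compiled for 512x512 latents (512 / 8 = 64 per side), while the hires pass feeds it 768x768 latents (768 / 8 = 96). A quick sketch of that arithmetic, with a helper name made up for illustration:

def latent_side(pixels, vae_scale_factor=8):
    # Stable Diffusion latents are 1/8 of the image resolution per side.
    return pixels // vae_scale_factor

# Shape the module was compiled for vs. the shape the hires pass produces.
assert latent_side(512) == 64  # -> 1x4x64x64, the expected shape in the error
assert latent_side(768) == 96  # -> 1x4x96x96, the actual shape in the error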
> ran into the following runtime error with the euler_scale_model_input submodel:
Yeah, I noticed that too. It works fine at 512x512, but 768x768 breaks. DEIS and some other schedulers seemed to work.
I'm having this issue too, on AMD. Is there a workaround or fix?