syft
syft copied to clipboard
wip: 4184 pt2 oci model support
Description
This PR follows up on #4279 by adding support for a new Docker source, `ocimodelsource` (naming pending 😄).
With this change, users can run the following: `syft -o json docker.io/ai/qwen3-vl | jq .`
They'll get an SBOM containing a single package that describes the GGUF model, with details for the model pulled from https://hub.docker.com/u/ai.
Example of the extracted metadata:
"metadata": {
"modelFormat": "gguf",
"modelName": "Qwen3-Vl-8B-Instruct",
"modelVersion": "unknown",
"hash": "321c13d3e93151b5",
"license": "apache-2.0",
"ggufVersion": 3,
"architecture": "qwen3vl",
"quantization": "Q4_K_M",
"parameters": 8190735360,
"tensorCount": 399,
"header": {
"general.base_model.0.name": "Qwen3 VL 8B Instruct",
"general.base_model.0.organization": "Qwen",
"general.base_model.0.repo_url": "https://huggingface.co/Qwen/Qwen3-VL-8B-Instruct",
"general.base_model.count": 1,
"general.basename": "Qwen3-Vl-8B-Instruct",
"general.file_type": 15,
"general.finetune": "Instruct",
"general.quantization_version": 2,
"general.quantized_by": "Unsloth",
"general.repo_url": "https://huggingface.co/unsloth",
"general.size_label": "8B",
"general.tags": {
"type": 8,
"len": 2,
"startOffset": 741,
"size": 41
},
"general.type": "model",
"quantize.imatrix.chunks_count": 694,
"quantize.imatrix.dataset": "unsloth_calibration_Qwen3-VL-8B-Instruct.txt",
"quantize.imatrix.entries_count": 252,
"quantize.imatrix.file": "Qwen3-VL-8B-Instruct-GGUF/imatrix_unsloth.gguf",
"qwen3vl.attention.head_count": 32,
"qwen3vl.attention.head_count_kv": 8,
"qwen3vl.attention.key_length": 128,
"qwen3vl.attention.layer_norm_rms_epsilon": 0.000001,
"qwen3vl.attention.value_length": 128,
"qwen3vl.block_count": 36,
"qwen3vl.context_length": 262144,
"qwen3vl.embedding_length": 4096,
"qwen3vl.feed_forward_length": 12288,
"qwen3vl.n_deepstack_layers": 3,
"qwen3vl.rope.dimension_sections": {
"type": 5,
"len": 4,
"startOffset": 1268,
"size": 16
},
"qwen3vl.rope.freq_base": 5000000,
"tokenizer.ggml.add_bos_token": false,
"tokenizer.ggml.bos_token_id": 151643,
"tokenizer.ggml.eos_token_id": 151645,
"tokenizer.ggml.merges": {
"type": 8,
"len": 151387,
"startOffset": 3197544,
"size": 2731548
},
A larger Google Doc is being put together to go over the choices made in this PR, as well as the changes we need to make so that pt1 and pt2 work together as intended.
Type of change
- [ ] New feature (non-breaking change which adds functionality)
Checklist:
- [ ] I have tested my code in common scenarios and confirmed there are no regressions
- [ ] I have added comments to my code, particularly in hard-to-understand sections