Implement chunked image uploads
Currently images are uploaded in one chunk (one request per image).
But as some images can be really large, and many connections can be unstable, we could improve this by uploading individual files in chunks, just like we do for task imports.
Cool! I thought it was already implemented!
Could this solve the issue (never reported) I have with `.upload` files in the WebODM app media tmp folder never being deleted when uploading from the API in chunks with a partial task? Note: I regularly upload 200 GB or more, which raises a disk-filling issue — duplicate files in tmp/tmp*.upload and in the WebODM project.
import json
import mimetypes
import os
import sys
import time

import requests

import settings
CHUNK_SIZE = 10  # Number of images uploaded per batch/request
RETRY_DELAY = 5  # Seconds to wait between retry attempts

# At least one directory of images must be passed on the command line.
if len(sys.argv) < 2:
    print(f"Usage: {sys.argv[0]} <path_to_images> [additional_paths...]")
    sys.exit(1)
def get_image_files(directory, extensions=(".tif", ".tiff", ".png")):
    """Recursively collect image paths under *directory*.

    A file is included when its name, lower-cased, ends with one of
    *extensions*. Returns a list of full paths.
    """
    matches = []
    for dirpath, _dirnames, filenames in os.walk(directory):
        for filename in filenames:
            if filename.lower().endswith(extensions):
                matches.append(os.path.join(dirpath, filename))
    return matches
# Gather image paths from every directory given on the command line.
all_images = []
for directory in sys.argv[1:]:
    all_images += get_image_files(directory)

# WebODM needs a minimum of two images to process a task.
if len(all_images) < 2:
    print("Need at least 2 images")
    sys.exit(1)

print(f"Found {len(all_images)} images.")
# Authenticate against WebODM and obtain a JWT token for later requests.
auth_response = requests.post(
    settings.SERVER + "/api/token-auth/",
    data={"username": settings.USERNAME, "password": settings.PASSWORD},
)
res = auth_response.json()
if "token" not in res:
    print("Invalid credentials!")
    sys.exit(1)
print("Logged-in!")
token = res["token"]
# Create a new WebODM project, named after the first input directory.
project_name = f"PyWebODM - {os.path.basename(sys.argv[1])}"
create_response = requests.post(
    settings.SERVER + "/api/projects/",
    headers={"Authorization": f"JWT {token}"},
    data={"name": project_name},
)
res = create_response.json()
if "id" not in res:
    print(f"Cannot create project: {res}")
    sys.exit(1)
project_id = res["id"]
print(f"Created project: {project_name} (ID: {project_id})")
# Processing options for the task, serialized into the JSON payload
# shape WebODM expects: a list of {"name": ..., "value": ...} objects.
_OPTION_VALUES = {
    "auto-boundary": True,
    "radiometric-calibration": "camera+sun",
    "optimize-disk-space": True,
    "crop": 1,
    "dem-decimation": 1,
    "dsm": True,
    "max-concurrency": 32,
}
options = json.dumps(
    [{"name": name, "value": value} for name, value in _OPTION_VALUES.items()]
)
# Step 1: create a "partial" task, so images can be attached in batches
# before the task is started.
print("Creating a partial task...")
task_response = requests.post(
    settings.SERVER + f"/api/projects/{project_id}/tasks/",
    headers={"Authorization": f"JWT {token}"},
    data={"partial": "true", "options": options},
)
res = task_response.json()
if "id" not in res:
    print(f"Failed to create partial task: {res}")
    sys.exit(1)
task_id = res["id"]
print(f"Partial task created: {task_id}")
# Step 2: upload the images to the partial task in batches of CHUNK_SIZE,
# retrying each batch up to 3 times.
#
# NOTE(fix): the file handles are (re)opened on EVERY attempt. The original
# code opened them once before the retry loop but closed them in `finally`
# after the first attempt, so any retry would POST already-closed files.
for i in range(0, len(all_images), CHUNK_SIZE):
    chunk = all_images[i:i + CHUNK_SIZE]
    print(f"Uploading images {i + 1} to {i + len(chunk)}...")
    retries = 3
    while retries > 0:
        # Fresh handles per attempt; guess the MIME type per file instead of
        # hard-coding image/tiff (PNGs are in the default extension list).
        files = [
            (
                "images",
                (
                    os.path.basename(img),
                    open(img, "rb"),
                    mimetypes.guess_type(img)[0] or "image/tiff",
                ),
            )
            for img in chunk
        ]
        try:
            res = requests.post(
                settings.SERVER + f"/api/projects/{project_id}/tasks/{task_id}/upload/",
                headers={"Authorization": f"JWT {token}"},
                files=files
            )
            res_json = res.json()
            # Success is signalled either by the task id being echoed back or
            # by an explicit success flag. Use .get() so a response with
            # neither key doesn't raise KeyError (original bug).
            if "id" in res_json or res_json.get("success") is True:
                print(f"Uploaded {len(chunk)} images successfully.")
                break  # This batch is done; move on to the next one.
            print(f"Error uploading images: {res_json}")
            retries -= 1
            time.sleep(RETRY_DELAY)
        except json.JSONDecodeError:
            # Covers requests' JSONDecodeError too (it subclasses this).
            print("Warning: WebODM did not return JSON. Retrying...")
            retries -= 1
            time.sleep(RETRY_DELAY)
        finally:
            # Always release the file descriptors opened for this attempt.
            for _, file_tuple in files:
                file_tuple[1].close()
    # Single exit check (the original duplicated this block).
    if retries == 0:
        print("Failed to upload images after multiple attempts.")
        sys.exit(1)

print("All images uploaded successfully.")
# Step 3: commit/start the task so WebODM begins processing the uploads.
res = requests.post(
    settings.SERVER + f"/api/projects/{project_id}/tasks/{task_id}/restart/",
    headers={"Authorization": f"JWT {token}"}
).json()
# NOTE(fix): the original checked `"True" in str(res)`, which matches the
# substring "True" anywhere in the response repr (including inside error
# messages). Check the success flag explicitly instead; a bare boolean
# `true` response body is also accepted.
started = res is True or (isinstance(res, dict) and bool(res.get("success")))
if started:
    print(f"Processing started for project {project_id} - task {task_id}.")
else:
    print(f"Failed to start processing: {res}")
I don't think it would completely solve the problem of having too many files in the tmp folder (that seems to be a sign of connection instability), although it might reduce it since with chunked uploads the system would retry the upload for a chunk a few times before giving up (and leaving an orphaned upload file in tmp).
Note you would need to change your API code once/when/if this is implemented.