WebODM icon indicating copy to clipboard operation
WebODM copied to clipboard

Implement chunked image uploads

Open pierotofy opened this issue 9 months ago • 2 comments

Currently images are uploaded in one chunk (one request per image).

But as some images can be really large, and many connections can be unstable, we could improve this by uploading individual files in chunks, just like we do for task imports.

pierotofy avatar Mar 24 '25 03:03 pierotofy

Cool! I thought it was already implemented!

Could this solve the issue (never reported) I have with `.upload` files in the WebODM app's media tmp folder never being deleted when uploading from the API in chunks with a partial task? Note: I regularly upload 200 GB or more, which raises a disk-filling issue — files end up duplicated in tmp/tmp*.upload and in the WebODM project.

import requests
import sys
import os
import json
import time
import settings

CHUNK_SIZE = 10  # Number of images uploaded per request (batch size)
RETRY_DELAY = 5  # Seconds to wait between failed upload attempts

# At least one image directory must be given on the command line.
if len(sys.argv) < 2:
    print(f"Usage: {sys.argv[0]} <path_to_images> [additional_paths...]")
    sys.exit(1)

def get_image_files(directory, extensions=(".tif", ".tiff", ".png")):
    """Recursively collect image files under *directory*.

    A file is kept when its lower-cased name ends with one of
    *extensions*; the walk descends into every subdirectory.
    Returns a list of full paths.
    """
    matches = []
    for root, _dirs, filenames in os.walk(directory):
        for name in filenames:
            if name.lower().endswith(extensions):
                matches.append(os.path.join(root, name))
    return matches

# Gather every matching image from all directories given on the command line.
all_images = [
    img
    for directory in sys.argv[1:]
    for img in get_image_files(directory)
]

# WebODM needs a minimum of two images to process a task.
if len(all_images) < 2:
    print("Need at least 2 images")
    sys.exit(1)

print(f"Found {len(all_images)} images.")

# Authenticate against WebODM and obtain a JWT token for later requests.
auth_payload = {"username": settings.USERNAME, "password": settings.PASSWORD}
auth_response = requests.post(settings.SERVER + "/api/token-auth/", data=auth_payload)
res = auth_response.json()

if "token" not in res:
    print("Invalid credentials!")
    sys.exit(1)

print("Logged-in!")
token = res["token"]

# Create a fresh WebODM project, named after the first input directory.
project_name = f"PyWebODM - {os.path.basename(sys.argv[1])}"
create_response = requests.post(
    settings.SERVER + "/api/projects/",
    headers={"Authorization": f"JWT {token}"},
    data={"name": project_name},
)
res = create_response.json()

if "id" not in res:
    print(f"Cannot create project: {res}")
    sys.exit(1)

project_id = res["id"]
print(f"Created project: {project_name} (ID: {project_id})")

# Processing options forwarded to the ODM engine, serialized as the JSON
# list-of-{name, value} structure the WebODM task API expects.
_OPTION_VALUES = [
    ("auto-boundary", True),
    ("radiometric-calibration", "camera+sun"),
    ("optimize-disk-space", True),
    ("crop", 1),
    ("dem-decimation", 1),
    ("dsm", True),
    ("max-concurrency", 32),
]
options = json.dumps([{"name": n, "value": v} for n, v in _OPTION_VALUES])

# **Step 1: Create a Partial Task**
# A partial task accepts images over multiple requests before processing.
print("Creating a partial task...")
task_response = requests.post(
    settings.SERVER + f"/api/projects/{project_id}/tasks/",
    headers={"Authorization": f"JWT {token}"},
    data={"partial": "true", "options": options},
)
res = task_response.json()

if "id" not in res:
    print(f"Failed to create partial task: {res}")
    sys.exit(1)

task_id = res["id"]
print(f"Partial task created: {task_id}")

# **Step 2: Upload images in chunks**
# Images are sent in batches of CHUNK_SIZE; each batch is retried a few
# times before the whole script gives up.

def _post_chunk(chunk):
    """POST one batch of images to the partial task's upload endpoint.

    The files are opened only for the duration of this single request and
    are always closed in ``finally`` — the original code opened them once
    outside the retry loop, so after the first attempt's cleanup any retry
    posted already-closed handles and crashed.
    Returns the decoded JSON response (raises on non-JSON bodies or
    transport errors; the caller handles retries).
    """
    files = [
        ("images", (os.path.basename(img), open(img, "rb"), "image/tiff"))
        for img in chunk
    ]
    try:
        res = requests.post(
            settings.SERVER + f"/api/projects/{project_id}/tasks/{task_id}/upload/",
            headers={"Authorization": f"JWT {token}"},
            files=files,
        )
        return res.json()
    finally:
        for _, file_tuple in files:
            file_tuple[1].close()  # Close descriptors after every attempt

for i in range(0, len(all_images), CHUNK_SIZE):
    chunk = all_images[i:i + CHUNK_SIZE]
    print(f"Uploading images {i + 1} to {i + len(chunk)}...")

    retries = 3
    while retries > 0:
        try:
            res_json = _post_chunk(chunk)
            # The endpoint may answer with the task object ("id") or a
            # {"success": true} payload; .get() avoids the KeyError the
            # original raised when neither key was present.
            if "id" in res_json or res_json.get("success") is True:
                print(f"Uploaded {len(chunk)} images successfully.")
                break  # Success, move to the next batch
            print(f"Error uploading images: {res_json}")
            retries -= 1
            time.sleep(RETRY_DELAY)
        except (json.JSONDecodeError, requests.exceptions.RequestException) as exc:
            # Non-JSON body, or a transport failure (timeout, reset) —
            # exactly the unstable-connection case this retry loop exists
            # for; the original let RequestException kill the script.
            print(f"Warning: upload attempt failed ({exc}). Retrying...")
            retries -= 1
            time.sleep(RETRY_DELAY)

    if retries == 0:
        print("Failed to upload images after multiple attempts.")
        sys.exit(1)

print("All images uploaded successfully.")

# **Step 3: Start processing the task**
# NOTE(review): the documented way to finalize a partial task is the
# /commit/ endpoint; /restart/ also appears to trigger processing here —
# confirm against the WebODM API before changing.
res = requests.post(
    settings.SERVER + f"/api/projects/{project_id}/tasks/{task_id}/restart/",
    headers={"Authorization": f"JWT {token}"}
).json()

# Check the success flag explicitly. The original substring test
# `"True" in str(res)` also matched error payloads that merely contained
# the word "True" anywhere in their repr.
if res.get("success") is True:
    print(f"Processing started for project {project_id} - task {task_id}.")
else:
    print(f"Failed to start processing: {res}")

kikislater avatar Mar 24 '25 05:03 kikislater

I don't think it would completely solve the problem of having too many files in the tmp folder (that seems to be a sign of connection instability), although it might reduce it since with chunked uploads the system would retry the upload for a chunk a few times before giving up (and leaving an orphaned upload file in tmp).

Note you would need to change your API code once/when/if this is implemented.

pierotofy avatar Mar 24 '25 15:03 pierotofy