olmocr
olmocr copied to clipboard
How do I process a JPG file instead of a PDF?
What if the request input is a JPG or PNG image?
We use the following code in the web demo. Sorry, it's not in the repo yet; we should add it in v2.
import os
import tempfile
async def convert_image_to_pdf(png_file: tempfile.NamedTemporaryFile) -> tempfile.NamedTemporaryFile:
    """Convert an image temp file into a new PDF temp file.

    The image at ``png_file.name`` is passed to ``img2pdf.convert`` and the
    result is written to a freshly created ``.pdf`` temporary file. On
    success the source image is removed from disk.

    Args:
        png_file: Object whose ``.name`` attribute is the path of the image
            to convert. Despite the parameter name, nothing here restricts
            the input to PNG; which formats work depends on img2pdf.

    Returns:
        The already-closed temporary file object for the PDF. It was created
        with ``delete=False``, so the file remains on disk at ``.name`` and
        the caller is responsible for removing it.

    Raises:
        Whatever ``img2pdf.convert`` or the file operations raise. In that
        case the source image is left in place and no PDF file is left
        behind.
    """
    import img2pdf  # local import, as in the original snippet

    # Convert first: if the image is invalid we fail before creating any
    # output file, so nothing is leaked on disk or left open.
    pdf_bytes = img2pdf.convert([png_file.name])

    new_file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    try:
        # The context manager guarantees the handle is closed even if the
        # write fails; delete=False keeps the file on disk after closing.
        with new_file:
            new_file.write(pdf_bytes)
    except BaseException:
        # Do not leave a partial PDF behind when writing failed.
        os.remove(new_file.name)
        raise

    # Only drop the source image once the PDF has been written successfully.
    os.remove(png_file.name)
    return new_file
def is_png(file_path):
    """Report whether the file at ``file_path`` starts with the PNG signature.

    Only the first 8 bytes are inspected. Any exception raised while opening
    or reading the file is printed as ``Error: ...`` and treated as "not a
    PNG", so the function returns False instead of raising.
    """
    png_signature = b"\x89PNG\r\n\x1a\n"
    try:
        with open(file_path, "rb") as stream:
            # A file shorter than the signature yields fewer bytes and
            # therefore compares unequal.
            return stream.read(len(png_signature)) == png_signature
    except Exception as err:
        print(f"Error: {err}")
    return False
def is_jpeg(file_path):
    """Report whether the file at ``file_path`` starts with ``FF D8``.

    Only the first 2 bytes are inspected. Any exception raised while opening
    or reading the file is printed as ``Error: ...`` and treated as "not a
    JPEG", so the function returns False instead of raising.
    """
    jpeg_marker = b"\xff\xd8"
    try:
        with open(file_path, "rb") as stream:
            leading = stream.read(len(jpeg_marker))
    except Exception as err:
        print(f"Error: {err}")
        return False
    return leading == jpeg_marker