pdf2docx
pdf2docx copied to clipboard
Alignment of words is scattered when the PDF contains images
Description of the bug
Text-only PDF files convert as expected, but when a PDF contains images together with some text, the words in the converted document are misaligned and scattered. A sample file is attached for reference: test_conversion.pdf
How to reproduce the bug
import logging
import os

from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
from pdf2docx import Converter
# Configure the root logger to emit DEBUG-level messages.
logging.basicConfig(level=logging.DEBUG)

# Flask needs the importing module's name; the built-in ``__name__`` supplies
# it (a bare ``name`` is undefined and raises NameError at import time).
app = Flask(__name__)
# Apply Flask-CORS to the app with its default settings.
CORS(app)
@app.route('/convert_pdf_to_docx', methods=['POST'])
def convert_pdf_to_docx():
    """Convert an uploaded PDF to DOCX and send the result as a download.

    The upload is read from ``request.files['file']``.

    Returns:
        The generated DOCX as an attachment on success. A JSON
        ``{"error": ...}`` body with status 400 when the ``file`` part is
        missing or has an empty filename, or with status 500 when saving,
        converting, or sending raises.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    uploaded_file = request.files['file']
    if uploaded_file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # NOTE(review): these fixed paths are shared by every request, so
    # concurrent uploads would overwrite each other's files; consider
    # per-request temporary files (e.g. via ``tempfile``).
    pdf_path = 'C:\\Users\\user\\Downloads\\python\\test\\temp.pdf'
    docx_path = 'C:\\Users\\user\\Downloads\\python\\test\\output.docx'

    try:
        # Save the uploaded PDF so the converter can open it from disk.
        uploaded_file.save(pdf_path)

        # Convert PDF to DOCX (all pages: start=0, end=None).
        cv = Converter(pdf_path)
        try:
            cv.convert(docx_path, start=0, end=None)
        finally:
            # Close the converter even when conversion fails; an unclosed
            # converter presumably keeps the PDF open, which can make the
            # os.remove() below fail (notably on Windows).
            cv.close()

        # Return the DOCX file.
        return send_file(docx_path, as_attachment=True)
    except Exception as e:
        # Keep the traceback in the log; the client only receives the message.
        logging.exception("PDF to DOCX conversion failed")
        return jsonify({"error": str(e)}), 500
    finally:
        # Clean up the temporary PDF file.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
# Start the server only when this file is executed as a script. The guard must
# compare the dunder ``__name__`` against '__main__'; a bare ``name`` is
# undefined and raises NameError.
if __name__ == '__main__':
    app.run(host='192.168.200.5', port=5000)
    # NOTE(review): app.run() blocks, so this second call is only reached once
    # the server above has stopped. If debug mode is wanted, pass debug=True to
    # a single run() call instead -- and avoid exposing the debugger on a
    # network-reachable host.
    app.run(debug=True)
pdf2docx version
0.5.3
Operating system
Windows
Python version
3.10