markitdown
markitdown copied to clipboard
convert_stream is broken for xlsx and xls files
With markitdown version 0.0.1a4, f3 (which calls convert) works, but f2 (which calls convert_stream) does not.
# Standard-library imports.
import re
from io import BytesIO

# Third-party converter under test.
from markitdown import MarkItDown

# Sample input file names.
p1 = r'sample.pptx'
p2 = r'iris.xlsx'

# Converter instance that f2 and f3 call into.
md = MarkItDown()
def ftob(path) -> BytesIO:
    """Read the file at ``path`` and return its bytes wrapped in a ``BytesIO``.

    Args:
        path: Location of the file to read; it is opened in binary mode.

    Returns:
        A ``BytesIO`` holding the complete file contents, positioned at the
        start of the data.
    """
    with open(path, "rb") as f:
        # Read everything while the handle is open; the in-memory buffer
        # stays usable after the file itself has been closed.
        return BytesIO(f.read())
# NOTE(review): `md` is already bound to a MarkItDown() instance earlier in this
# script; this statement rebinds it to a fresh instance — looks redundant, confirm.
md = MarkItDown()
def f3(path) -> None:
    """Convert the file at ``path`` with ``md.convert`` and print it in chunks.

    The converted text is split at every position that is immediately
    followed by ``## Sheet<digits>``; each resulting chunk is printed with
    leading and trailing whitespace stripped.

    Args:
        path: Passed unchanged to ``md.convert``.
    """
    result = md.convert(path)
    # Zero-width lookahead: split *before* each heading so the heading stays
    # attached to the chunk that follows it.
    sheets = re.split(r"(?=## Sheet\d+)", result.text_content)
    for sheet in sheets:
        print(sheet.strip())
# Run the convert()-based variant on p2.
f3(p2)
# NOTE(review): this comment originally read "outputs None", which conflicts with
# the report's statement that f3 works — presumably this call prints the sheets'
# content; confirm.
def f2(path, file_extension=".xlsx") -> None:
    """Convert the file at ``path`` via ``md.convert_stream`` and print it in chunks.

    The file is first loaded into memory with ``ftob`` and the resulting
    stream is handed to ``md.convert_stream``. The converted text is split at
    every position that is immediately followed by ``## Sheet<digits>``; each
    resulting chunk is printed with leading and trailing whitespace stripped.

    Args:
        path: Location of the file to load into an in-memory stream.
        file_extension: Extension hint forwarded to ``md.convert_stream``.
            Defaults to ``".xlsx"``, the value this function always used.
    """
    byt = ftob(path)
    result = md.convert_stream(byt, file_extension=file_extension)
    # Zero-width lookahead: split *before* each heading so the heading stays
    # attached to the chunk that follows it.
    sheets = re.split(r"(?=## Sheet\d+)", result.text_content)
    for sheet in sheets:
        print(sheet.strip())
# Run the convert_stream()-based variant on p2.
f2(p2)
# NOTE(review): this comment originally read "print sheets content", which
# conflicts with the report's statement that f2 does not work — presumably this
# is the call that outputs None; confirm.