markitdown icon indicating copy to clipboard operation
markitdown copied to clipboard

`convert_stream` is broken for xlsx and xls files

Open abab-dev opened this issue 11 months ago • 0 comments

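With markitdown version 0.0.1a4, `f3` (which calls `convert` with a file path) works, but `f2` (which calls `convert_stream` with an in-memory stream) does not.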

from markitdown import MarkItDown
# Converter instance shared by the reproduction functions below.
md = MarkItDown()
# Sample input paths; only p2 (the xlsx file) is passed to f3/f2 in this script.
p1= r'sample.pptx'
p2 = r'iris.xlsx'
from io import BytesIO

import re
def ftob(path):
    """Load the file at ``path`` fully into memory and return it as a BytesIO.

    The file is opened in binary mode and closed before returning; the
    returned buffer is positioned at offset 0.
    """
    with open(path, "rb") as handle:
        raw = handle.read()
    return BytesIO(raw)


# Rebinds md to a fresh MarkItDown instance (same construction as earlier in the script).
md = MarkItDown()

def f3(path):
    """Convert the file at ``path`` with ``md.convert`` and print it sheet by sheet.

    The converted text is split in front of every ``## Sheet<N>`` heading,
    and each resulting piece is printed with surrounding whitespace removed.
    """
    converted = md.convert(path)
    # Zero-width lookahead: split *before* each heading so the heading stays
    # attached to the chunk it introduces.
    for chunk in re.split(r"(?=## Sheet\d+)", converted.text_content):
        print(chunk.strip())

# Path-based conversion of the xlsx sample via md.convert.
f3(p2)
#outputs None


def f2(path):
    """Convert ``path`` through ``md.convert_stream`` and print it sheet by sheet.

    The file is first read into an in-memory buffer by ``ftob`` and passed to
    ``convert_stream`` with an explicit ``.xlsx`` extension. The converted
    text is then split in front of every ``## Sheet<N>`` heading, and each
    piece is printed with surrounding whitespace removed.
    """
    stream = ftob(path)
    converted = md.convert_stream(stream, file_extension=".xlsx")
    # Zero-width lookahead: split *before* each heading so the heading stays
    # attached to the chunk it introduces.
    for chunk in re.split(r"(?=## Sheet\d+)", converted.text_content):
        print(chunk.strip())


# Stream-based conversion of the same xlsx sample via md.convert_stream.
f2(p2)

#print sheets content

abab-dev avatar Feb 11 '25 10:02 abab-dev