django-quill-editor
django-quill-editor copied to clipboard
Parse HTML to Delta
How can I convert an HTML string to a delta?
I'm using Quill together with Django Import-Export. The CSV input file contains an HTML string, that I want to save as my QuillField
attribute of a model.
I'm using a custom function to get the Quill
object, but I don't know how to get the delta
without having to implement the whole parser myself.
def get_quill_field(string):
    """Wrap an HTML string in a ``Quill`` value.

    The payload handed to ``Quill`` is a JSON object with a ``delta`` key and
    an ``html`` key. It is serialised with ``json.dumps`` rather than by
    string concatenation, so quotes, backslashes and newlines inside either
    value are escaped instead of producing invalid JSON.

    Args:
        string: The HTML markup to store.

    Returns:
        The ``Quill`` object built from the JSON payload.
    """
    import json  # local import: the file's import block is not visible here

    delta = parse_html_to_delta(string)  # HOW TO DO THIS?
    # Compact separators and ensure_ascii=False keep the payload identical to
    # the hand-built string whenever no escaping is required.
    payload = json.dumps(
        {"delta": delta, "html": string},
        separators=(",", ":"),
        ensure_ascii=False,
    )
    return Quill(payload)
I have the same problem
Thank you for using this library :) Converting HTML to delta is handled by the Quill.js library, not this Python library. (If Django passes an HTML string to the JS library through a template, the HTML is converted to delta during the initialization process of Quill.js and loaded in the browser's memory.)
It would be great if the behavior of Quill.js could be implemented within Python, but I haven't been able to find such a way right now.
No solutions for this yet?
I have created a sample parser. It works for most of the elements. Feel free to update this gist
from bs4 import BeautifulSoup, NavigableString
def convert_html_to_delta(html_string):
    """Convert an HTML string into a delta dictionary of the form ``{"ops": [...]}``.

    The parsed document is walked in document order and one op is appended
    per recognised node:

    * plain text nodes become an ``insert`` of their text;
    * ``p`` / ``h1``-``h6``, ``a``, ``span``, ``strong``/``b`` and ``em``/``i``
      are handed to their ``convert_*`` helper, which emits the text of the
      whole tag as a single op;
    * ``br`` becomes a newline insert;
    * ``img`` becomes an image embed carrying its ``alt`` text.

    Once a tag has been handed to a ``convert_*`` helper, the text nodes and
    text-bearing tags nested inside it are skipped, because the helper already
    emitted their text. Without this every nested piece of text would be
    emitted again (once per nesting level). Nested formatting is therefore
    flattened into the outer tag's op. ``br`` and ``img`` are still emitted
    wherever they occur, since they are not part of any tag's text.

    Args:
        html_string: The HTML markup to convert.

    Returns:
        A dict with a single ``"ops"`` key holding the list of ops.
    """
    block_tags = ("p", "h1", "h2", "h3", "h4", "h5", "h6")
    bold_tags = ("strong", "b")
    italic_tags = ("em", "i")

    soup = BeautifulSoup(html_string, "html.parser")
    delta = {"ops": []}
    ops = delta["ops"]

    # id() of every tag whose full text has already been emitted by a helper.
    # Identity is used on purpose: two distinct tags with equal markup must
    # not be confused with one another.
    flattened = set()

    def already_emitted(node):
        """Return True if an ancestor of *node* already emitted its text."""
        return any(id(parent) in flattened for parent in node.parents)

    for element in soup.descendants:
        if isinstance(element, NavigableString):
            if element.string and not already_emitted(element):
                op = {
                    "insert": element.string,
                    "attributes": get_style_attributes(element),
                }
                op.update(get_class_and_id_attributes(element))
                ops.append(op)
            continue

        name = element.name
        if name == "br":
            ops.append({"insert": "\n"})
            continue
        if name == "img":
            src = element.get("src")
            if src is None:
                # An <img> without a source has nothing to embed; skip it
                # instead of failing the whole conversion with a KeyError.
                continue
            alt = element.get("alt", "")
            ops.append({"insert": {"image": src}, "attributes": {"alt": alt}})
            continue

        if name in block_tags:
            converter = convert_paragraph
        elif name == "span":
            converter = convert_span
        elif name in bold_tags:
            converter = convert_bold
        elif name in italic_tags:
            converter = convert_italic
        elif name != "a":
            # Unrecognised container: its children are visited on their own.
            continue
        else:
            converter = None

        if already_emitted(element):
            continue
        if converter is None:
            convert_link(element, ops, element.get("href", ""))
        else:
            converter(element, ops)
        flattened.add(id(element))

    return delta
def convert_paragraph(element, ops):
    """Append one op holding the text of a paragraph or heading tag.

    The text is assembled child by child. A direct ``<b>`` child is wrapped in
    ``**`` and a direct ``<i>`` child in ``*``; every other child contributes
    its plain text. Building the text per child (instead of search-and-replace
    on the whole paragraph) keeps the markers on the child they belong to, even
    when the same words also appear elsewhere in the paragraph, and an empty
    ``<b>``/``<i>`` adds nothing.

    The op's ``attributes`` come from ``get_style_attributes`` and the
    class/id keys from ``get_class_and_id_attributes`` are merged into the op
    itself.

    Args:
        element: The parsed paragraph or heading tag.
        ops: The list of ops to append to (modified in place).
    """
    pieces = []
    for child in element.children:
        if isinstance(child, NavigableString):
            # Only plain text nodes count as paragraph text; comments and
            # other special string nodes are subclasses and are left out.
            if type(child) is NavigableString:
                pieces.append(str(child))
            continue

        child_text = child.text
        if not child_text:
            continue
        if child.name == "b":
            pieces.append("**" + child_text + "**")
        elif child.name == "i":
            pieces.append("*" + child_text + "*")
        else:
            pieces.append(child_text)

    op = {"insert": "".join(pieces), "attributes": get_style_attributes(element)}
    ops.append(op)
    op.update(get_class_and_id_attributes(element))
def convert_link(element, ops, href):
    """Append an op for an anchor tag.

    The op inserts the tag's text with a ``link`` attribute set to *href*;
    the keys returned by ``get_class_and_id_attributes`` are then merged into
    the op itself.

    Args:
        element: The parsed anchor tag.
        ops: The list of ops to append to (modified in place).
        href: The link target to record.
    """
    link_op = dict(insert=element.text, attributes=dict(link=href))
    ops.append(link_op)
    link_op.update(get_class_and_id_attributes(element))
def convert_span(element, ops):
    """Append an op for a span tag.

    The op inserts the tag's text, with ``attributes`` taken from
    ``get_style_attributes``; the keys returned by
    ``get_class_and_id_attributes`` are then merged into the op itself.

    Args:
        element: The parsed span tag.
        ops: The list of ops to append to (modified in place).
    """
    span_op = dict(insert=element.text, attributes=get_style_attributes(element))
    ops.append(span_op)
    span_op.update(get_class_and_id_attributes(element))
def convert_bold(element, ops):
    """Append an op that inserts the tag's text with ``bold`` set to True.

    The keys returned by ``get_class_and_id_attributes`` are merged into the
    op itself after it has been appended.

    Args:
        element: The parsed bold tag.
        ops: The list of ops to append to (modified in place).
    """
    bold_op = dict(insert=element.text, attributes=dict(bold=True))
    ops.append(bold_op)
    bold_op.update(get_class_and_id_attributes(element))
def convert_italic(element, ops):
    """Append an op that inserts the tag's text with ``italic`` set to True.

    The keys returned by ``get_class_and_id_attributes`` are merged into the
    op itself after it has been appended.

    Args:
        element: The parsed italic tag.
        ops: The list of ops to append to (modified in place).
    """
    italic_op = dict(insert=element.text, attributes=dict(italic=True))
    ops.append(italic_op)
    italic_op.update(get_class_and_id_attributes(element))
def get_style_attributes(element):
    """Collect the class, id and inline style declarations of an element.

    Args:
        element: Any object; only objects exposing a mapping-like ``attrs``
            attribute contribute anything.

    Returns:
        A dict that may contain ``"class"`` (space-joined class names),
        ``"id"``, and one entry per ``property: value`` declaration found in
        the ``style`` attribute. Property names and values are stripped of
        surrounding whitespace. Empty if the element has no ``attrs``.
    """
    attrs = getattr(element, "attrs", None)
    if not attrs:
        return {}

    attributes = {}
    if "class" in attrs:
        classes = attrs["class"]
        # A class list is joined with spaces; a plain string is kept as is
        # (joining a string would put a space between every character).
        attributes["class"] = classes if isinstance(classes, str) else " ".join(classes)
    if "id" in attrs:
        attributes["id"] = attrs["id"]
    if "style" in attrs:
        for declaration in attrs["style"].split(";"):
            # Split on the first colon only, so values that themselves
            # contain a colon (e.g. "url(http://...)") stay intact.
            prop, colon, value = declaration.partition(":")
            prop = prop.strip()
            if colon and prop:
                attributes[prop] = value.strip()
    return attributes
def get_class_and_id_attributes(element):
    """Collect the class and id of an element.

    Args:
        element: Any object; only objects exposing a mapping-like ``attrs``
            attribute contribute anything.

    Returns:
        A dict that may contain ``"class"`` (space-joined class names) and
        ``"id"``. Empty if the element has no ``attrs`` or neither is set.
    """
    attrs = getattr(element, "attrs", None)
    if not attrs:
        return {}

    attributes = {}
    if "class" in attrs:
        classes = attrs["class"]
        # A class list is joined with spaces; a plain string is kept as is
        # (joining a string would put a space between every character).
        attributes["class"] = classes if isinstance(classes, str) else " ".join(classes)
    if "id" in attrs:
        attributes["id"] = attrs["id"]
    return attributes