Allow for formatting in captions
Currently, pandoc-reference-filter strips all formatting from captions. This is undesirable, but fixing it requires some significant changes to figure_replacement(), because LaTeX, HTML, and Markdown output each have to be handled differently.
I have a somewhat hackish proof-of-concept fix for this. It is built on top of (but mostly replaces) my work on adding support for short captions (in a pending pull request), though it should be relatively easy to remove the short-caption parts from this proof of concept if that were desirable. I've pasted the patch text below. (If you want, I can create a pull request, but I'm not sure this is ready. It's in the Format-Captions branch of my fork if you want to look there.)
Currently, .docx captions aren't showing up at all (though the image is), for reasons I can't figure out. Any help with that would be appreciated. Also, the patch is not well tested, so expect bugs.
The question is: is this wanted? If so, is this the right approach, or does anyone have better ideas?
internalreferences.py | 165 ++++++++++++++++++++++++++++----------------------
1 file changed, 94 insertions(+), 71 deletions(-)
diff --git a/internalreferences.py b/internalreferences.py
index 5e85073..63f3a3f 100755
--- a/internalreferences.py
+++ b/internalreferences.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python
import re
from collections import OrderedDict
+from subprocess import Popen, PIPE
import pandocfilters as pf
@@ -127,6 +128,76 @@ def create_figures(key, value, format, metadata):
else:
return None
+def toFormat(string, format):
+ # Process string through pandoc to get formatted string. Is there a better way?
+ p1 = Popen(['echo'] + string.split(), stdout=PIPE)
+ p2 = Popen(['pandoc', '-t', format], stdin=p1.stdout, stdout=PIPE)
+ p1.stdout.close()
+ return p2.communicate()[0].strip('\n')
+
+def latex_figure(attr, filename, caption, alt):
+ beginText = (u'\n'
+ '\\begin{{figure}}[htbp]\n'
+ '\\centering\n'
+ '\\includegraphics{{{filename}}}\n'.format(
+ filename=filename
+ ).encode('utf-8'))
+ endText = (u'}}\n'
+ '\\label{{{attr.id}}}\n'
+ '\\end{{figure}}\n'.format(attr=attr))
+ star = False
+ if 'unnumbered' in attr.classes:
+ beginText += '\\caption*{'
+ star = True
+ if alt and not star:
+ shortCaption = toFormat(alt, 'latex')
+ beginText += '\\caption['
+ latexFigure = [RawInline('latex', beginText)]
+ latexFigure += [RawInline('latex', shortCaption + ']{')]
+
+ else: # No short caption
+ if star: beginText += '\\caption*{'
+ else: beginText += '\\caption{'
+ latexFigure = [RawInline('latex', beginText + '{')]
+
+ latexFigure += caption
+ latexFigure += [RawInline('latex', endText)]
+ return pf.Para(latexFigure)
+
+def html_figure(attr, filename, fcaption, alt):
+ beginText = (u'\n'
+ '<div {attr.html}>\n'
+ '<img src="{filename}" alt="{alt}" />\n'
+ '<p class="caption">').format(attr=attr,
+ filename=filename,
+ alt=alt)
+ endText = (u'</p>\n'
+ '</div>\n')
+ htmlFigure = [RawInline('html', beginText)]
+ htmlFigure += fcaption
+ htmlFigure += [RawInline('html', endText)]
+ return pf.Plain(htmlFigure)
+
+def html5_figure(attr, filename, fcaption, alt):
+ beginText = (u'\n'
+ '<figure {attr.html}>\n'
+ '<img src="{filename}" alt="{alt}" />\n'
+ '<figcaption>').format(attr=attr,
+ filename=filename,
+ alt=alt)
+ endText = u'</figcaption>\n</figure>\n'
+ htmlFigure = [RawInline('html5', beginText)]
+ htmlFigure += fcaption
+ htmlFigure += [RawInline('html5', endText)]
+ return pf.Plain(htmlFigure)
+
+def markdown_figure(attr, filename, fcaption, alt):
+ beginText = u'<div {attr.html}>'.format(attr=attr)
+ endText = u'</div>'
+ markdownFigure = [pf.Para([pf.RawInline('html', beginText)])]
+ markdownFigure += [pf.Para([pf.Image(fcaption, (filename,alt))])]
+ markdownFigure += [pf.Para([pf.RawInline('html', endText)])]
+ return markdownFigure
class ReferenceManager(object):
"""Internal reference manager.
@@ -139,32 +210,6 @@ class ReferenceManager(object):
text of any given internal reference (no need for e.g. 'fig:' at
the start of labels).
"""
- figure_styles = {
- 'latex': (u'\n'
- '\\begin{{figure}}[htbp]\n'
- '\\centering\n'
- '\\includegraphics{{{filename}}}\n'
- '\\caption{star}{{{caption}}}\n'
- '\\label{{{attr.id}}}\n'
- '\\end{{figure}}\n'),
-
- 'html': (u'\n'
- '<div {attr.html}>\n'
- '<img src="{filename}" alt="{alt}" />'
- '<p class="caption">{fcaption}</p>\n'
- '</div>\n'),
-
- 'html5': (u'\n'
- '<figure {attr.html}>\n'
- '<img src="{filename}" alt="{alt}" />\n'
- '<figcaption>{fcaption}</figcaption>\n'
- '</figure>\n'),
-
- 'markdown': (u'\n'
- '<div {attr.html}>\n'
- '\n'
- '\n'
- '</div>\n')}
latex_multi_autolink = u'\\cref{{{labels}}}{post}'
@@ -243,7 +288,7 @@ class ReferenceManager(object):
"""If the key, value represents a figure, append reference
data to internal state.
"""
- _caption, (filename, target), (id, classes, kvs) = value
+ _caption, (filename, alt), (id, classes, kvs) = value
if 'unnumbered' in classes:
return
else:
@@ -278,7 +323,7 @@ class ReferenceManager(object):
self.references[label] = {'type': 'math',
'id': self.equation_count,
'label': label}
-
+
def figure_replacement(self, key, value, format, metadata):
"""Replace figures with appropriate representation.
@@ -288,58 +333,36 @@ class ReferenceManager(object):
The other way of doing it would be to pull out a '\label{(.*)}'
from the caption of an Image and use that to update the references.
"""
- _caption, (filename, target), attrs = value
-# caption = pf.stringify(_caption)
- caption = _caption
+ caption, (filename, alt), attrs = value
+ if format == 'latex': alt = toFormat(str(alt), format) # Preserve formatting
+# else: alt = pf.stringify(alt)
attr = PandocAttributes(attrs)
if 'unnumbered' in attr.classes:
- star = '*'
fcaption = caption
else:
ref = self.references[attr.id]
star = ''
if caption:
- fcaption = u'Figure {n}: {caption}'.format(n=ref['id'],
- caption=caption)
+ fcaption = [pf.Str(u'Figure {n}: '.format(n=ref['id']))] + caption
else:
- fcaption = u'Figure {n}'.format(n=ref['id'])
+ fcaption = [pf.Str(u'Figure {n}'.format(n=ref['id']))]
if 'figure' not in attr.classes:
attr.classes.insert(0, 'figure')
-
- if format in self.formats:
-# figure = self.figure_styles[format].format(attr=attr,
-# filename=filename,
-# alt=fcaption,
-# fcaption=fcaption,
-# caption=caption,
-# star=star).encode('utf-8')
-
-# return RawBlock(format, figure)
- beginText = (u'\n'
- '\\begin{{figure}}[htbp]\n'
- '\\centering\n'
- '\\includegraphics{{{filename}}}\n'
- '\\caption{star}{{'.format(filename=filename,
- star=star).encode('utf-8'))
- endText = (u'}}\n'
- '\\label{{{attr.id}}}\n'
- '\\end{{figure}}\n'.format(attr=attr))
- begin = RawBlock('latex', beginText)
- end = RawBlock('latex', endText)
- all = [begin, pf.Str('hello'), end]
- return [begin] + [pf.Plain(caption)] + [end]
- # Convert from: {"t":"Figure", "c":[[{"t":"Str","c":"CAPTION"}],["FIGURE.JPG","TITLE"],"{#REFERENCE}"]}
- # to: {"t": "RawBlock", "c": }
-
+
+ if format == 'latex': return latex_figure(attr, filename, caption, alt)
+ elif format == 'html': return html_figure(attr, filename, fcaption, alt)
+ elif format == 'html5': return html5_figure(attr, filename, fcaption, alt)
+ elif format == 'markdown': return markdown_figure(attr, filename, fcaption, alt)
else:
- alt = [pf.Str(fcaption)]
- target = (filename, '')
- image = pf.Image(alt, target)
- figure = pf.Para([image])
- return pf.Div(attr.to_pandoc(), [figure])
+# # FIXME: docx export fails to include the caption!
+# fcaption = pf.stringify(fcaption)
+# fcaption = [pf.Str(str(caption))]
+ image = pf.Image(fcaption, [filename, ''])
+ return pf.Plain([image])
+# return pf.Para([image])
def section_replacement(self, key, value, format, metadata):
"""Replace sections with appropriate representation.
@@ -406,8 +429,9 @@ class ReferenceManager(object):
else:
citation = citations[0]
- prefix = citation['citationPrefix']
+ prefix = citation['citationPrefix'] + [pf.Space()]
suffix = citation['citationSuffix']
+
label = citation['citationId']
@@ -426,10 +450,9 @@ class ReferenceManager(object):
link = pf.RawInline('latex', '\\ref{{{label}}}'.format(label=label))
return prefix + [link] + suffix
- else: # FIXME! -- This must be the HTML case.
- link_text = '{}{}{}'.format(prefix, text, suffix)
- link = pf.Link([pf.Str(link_text)], ('#' + label, ''))
- return link
+ else:
+ link = pf.Link([pf.Str(text)], ('#' + label, ''))
+ return prefix + [link] + suffix
def convert_multiref(self, key, value, format, metadata):
"""Convert all internal links from '#blah' into format
--
2.2.1
@bwhelm By "formatting", do you mean that the filter doesn't convert markdown captions into the target format (it only emits them as plain text)? That is a limitation.
Can you submit this as a PR? It would make review easier.
Submitted: https://github.com/aaren/pandoc-reference-filter/pull/17