moin
moin copied to clipboard
new xml library?
we currently use:
emeraldtree - was forked from elementtree and adapted for moin2 to work around some APIs that elementtree was lacking back then
- https://pypi.org/project/emeraldtree/
- https://libraries.io/pypi/emeraldtree (looks like only moin2 is using this)
- mostly unmaintained
- lacks xpath support
- py27 and py33, pure python
lxml - https://pypi.org/project/lxml/
- maintained and lots of users
- was sometimes painful to install in the past (current state == ?)
- xpath support (this is why we use lxml.etree for our tests already)
- py27 and up to py37, pypy support == ?
there is also elementtree in the python stdlib, which has limited xpath support.
Just played around with the stuff a bit:
- lxml worked for cpython (guess it took the binary egg there), but with pypy it failed to find some header file (although I have libxml2-dev installed).
- xml.etree from stdlib has really limited xpath support. tests need some rewrites as api is slightly different and also only simple xpath expressions work.
How is pip install lxml on windows? @RogerHaase
adapting to xml.etree stdlib looks like this:
(moin-venv-python) tw@tux:~/w/moin$ git diff
diff --git a/src/moin/converter/_tests/test_smiley.py b/src/moin/converter/_tests/test_smiley.py
index d875bcf4..2f2362b5 100644
--- a/src/moin/converter/_tests/test_smiley.py
+++ b/src/moin/converter/_tests/test_smiley.py
@@ -9,6 +9,8 @@ MoinMoin - Tests for moin.converter.smiley
import re
import pytest
+from xml import etree
+
from moin.converter.smiley import Converter, moin_page, ET
@@ -32,35 +34,42 @@ output_re = re.compile(r'\s+xmlns="[^"]+"')
test_data = {
'normal': (
'<page><body><p>bla bla :-) bla bla</p></body></page>',
- '/page/body/p/span[@class="moin-text-icon moin-smile"]'),
+ './body/p/span[@class="moin-text-icon moin-smile"]'),
'in code': (
'<page><body><code>bla bla :-) bla bla</code></body></page>',
- '/page/body[code="bla bla :-) bla bla"]'),
- '2 at once': (
+ './body[code="bla bla :-) bla bla"]'),
+ '2 at once 1': (
+ '<page><body><p>:-) :-(</p></body></page>',
+ './body/p/'
+ "span[1][@class='moin-text-icon moin-smile']"),
+ '2 at once 2': (
'<page><body><p>:-) :-(</p></body></page>',
- '/page/body/p'
- '[span[1][@class="moin-text-icon moin-smile"]]'
- '[span[2][@class="moin-text-icon moin-sad"]]'),
+ './body/p/'
+ "span[2][@class='moin-text-icon moin-sad']"),
'strong': (
'<page><body><p><strong>:-)</strong></p></body></page>',
- '/page/body/p/strong/span[@class="moin-text-icon moin-smile"]'),
+ './body/p/strong/span[@class="moin-text-icon moin-smile"]'),
# Test to check we do not have bug with newline in the string
'ok with newlines': (
'<page><body><p>1\n2\n3\n4</p></body></page>',
- '/page/body[p="1\n2\n3\n4"]'),
+ './body[p="1\n2\n3\n4"]'),
# Test with space between the elements
'space between elem': (
'<page><body><table-of-content /> <p>text</p></body></page>',
- '/page/body[p="text"]'),
+ './body[p="text"]'),
# Test the ignored tags
'ignored tag': (
'<page><body><p><code>:-)</code></p></body></page>',
- '/page/body/p[code=":-)"]'),
+ './body/p[code=":-)"]'),
# Test the ignored tags and subelement
- 'ignored tag and subelem': (
+ 'ignored tag and subelem 1': (
+ '<page><body><blockcode>:-)<strong>:-(</strong>'
+ '</blockcode></body></page>',
+ "./body/[blockcode=':-):-(']"),
+ 'ignored tag and subelem 2': (
'<page><body><blockcode>:-)<strong>:-(</strong>'
diff --git a/src/moin/converter/_tests/test_smiley.py b/src/moin/converter/_tests/test_smiley.py
index d875bcf4..2f2362b5 100644
--- a/src/moin/converter/_tests/test_smiley.py
+++ b/src/moin/converter/_tests/test_smiley.py
@@ -9,6 +9,8 @@ MoinMoin - Tests for moin.converter.smiley
import re
import pytest
+from xml import etree
+
from moin.converter.smiley import Converter, moin_page, ET
@@ -32,35 +34,42 @@ output_re = re.compile(r'\s+xmlns="[^"]+"')
test_data = {
'normal': (
'<page><body><p>bla bla :-) bla bla</p></body></page>',
- '/page/body/p/span[@class="moin-text-icon moin-smile"]'),
+ './body/p/span[@class="moin-text-icon moin-smile"]'),
'in code': (
'<page><body><code>bla bla :-) bla bla</code></body></page>',
- '/page/body[code="bla bla :-) bla bla"]'),
- '2 at once': (
+ './body[code="bla bla :-) bla bla"]'),
+ '2 at once 1': (
+ '<page><body><p>:-) :-(</p></body></page>',
+ './body/p/'
+ "span[1][@class='moin-text-icon moin-smile']"),
+ '2 at once 2': (
'<page><body><p>:-) :-(</p></body></page>',
- '/page/body/p'
- '[span[1][@class="moin-text-icon moin-smile"]]'
- '[span[2][@class="moin-text-icon moin-sad"]]'),
+ './body/p/'
+ "span[2][@class='moin-text-icon moin-sad']"),
'strong': (
'<page><body><p><strong>:-)</strong></p></body></page>',
- '/page/body/p/strong/span[@class="moin-text-icon moin-smile"]'),
+ './body/p/strong/span[@class="moin-text-icon moin-smile"]'),
# Test to check we do not have bug with newline in the string
'ok with newlines': (
'<page><body><p>1\n2\n3\n4</p></body></page>',
- '/page/body[p="1\n2\n3\n4"]'),
+ './body[p="1\n2\n3\n4"]'),
# Test with space between the elements
'space between elem': (
'<page><body><table-of-content /> <p>text</p></body></page>',
- '/page/body[p="text"]'),
+ './body[p="text"]'),
# Test the ignored tags
'ignored tag': (
'<page><body><p><code>:-)</code></p></body></page>',
- '/page/body/p[code=":-)"]'),
+ './body/p[code=":-)"]'),
# Test the ignored tags and subelement
- 'ignored tag and subelem': (
+ 'ignored tag and subelem 1': (
+ '<page><body><blockcode>:-)<strong>:-(</strong>'
+ '</blockcode></body></page>',
+ "./body/[blockcode=':-):-(']"),
+ 'ignored tag and subelem 2': (
'<page><body><blockcode>:-)<strong>:-(</strong>'
'</blockcode></body></page>',
- '/page/body/blockcode[text()=":-)"][strong=":-("]'),
+ "./body/blockcode/[strong=':-(']"),
}
@@ -71,14 +80,13 @@ def ET_to_string(elem, **options):
def test_smiley_convert(input, query):
- etree = pytest.importorskip('lxml.etree')
conv = Converter()
print 'input:', input
out_elem = conv(ET.XML(input))
after_conversion = ET_to_string(out_elem)
print 'output:', after_conversion
print 'query:', query
- tree = etree.fromstring(after_conversion)
- result = tree.xpath(query)
+ tree = etree.ElementTree.fromstring(after_conversion)
+ result = tree.findall(query)
print 'query result:', result
assert result
note: we do not have lxml in requirements.d/development.txt yet, tests are skipped due to that.
this will be fixed by #627.
do #639 first.
- moved emeraldtree to https://github.com/moinwiki/emeraldtree
- tests work on py27, py34/35/36/37
https://pypi.org/project/elementpath/ interesting?