pyquery
pyquery copied to clipboard
Case sensitivity on selectors
If an XML doc contains an element name which is not lower case, bad things happen.
It would seem this is related to #84, and indeed setting parser='xml' fixed the problem. However, this really should be better documented, or even auto-detected.
However, even with this fix, mixed-case tags are not picked up correctly when using closest() or parents(). This is a bug.
It's also worth mentioning that this problem did not happen on BeautifulSoup.
Here is a small script that will reproduce the behaviour:
#!/usr/bin/env python
from pyquery import PyQuery as pq
import unittest
from bs4 import BeautifulStoneSoup
# Sample XML document shared by every test below: a <hello> root holding two
# <subFamily> elements (note the mixed-case tag name), each carrying a
# DsubFamily attribute and containing one <wtf> child.
xmldoc = """
<hello>
<subFamily DsubFamily="helloWorld">
<wtf>
</wtf>
</subFamily>
<subFamily DsubFamily="helloWorld">
<wtf>
</wtf>
</subFamily>
</hello>
"""
class BugTestCase(unittest.TestCase):
def test_case_selector_upper(self):
doc = pq(xmldoc)
hello = doc("hello")
self.assertEquals(
len(hello("subFamily")), 1)
def test_case_selector_lower(self):
doc = pq(xmldoc)
hello = doc("hello")
self.assertEquals(
len(hello("subfamily")), 1)
def test_case_broken_closest(self):
doc = pq(xmldoc)
hello = doc("hello")
self.assertEquals(
len(hello.closest("subFamily")), 1)
def test_bs(self):
soup = BeautifulStoneSoup(xmldoc)
self.assertEquals(
len(soup.find_all("subFamily")), 2)
self.assertTrue(
soup.find("wtf").findParent("subFamily"))
# Run the test cases only when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
$ pip show pyquery
Version: 1.2.9