././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 011452 x ustar 00 0000000 0000000 28 mtime=1631060004.6335707 beautifulsoup4-4.10.0/ 0000775 0001750 0001750 00000000000 00000000000 016210 5 ustar 00leonardr leonardr 0000000 0000000 ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1483369082.0 beautifulsoup4-4.10.0/COPYING.txt 0000664 0001750 0001750 00000002443 00000000000 020064 0 ustar 00leonardr leonardr 0000000 0000000 Beautiful Soup is made available under the MIT license: Copyright (c) 2004-2017 Leonard Richardson Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Beautiful Soup incorporates code from the html5lib library, which is also made available under the MIT license. Copyright (c) 2006-2013 James Graham and other contributors ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1545665720.0 beautifulsoup4-4.10.0/LICENSE 0000644 0001750 0001750 00000002647 00000000000 017224 0 ustar 00leonardr leonardr 0000000 0000000 Beautiful Soup is made available under the MIT license: Copyright (c) 2004-2019 Leonard Richardson Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Beautiful Soup incorporates code from the html5lib library, which is also made available under the MIT license. Copyright (c) 2006-2013 James Graham and other contributors Beautiful Soup depends on the soupsieve library, which is also made available under the MIT license. Copyright (c) 2018 Isaac Muse ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1532186221.0 beautifulsoup4-4.10.0/MANIFEST.in 0000644 0001750 0001750 00000000333 00000000000 017743 0 ustar 00leonardr leonardr 0000000 0000000 include test-all-versions include convert-py3k include LICENSE include *.txt include doc*/Makefile include doc*/source/*.py include doc*/source/*.rst include doc*/source/*.jpg include scripts/*.py include scripts/*.txt ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1631059740.0 beautifulsoup4-4.10.0/NEWS.txt 0000644 0001750 0001750 00000173636 00000000000 017543 0 ustar 00leonardr leonardr 0000000 0000000 Beautiful Soup's official support for Python 2 ended on December 31st, 2020. The final release to support Python 2 was Beautiful Soup 4.9.3. In the Launchpad Bazaar repository, the final revision to support Python 2 was revision 605. = 4.10.0 (20210907) * This is the first release of Beautiful Soup to only support Python 3. I dropped Python 2 support to maintain support for newer versions (58 and up) of setuptools. See: https://github.com/pypa/setuptools/issues/2769 [bug=1942919] * The behavior of methods like .get_text() and .strings now differs depending on the type of tag. The change is visible with HTML tags like
That boolean attribute had no value |
Paragraphs shouldn't contain block display elements, but this one does:
Here's a table |
Here's a nested table:
|
foo
' soup = self.soup(markup) return doctype.encode("utf8"), soup def test_normal_doctypes(self): """Make sure normal, everyday HTML doctypes are handled correctly.""" self.assertDoctypeHandled("html") self.assertDoctypeHandled( 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"') def test_empty_doctype(self): soup = self.soup("") doctype = soup.contents[0] self.assertEqual("", doctype.strip()) def test_mixed_case_doctype(self): # A lowercase or mixed-case doctype becomes a Doctype. for doctype_fragment in ("doctype", "DocType"): doctype_str, soup = self._document_with_doctype( "html", doctype_fragment ) # Make sure a Doctype object was created and that the DOCTYPE # is uppercase. doctype = soup.contents[0] self.assertEqual(doctype.__class__, Doctype) self.assertEqual(doctype, "html") self.assertEqual( soup.encode("utf8")[:len(doctype_str)], b"" ) # Make sure that the doctype was correctly associated with the # parse tree and that the rest of the document parsed. self.assertEqual(soup.p.contents[0], 'foo') def test_public_doctype_with_url(self): doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"' self.assertDoctypeHandled(doctype) def test_system_doctype(self): self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"') def test_namespaced_system_doctype(self): # We can handle a namespaced doctype with a system ID. self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"') def test_namespaced_public_doctype(self): # Test a namespaced doctype with a public id. self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"') def test_real_xhtml_document(self): """A real XHTML document should come out more or less the same as it went in.""" markup = b"""tag is never designated as an empty-element tag. Even if the markup shows it as an empty-element tag, it shouldn't be presented that way. """ soup = self.soup("
") self.assertFalse(soup.p.is_empty_element) self.assertEqual(str(soup.p), "") def test_unclosed_tags_get_closed(self): """A tag that's not closed by the end of the document should be closed. This applies to all tags except empty-element tags. """ self.assertSoupEquals("", "
") self.assertSoupEquals("", "") self.assertSoupEquals("foobaz
" self.assertSoupEquals(markup) soup = self.soup(markup) comment = soup.find(text="foobar") self.assertEqual(comment.__class__, Comment) # The comment is properly integrated into the tree. foo = soup.find(text="foo") self.assertEqual(comment, foo.next_element) baz = soup.find(text="baz") self.assertEqual(comment, baz.previous_element) def test_preserved_whitespace_in_pre_and_textarea(self): """Whitespace must be preserved inand