././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1739355010.8961408 zodbpickle-4.2/0000755000076600000240000000000014753071603013216 5ustar00m.howitzstaff././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/.manylinux-install.sh0000755000076600000240000000422014753071602017320 0ustar00m.howitzstaff#!/usr/bin/env bash # Generated from: # https://github.com/zopefoundation/meta/tree/master/config/c-code set -e -x # Running inside docker # Set a cache directory for pip. This was # mounted to be the same as it is outside docker so it # can be persisted. export XDG_CACHE_HOME="/cache" # XXX: This works for macOS, where everything bind-mounted # is seen as owned by root in the container. But when the host is Linux # the actual UIDs come through to the container, triggering # pip to disable the cache when it detects that the owner doesn't match. # The below is an attempt to fix that, taken from bcrypt. It seems to work on # Github Actions. 
if [ -n "$GITHUB_ACTIONS" ]; then echo Adjusting pip cache permissions mkdir -p $XDG_CACHE_HOME/pip chown -R $(whoami) $XDG_CACHE_HOME fi ls -ld /cache ls -ld /cache/pip # We need some libraries because we build wheels from scratch: yum -y install libffi-devel tox_env_map() { case $1 in *"cp39"*) echo 'py39';; *"cp310"*) echo 'py310';; *"cp311"*) echo 'py311';; *"cp312"*) echo 'py312';; *"cp313"*) echo 'py313';; *"cp314"*) echo 'py314';; *) echo 'py';; esac } # Compile wheels for PYBIN in /opt/python/*/bin; do if \ [[ "${PYBIN}" == *"cp39/"* ]] || \ [[ "${PYBIN}" == *"cp310/"* ]] || \ [[ "${PYBIN}" == *"cp311/"* ]] || \ [[ "${PYBIN}" == *"cp312/"* ]] || \ [[ "${PYBIN}" == *"cp313/"* ]] || \ [[ "${PYBIN}" == *"cp314/"* ]] ; then if [[ "${PYBIN}" == *"cp314/"* ]] ; then "${PYBIN}/pip" install --pre -e /io/ "${PYBIN}/pip" wheel /io/ --pre -w wheelhouse/ else "${PYBIN}/pip" install -e /io/ "${PYBIN}/pip" wheel /io/ -w wheelhouse/ fi if [ `uname -m` == 'aarch64' ]; then cd /io/ ${PYBIN}/pip install tox TOXENV=$(tox_env_map "${PYBIN}") ${PYBIN}/tox -e ${TOXENV} cd .. 
fi rm -rf /io/build /io/*.egg-info fi done # Bundle external shared libraries into the wheels for whl in wheelhouse/zodbpickle*.whl; do auditwheel repair "$whl" -w /io/wheelhouse/ done ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/.manylinux.sh0000755000076600000240000000077514753071602015667 0ustar00m.howitzstaff#!/usr/bin/env bash # Generated from: # https://github.com/zopefoundation/meta/tree/master/config/c-code set -e -x # Mount the current directory as /io # Mount the pip cache directory as /cache # `pip cache` requires pip 20.1 echo Setting up caching python --version python -mpip --version LCACHE="$(dirname `python -mpip cache dir`)" echo Sharing pip cache at $LCACHE $(ls -ld $LCACHE) docker run --rm -e GITHUB_ACTIONS -v "$(pwd)":/io -v "$LCACHE:/cache" $DOCKER_IMAGE $PRE_CMD /io/.manylinux-install.sh ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/.pre-commit-config.yaml0000644000076600000240000000132114753071602017473 0ustar00m.howitzstaff# Generated from: # https://github.com/zopefoundation/meta/tree/master/config/c-code minimum_pre_commit_version: '3.6' repos: - repo: https://github.com/pycqa/isort rev: "6.0.0" hooks: - id: isort - repo: https://github.com/hhatto/autopep8 rev: "v2.3.2" hooks: - id: autopep8 args: [--in-place, --aggressive, --aggressive] - repo: https://github.com/asottile/pyupgrade rev: v3.19.1 hooks: - id: pyupgrade args: [--py39-plus] - repo: https://github.com/isidentical/teyit rev: 0.4.3 hooks: - id: teyit - repo: https://github.com/PyCQA/flake8 rev: "7.1.1" hooks: - id: flake8 additional_dependencies: - flake8-debugger == 4.1.2 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/.readthedocs.yaml0000644000076600000240000000123014753071602016440 0ustar00m.howitzstaff# Generated from: # https://github.com/zopefoundation/meta/tree/master/config/c-code # 
Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Set the version of Python and other tools you might need build: os: ubuntu-22.04 tools: python: "3.11" # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py # We recommend specifying your dependencies to enable reproducible builds: # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html python: install: - requirements: docs/requirements.txt - method: pip path: . ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/CHANGES.rst0000644000076600000240000001424014753071602015020 0ustar00m.howitzstaff=========== Changelog =========== 4.2 (2025-02-12) ================ - Drop support for Python 3.8. - Add preliminary support for Python 3.14 as of 3.14a4. - Remove unused ``setuptools`` install requirement. 4.1.1 (2024-10-02) ================== - Fix ``NameError`` which occurred when importing ``zodbpickle.fastpickle``. 4.1 (2024-09-17) ================ - Add final support for Python 3.13. 4.0 (2024-05-30) ================ - Drop support for Python 3.7. 3.3 (2024-04-16) ================ - Build Windows wheels on GHA. - Add preliminary support for Python 3.13 as of 3.13a5. 3.2 (2024-02-16) ================ - Add preliminary support for Python 3.13 as of 3.13a3. 3.1 (2023-10-05) ================ - Add support for Python 3.12. 3.0.1 (2023-03-28) ================== - Fix ``NameError`` in ``.fastpickle`` and ``.slowpickle``. 3.0 (2023-03-24) ================ - Build Linux binary wheels for Python 3.11. - Add preliminary support for Python 3.12a5. - Drop support for Python 2.7, 3.5, 3.6. - Drop support for deprecated ``python setup.py test``. 2.6 (2022-11-17) ================ - Add support for building arm64 wheels on macOS. 2.5 (2022-11-03) ================ - Add support for the final Python 3.11 release. 
2.4 (2022-09-15) ================ - Add support for Python 3.11 (as of 3.11.0b3). - Disable unsafe math optimizations in C code. See `pull request 73 `_. 2.3 (2022-04-22) ================ - Add support for Python 3.11 (as of 3.11.0a7). 2.2.0 (2021-09-29) ================== - Add support for Python 3.10. 2.1.0 (2021-09-24) ================== - Add support for Python 3.9. 2.0.0 (2019-11-13) ================== - CPython 2: Make ``zodbpickle.binary`` objects smaller and untracked by the garbage collector. Now they behave more like the native bytes object. Just like it, and just like on Python 3, they cannot have arbitrary attributes or be weakly referenced. See `issue 53 `_. 1.1 (2019-11-09) ================ - Add support for Python 3.8. - Drop support for Python 3.4. 1.0.4 (2019-06-12) ================== - Fix pickle corruption under certain conditions. See `pull request 47 `_. 1.0.3 (2018-12-18) ================== - Fix a bug: zodbpickle.slowpickle assigned `_Pickler` to `Unpickler`. 1.0.2 (2018-08-10) ================== - Add support for Python 3.7. 1.0.1 (2018-05-16) ================== - Fix a memory leak in pickle protocol 3 under Python 2. See `issue 36 `_. 1.0 (2018-02-09) ================ - Add a warning to the readme not to use untrusted pickles. - Drop support for Python 3.3. 0.7.0 (2017-09-22) ================== - Drop support for Python 2.6 and 3.2. - Add support for Jython 2.7. - Add support for Python 3.5 and 3.6. 0.6.0 (2015-04-02) ================== - Restore the ``noload`` behaviour from Python 2.6 and provide the ``noload`` method on the non-C-accelerated unpicklers under PyPy and Python 2. - Add support for PyPy, PyPy3, and Python 3.4. 0.5.2 (2013-08-17) ================== - Import accelerator from *our* extension module under Py3k. See https://github.com/zopefoundation/zodbpickle/issues/6, https://github.com/zopefoundation/zodbpickle/issues/7. - Fix unpickler's ``load_short_binstring`` across supported platforms. 
0.5.1 (2013-07-06) ================== - Update all code and tests to Python 2.6.8, 2.7.5, 3.2.5, 3.3.2 . - Add the modules ``zodbpickle.fastpickle`` and ``zodbpickle.slowpickle``. This provides a version-independent choice of the C or Python implementation. - Fix a minor bug on OS X 0.5.0 (2013-06-14) ================== - Removed support for the ``bytes_as_strings`` arguments to pickling APIs: the pickles created when that argument was true might not be unpickled without passing ``encoding='bytes'``, which ZODB couldn't reliably enforce. On Py3k, ZODB will be using ``protocol=3`` pickles anyway. 0.4.4 (2013-06-07) ================== - Add protocol 3 opcodes to the C version of the ``noload()`` dispatcher. 0.4.3 (2013-06-07) ================== - Packaging error: remove spurious ``-ASIDE`` file from sdist. 0.4.2 (2013-06-07) ================== - Fix NameError in pure-Python version of ``Unpickler.noload_appends``. - Fix NameError in pure-Python version of ``Unpickler.noload_setitems``. 0.4.1 (2013-04-29) ================== - Fix typo in Python2 version of ``zodbpickle.pickle`` module. 0.4 (2013-04-28) ================ - Support the common pickle module interface for Python 2.6, 2.7, 3.2, and 3.3. - Split the Python implementations / tests into Python2- and Py3k-specific variants. - Added a fork of the Python 2.7 ``_pickle.c``, for use under Python2. The fork adds support for the Py3k ``protocol 3`` opcodes. - Added a custom ``binary`` type for use in Python2 apps. Derived from ``bytes``, the ``binary`` type allows Python2 apps to pickle binary data using opcodes which will cause it to be unpickled as ``bytes`` on Py3k. Under Py3k, the ``binary`` type is just an alias for ``bytes``. 0.3 (2013-03-18) ================ - Added ``noload`` code to Python 3.2 version of ``Unpickler``. As with the Python 3.3 version, this code remains untested. - Added ``bytes_as_strings`` option to the Python 3.2 version of ``Pickler``, ``dump``, and ``dumps``. 
0.2 (2013-03-05) ================ - Added ``bytes_as_strings`` option to ``Pickler``, ``dump``, and ``dumps``. - Incomplete support for Python 3.2: - Move ``_pickle.c`` -> ``_pickle_33.c``. - Clone Python 3.2.3's ``_pickle.c`` -> ``_pickle_32.c`` and apply the same patch. - Choose between them at build time based on ``sys.version_info``. - Disable some tests of 3.3-only features. - Missing: implementation of ``noload()`` in ``_pickle_32.c``. - Missing: implementation of ``bytes_as_strings=True`` in ``_pickle_32.c``. 0.1.0 (2013-02-27) ================== - Initial release of Python 3.3's pickle with the patches of Python `issue 6784`__ applied. .. __: http://bugs.python.org/issue6784#msg156166 - Added support for ``errors="bytes"``. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/CONTRIBUTING.md0000644000076600000240000000143614753071602015452 0ustar00m.howitzstaff # Contributing to zopefoundation projects The projects under the zopefoundation GitHub organization are open source and welcome contributions in different forms: * bug reports * code improvements and bug fixes * documentation improvements * pull request reviews For any changes in the repository besides trivial typo fixes you are required to sign the contributor agreement. See https://www.zope.dev/developer/becoming-a-committer.html for details. Please visit our [Developer Guidelines](https://www.zope.dev/developer/guidelines.html) if you'd like to contribute code changes and our [guidelines for reporting bugs](https://www.zope.dev/developer/reporting-bugs.html) if you want to file a bug report. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/LICENSE.txt0000644000076600000240000001133014753071602015036 0ustar00m.howitzstaffThis package contains code originally lifted from Python 3.3 with extensions made by contributors of the Zope Foundation. 
=============================================================================== PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -------------------------------------------- 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. 
This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. =============================================================================== Zope Public License (ZPL) Version 2.1 ------------------------------------- A copyright notice accompanies this license document that identifies the copyright holders. This license has been certified as open source. It has also been designated as GPL compatible by the Free Software Foundation (FSF). Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions in source code must retain the accompanying copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the accompanying copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Names of the copyright holders must not be used to endorse or promote products derived from this software without prior written permission from the copyright holders. 4. The right to distribute this software or to use it for any purpose does not give you the right to use Servicemarks (sm) or Trademarks (tm) of the copyright holders. Use of them is covered by separate agreement with the copyright holders. 5. 
If any files are modified, you must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. Disclaimer THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/MANIFEST.in0000644000076600000240000000075414753071602014761 0ustar00m.howitzstaff# Generated from: # https://github.com/zopefoundation/meta/tree/master/config/c-code include *.md include *.rst include *.txt include buildout.cfg include tox.ini include .pre-commit-config.yaml recursive-include docs *.py recursive-include docs *.rst recursive-include docs *.txt recursive-include docs Makefile recursive-include src *.py include *.yaml include *.sh recursive-include docs *.bat recursive-include patches *.diff recursive-include patches *.patch recursive-include src *.c ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1739355010.8960106 zodbpickle-4.2/PKG-INFO0000644000076600000240000003247214753071603014323 0ustar00m.howitzstaffMetadata-Version: 2.1 Name: zodbpickle Version: 4.2 Summary: Fork of Python 3 pickle module. 
Home-page: https://github.com/zopefoundation/zodbpickle Author: Python and Zope Foundation Author-email: zodb-dev@zope.dev License: PSFL 2 and ZPL-2.1 Keywords: zodb pickle Platform: any Classifier: Development Status :: 5 - Production/Stable Classifier: License :: OSI Approved :: Zope Public License Classifier: License :: OSI Approved :: Python Software Foundation License Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Framework :: ZODB Classifier: Topic :: Database Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Operating System :: Microsoft :: Windows Classifier: Operating System :: Unix Classifier: Operating System :: MacOS :: MacOS X Requires-Python: >=3.9 License-File: LICENSE.txt Provides-Extra: test Requires-Dist: zope.testrunner; extra == "test" Provides-Extra: docs Requires-Dist: Sphinx; extra == "docs" ``zodbpickle`` README ===================== .. image:: https://github.com/zopefoundation/zodbpickle/actions/workflows/tests.yml/badge.svg :target: https://github.com/zopefoundation/zodbpickle/actions/workflows/tests.yml .. image:: https://coveralls.io/repos/github/zopefoundation/zodbpickle/badge.svg :target: https://coveralls.io/github/zopefoundation/zodbpickle :alt: Coverage status .. image:: https://img.shields.io/pypi/v/zodbpickle.svg :target: https://pypi.python.org/pypi/zodbpickle :alt: PyPI .. 
image:: https://img.shields.io/pypi/pyversions/zodbpickle.svg :target: https://pypi.python.org/pypi/zodbpickle :alt: Python versions This package presents a uniform pickling interface for ZODB: - Under Python2, this package forks both Python 2.7's ``pickle`` and ``cPickle`` modules, adding support for the ``protocol 3`` opcodes. It also provides a new subclass of ``bytes``, ``zodbpickle.binary``, which Python2 applications can use to pickle binary values such that they will be unpickled as ``bytes`` under Py3k. - Under Py3k, this package forks the ``pickle`` module (and the supporting C extension) from both Python 3.2 and Python 3.3. The fork add support for the ``noload`` operations used by ZODB. Caution ------- ``zodbpickle`` relies on Python's ``pickle`` module. The ``pickle`` module is not intended to be secure against erroneous or maliciously constructed data. Never unpickle data received from an untrusted or unauthenticated source as arbitrary code might be executed. Also see https://docs.python.org/3.6/library/pickle.html General Usage ------------- To get compatibility between Python 2 and 3 pickling, replace:: import pickle by:: from zodbpickle import pickle This provides compatibility, but has the effect that you get the fast implementation in Python 3, while Python 2 uses the slow version. To get a more deterministic choice of the implementation, use one of:: from zodbpickle import fastpickle # always C from zodbpickle import slowpickle # always Python Both modules can co-exist which is helpful for comparison. But there is a bit more to consider, so please read on! Loading/Storing Python 2 Strings -------------------------------- In all their wisdom, the Python developers have decided that Python 2 ``str`` instances should be loaded as Python 3 ``str`` objects (i.e. unicode strings). Patches were proposed in Python `issue 6784`__ but were never applied. This code base contains those patches. .. 
__: http://bugs.python.org/issue6784 Example 1: Loading Python 2 pickles on Python 3 :: $ python2 >>> import pickle >>> pickle.dumps('\xff', protocol=0) "S'\\xff'\np0\n." >>> pickle.dumps('\xff', protocol=1) 'U\x01\xffq\x00.' >>> pickle.dumps('\xff', protocol=2) '\x80\x02U\x01\xffq\x00.' $ python3 >>> from zodbpickle import pickle >>> pickle.loads(b"S'\\xff'\np0\n.", encoding='bytes') b'\xff' >>> pickle.loads(b'U\x01\xffq\x00.', encoding='bytes') b'\xff' >>> pickle.loads(b'\x80\x02U\x01\xffq\x00.', encoding='bytes') b'\xff' Example 2: Loading Python 3 pickles on Python 2 :: $ python3 >>> from zodbpickle import pickle >>> pickle.dumps(b"\xff", protocol=0) b'c_codecs\nencode\np0\n(V\xff\np1\nVlatin1\np2\ntp3\nRp4\n.' >>> pickle.dumps(b"\xff", protocol=1) b'c_codecs\nencode\nq\x00(X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02tq\x03Rq\x04.' >>> pickle.dumps(b"\xff", protocol=2) b'\x80\x02c_codecs\nencode\nq\x00X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02\x86q\x03Rq\x04.' $ python2 >>> import pickle >>> pickle.loads('c_codecs\nencode\np0\n(V\xff\np1\nVlatin1\np2\ntp3\nRp4\n.') '\xff' >>> pickle.loads('c_codecs\nencode\nq\x00(X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02tq\x03Rq\x04.') '\xff' >>> pickle.loads('\x80\x02c_codecs\nencode\nq\x00X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02\x86q\x03Rq\x04.') '\xff' Example 3: everything breaks down :: $ python2 >>> class Foo(object): ... def __init__(self): ... self.x = 'hello' ... >>> import pickle >>> pickle.dumps(Foo(), protocol=0) "ccopy_reg\n_reconstructor\np0\n(c__main__\nFoo\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nS'hello'\np7\nsb." >>> pickle.dumps(Foo(), protocol=1) 'ccopy_reg\n_reconstructor\nq\x00(c__main__\nFoo\nq\x01c__builtin__\nobject\nq\x02Ntq\x03Rq\x04}q\x05U\x01xq\x06U\x05helloq\x07sb.' >>> pickle.dumps(Foo(), protocol=2) '\x80\x02c__main__\nFoo\nq\x00)\x81q\x01}q\x02U\x01xq\x03U\x05helloq\x04sb.' 
$ python3 >>> from zodbpickle import pickle >>> class Foo(object): pass ... >>> foo = pickle.loads("ccopy_reg\n_reconstructor\np0\n(c__main__\nFoo\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nS'hello'\np7\nsb.", encoding='bytes') >>> foo.x Traceback (most recent call last): File "", line 1, in AttributeError: 'Foo' object has no attribute 'x' wait what? :: >>> foo.__dict__ {b'x': b'hello'} oooh. So we use ``encoding='ASCII'`` (the default) and ``errors='bytes'`` and hope it works:: >>> foo = pickle.loads("ccopy_reg\n_reconstructor\np0\n(c__main__\nFoo\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nS'hello'\np7\nsb.", errors='bytes') >>> foo.x 'hello' falling back to bytes if necessary :: >>> pickle.loads(b'\x80\x02U\x01\xffq\x00.', errors='bytes') b'\xff' Support for ``noload()`` ------------------------ The ZODB uses `cPickle`'s ``noload()`` method to retrieve all persistent references from a pickle without loading any objects. This feature was removed from Python 3's pickle. Unfortuantely, this unnecessarily fills the pickle cache. This module provides a ``noload()`` method again. =========== Changelog =========== 4.2 (2025-02-12) ================ - Drop support for Python 3.8. - Add preliminary support for Python 3.14 as of 3.14a4. - Remove unused ``setuptools`` install requirement. 4.1.1 (2024-10-02) ================== - Fix ``NameError`` which occurred when importing ``zodbpickle.fastpickle``. 4.1 (2024-09-17) ================ - Add final support for Python 3.13. 4.0 (2024-05-30) ================ - Drop support for Python 3.7. 3.3 (2024-04-16) ================ - Build Windows wheels on GHA. - Add preliminary support for Python 3.13 as of 3.13a5. 3.2 (2024-02-16) ================ - Add preliminary support for Python 3.13 as of 3.13a3. 3.1 (2023-10-05) ================ - Add support for Python 3.12. 3.0.1 (2023-03-28) ================== - Fix ``NameError`` in ``.fastpickle`` and ``.slowpickle``. 
3.0 (2023-03-24) ================ - Build Linux binary wheels for Python 3.11. - Add preliminary support for Python 3.12a5. - Drop support for Python 2.7, 3.5, 3.6. - Drop support for deprecated ``python setup.py test``. 2.6 (2022-11-17) ================ - Add support for building arm64 wheels on macOS. 2.5 (2022-11-03) ================ - Add support for the final Python 3.11 release. 2.4 (2022-09-15) ================ - Add support for Python 3.11 (as of 3.11.0b3). - Disable unsafe math optimizations in C code. See `pull request 73 `_. 2.3 (2022-04-22) ================ - Add support for Python 3.11 (as of 3.11.0a7). 2.2.0 (2021-09-29) ================== - Add support for Python 3.10. 2.1.0 (2021-09-24) ================== - Add support for Python 3.9. 2.0.0 (2019-11-13) ================== - CPython 2: Make ``zodbpickle.binary`` objects smaller and untracked by the garbage collector. Now they behave more like the native bytes object. Just like it, and just like on Python 3, they cannot have arbitrary attributes or be weakly referenced. See `issue 53 `_. 1.1 (2019-11-09) ================ - Add support for Python 3.8. - Drop support for Python 3.4. 1.0.4 (2019-06-12) ================== - Fix pickle corruption under certain conditions. See `pull request 47 `_. 1.0.3 (2018-12-18) ================== - Fix a bug: zodbpickle.slowpickle assigned `_Pickler` to `Unpickler`. 1.0.2 (2018-08-10) ================== - Add support for Python 3.7. 1.0.1 (2018-05-16) ================== - Fix a memory leak in pickle protocol 3 under Python 2. See `issue 36 `_. 1.0 (2018-02-09) ================ - Add a warning to the readme not to use untrusted pickles. - Drop support for Python 3.3. 0.7.0 (2017-09-22) ================== - Drop support for Python 2.6 and 3.2. - Add support for Jython 2.7. - Add support for Python 3.5 and 3.6. 
0.6.0 (2015-04-02) ================== - Restore the ``noload`` behaviour from Python 2.6 and provide the ``noload`` method on the non-C-accelerated unpicklers under PyPy and Python 2. - Add support for PyPy, PyPy3, and Python 3.4. 0.5.2 (2013-08-17) ================== - Import accelerator from *our* extension module under Py3k. See https://github.com/zopefoundation/zodbpickle/issues/6, https://github.com/zopefoundation/zodbpickle/issues/7. - Fix unpickler's ``load_short_binstring`` across supported platforms. 0.5.1 (2013-07-06) ================== - Update all code and tests to Python 2.6.8, 2.7.5, 3.2.5, 3.3.2 . - Add the modules ``zodbpickle.fastpickle`` and ``zodbpickle.slowpickle``. This provides a version-independent choice of the C or Python implementation. - Fix a minor bug on OS X 0.5.0 (2013-06-14) ================== - Removed support for the ``bytes_as_strings`` arguments to pickling APIs: the pickles created when that argument was true might not be unpickled without passing ``encoding='bytes'``, which ZODB couldn't reliably enforce. On Py3k, ZODB will be using ``protocol=3`` pickles anyway. 0.4.4 (2013-06-07) ================== - Add protocol 3 opcodes to the C version of the ``noload()`` dispatcher. 0.4.3 (2013-06-07) ================== - Packaging error: remove spurious ``-ASIDE`` file from sdist. 0.4.2 (2013-06-07) ================== - Fix NameError in pure-Python version of ``Unpickler.noload_appends``. - Fix NameError in pure-Python version of ``Unpickler.noload_setitems``. 0.4.1 (2013-04-29) ================== - Fix typo in Python2 version of ``zodbpickle.pickle`` module. 0.4 (2013-04-28) ================ - Support the common pickle module interface for Python 2.6, 2.7, 3.2, and 3.3. - Split the Python implementations / tests into Python2- and Py3k-specific variants. - Added a fork of the Python 2.7 ``_pickle.c``, for use under Python2. The fork adds support for the Py3k ``protocol 3`` opcodes. 
- Added a custom ``binary`` type for use in Python2 apps. Derived from ``bytes``, the ``binary`` type allows Python2 apps to pickle binary data using opcodes which will cause it to be unpickled as ``bytes`` on Py3k. Under Py3k, the ``binary`` type is just an alias for ``bytes``. 0.3 (2013-03-18) ================ - Added ``noload`` code to Python 3.2 version of ``Unpickler``. As with the Python 3.3 version, this code remains untested. - Added ``bytes_as_strings`` option to the Python 3.2 version of ``Pickler``, ``dump``, and ``dumps``. 0.2 (2013-03-05) ================ - Added ``bytes_as_strings`` option to ``Pickler``, ``dump``, and ``dumps``. - Incomplete support for Python 3.2: - Move ``_pickle.c`` -> ``_pickle_33.c``. - Clone Python 3.2.3's ``_pickle.c`` -> ``_pickle_32.c`` and apply the same patch. - Choose between them at build time based on ``sys.version_info``. - Disable some tests of 3.3-only features. - Missing: implementation of ``noload()`` in ``_pickle_32.c``. - Missing: implementation of ``bytes_as_strings=True`` in ``_pickle_32.c``. 0.1.0 (2013-02-27) ================== - Initial release of Python 3.3's pickle with the patches of Python `issue 6784`__ applied. .. __: http://bugs.python.org/issue6784#msg156166 - Added support for ``errors="bytes"``. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/README.rst0000644000076600000240000001343214753071602014707 0ustar00m.howitzstaff``zodbpickle`` README ===================== .. image:: https://github.com/zopefoundation/zodbpickle/actions/workflows/tests.yml/badge.svg :target: https://github.com/zopefoundation/zodbpickle/actions/workflows/tests.yml .. image:: https://coveralls.io/repos/github/zopefoundation/zodbpickle/badge.svg :target: https://coveralls.io/github/zopefoundation/zodbpickle :alt: Coverage status .. image:: https://img.shields.io/pypi/v/zodbpickle.svg :target: https://pypi.python.org/pypi/zodbpickle :alt: PyPI .. 
image:: https://img.shields.io/pypi/pyversions/zodbpickle.svg :target: https://pypi.python.org/pypi/zodbpickle :alt: Python versions This package presents a uniform pickling interface for ZODB: - Under Python2, this package forks both Python 2.7's ``pickle`` and ``cPickle`` modules, adding support for the ``protocol 3`` opcodes. It also provides a new subclass of ``bytes``, ``zodbpickle.binary``, which Python2 applications can use to pickle binary values such that they will be unpickled as ``bytes`` under Py3k. - Under Py3k, this package forks the ``pickle`` module (and the supporting C extension) from both Python 3.2 and Python 3.3. The fork add support for the ``noload`` operations used by ZODB. Caution ------- ``zodbpickle`` relies on Python's ``pickle`` module. The ``pickle`` module is not intended to be secure against erroneous or maliciously constructed data. Never unpickle data received from an untrusted or unauthenticated source as arbitrary code might be executed. Also see https://docs.python.org/3.6/library/pickle.html General Usage ------------- To get compatibility between Python 2 and 3 pickling, replace:: import pickle by:: from zodbpickle import pickle This provides compatibility, but has the effect that you get the fast implementation in Python 3, while Python 2 uses the slow version. To get a more deterministic choice of the implementation, use one of:: from zodbpickle import fastpickle # always C from zodbpickle import slowpickle # always Python Both modules can co-exist which is helpful for comparison. But there is a bit more to consider, so please read on! Loading/Storing Python 2 Strings -------------------------------- In all their wisdom, the Python developers have decided that Python 2 ``str`` instances should be loaded as Python 3 ``str`` objects (i.e. unicode strings). Patches were proposed in Python `issue 6784`__ but were never applied. This code base contains those patches. .. 
__: http://bugs.python.org/issue6784 Example 1: Loading Python 2 pickles on Python 3 :: $ python2 >>> import pickle >>> pickle.dumps('\xff', protocol=0) "S'\\xff'\np0\n." >>> pickle.dumps('\xff', protocol=1) 'U\x01\xffq\x00.' >>> pickle.dumps('\xff', protocol=2) '\x80\x02U\x01\xffq\x00.' $ python3 >>> from zodbpickle import pickle >>> pickle.loads(b"S'\\xff'\np0\n.", encoding='bytes') b'\xff' >>> pickle.loads(b'U\x01\xffq\x00.', encoding='bytes') b'\xff' >>> pickle.loads(b'\x80\x02U\x01\xffq\x00.', encoding='bytes') b'\xff' Example 2: Loading Python 3 pickles on Python 2 :: $ python3 >>> from zodbpickle import pickle >>> pickle.dumps(b"\xff", protocol=0) b'c_codecs\nencode\np0\n(V\xff\np1\nVlatin1\np2\ntp3\nRp4\n.' >>> pickle.dumps(b"\xff", protocol=1) b'c_codecs\nencode\nq\x00(X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02tq\x03Rq\x04.' >>> pickle.dumps(b"\xff", protocol=2) b'\x80\x02c_codecs\nencode\nq\x00X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02\x86q\x03Rq\x04.' $ python2 >>> import pickle >>> pickle.loads('c_codecs\nencode\np0\n(V\xff\np1\nVlatin1\np2\ntp3\nRp4\n.') '\xff' >>> pickle.loads('c_codecs\nencode\nq\x00(X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02tq\x03Rq\x04.') '\xff' >>> pickle.loads('\x80\x02c_codecs\nencode\nq\x00X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02\x86q\x03Rq\x04.') '\xff' Example 3: everything breaks down :: $ python2 >>> class Foo(object): ... def __init__(self): ... self.x = 'hello' ... >>> import pickle >>> pickle.dumps(Foo(), protocol=0) "ccopy_reg\n_reconstructor\np0\n(c__main__\nFoo\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nS'hello'\np7\nsb." >>> pickle.dumps(Foo(), protocol=1) 'ccopy_reg\n_reconstructor\nq\x00(c__main__\nFoo\nq\x01c__builtin__\nobject\nq\x02Ntq\x03Rq\x04}q\x05U\x01xq\x06U\x05helloq\x07sb.' >>> pickle.dumps(Foo(), protocol=2) '\x80\x02c__main__\nFoo\nq\x00)\x81q\x01}q\x02U\x01xq\x03U\x05helloq\x04sb.' 
$ python3 >>> from zodbpickle import pickle >>> class Foo(object): pass ... >>> foo = pickle.loads("ccopy_reg\n_reconstructor\np0\n(c__main__\nFoo\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nS'hello'\np7\nsb.", encoding='bytes') >>> foo.x Traceback (most recent call last): File "", line 1, in AttributeError: 'Foo' object has no attribute 'x' wait what? :: >>> foo.__dict__ {b'x': b'hello'} oooh. So we use ``encoding='ASCII'`` (the default) and ``errors='bytes'`` and hope it works:: >>> foo = pickle.loads("ccopy_reg\n_reconstructor\np0\n(c__main__\nFoo\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nS'hello'\np7\nsb.", errors='bytes') >>> foo.x 'hello' falling back to bytes if necessary :: >>> pickle.loads(b'\x80\x02U\x01\xffq\x00.', errors='bytes') b'\xff' Support for ``noload()`` ------------------------ The ZODB uses `cPickle`'s ``noload()`` method to retrieve all persistent references from a pickle without loading any objects. This feature was removed from Python 3's pickle. Unfortunately, this unnecessarily fills the pickle cache. This module provides a ``noload()`` method again. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/buildout.cfg0000644000076600000240000000030414753071602015522 0ustar00m.howitzstaff[buildout] develop = . 
parts = test scripts [test] recipe = zc.recipe.testrunner eggs = zodbpickle [test] [scripts] recipe = zc.recipe.egg eggs = zodbpickle [test] interpreter = py ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1739355010.8921158 zodbpickle-4.2/docs/0000755000076600000240000000000014753071603014146 5ustar00m.howitzstaff././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/docs/Makefile0000644000076600000240000000117214753071602015606 0ustar00m.howitzstaff# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/docs/conf.py0000644000076600000240000000240514753071602015445 0ustar00m.howitzstaff# Configuration file for the Sphinx documentation builder. 
# # For the full list of built-in configuration values, see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = 'zodbpickle' copyright = '2013-2024, ZODB Developers ' author = 'ZODB Developers' version = '4.0' release = '4.0' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ 'sphinx.ext.intersphinx', ] templates_path = ['_templates'] exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = 'alabaster' html_static_path = ['_static'] # -- Options for intersphinx extension --------------------------------------- # https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#configuration intersphinx_mapping = { 'python': ('https://docs.python.org/3', None), } ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1739355010.892246 zodbpickle-4.2/docs/historical/0000755000076600000240000000000014753071603016307 5ustar00m.howitzstaff././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/docs/historical/proposal.rst0000644000076600000240000001765214753071602020712 0ustar00m.howitzstaffProposal for ZODB pickle compatibility ====================================== Issues ------ - There exists no forward-compatible way to pickle bytes on Python2 (Py3k pickle module "guesses", decoding any Python2 ``str`` using ``latin1``). - Some data pickled as ``str`` on Python2 truly is binary (e.g., ``Pdata`` objects for Zope2's ``OFS.Image.File`` and ``OFS.Image.Image`` types; crypto hashes?) 
- Some Python2 applications may have the same attribute for a given class stored both as ``str`` and as ``unicode`` (due e.g., to bugs in the code, literal defaults, browser quirks, changes to code over time). Scenarios --------- .. _py2_forever: Existing Python2-only Application +++++++++++++++++++++++++++++++++ - Code for the app is never(ish) going to migrate to Py3k. - Using an updated / supported ZODB package **must** be possible. - Ideally, requires no changes to application code. - Ideally, requires no database fixup / conversion. - Best strategy is likely ignore_compat_. .. _py3k_only: New, Py3k-only Application ++++++++++++++++++++++++++ - Code for the app will run only on Py3k. - Running with the latest-and-greatest ZODB **must** be possible. - Ideally, the code for the app will make no concessions to backward-compatibility. - Best strategy is likely ignore_compat_. .. _migrate_w_convert: Python2 Application Migrating to Py3k +++++++++++++++++++++++++++++++++++++ - Application code "straddles" both Pythons using "compatible subset" dialect, but only during the migration period. - During that period, code **must** be able to open the database from both Python2 and Py3k. - Ideally, application code will need to make no concessions to backward-compatibility after migration. - It is acceptable to run a conversion process which normalizes all active records in the database prior to testing. - For databases which are already "binary clean" (binary data exists only in blobs; the application creates no new non-blob binary attributes), the best strategy is likely ignore_compat_. - For databases which are not already "binary clean" (there may be non-blob binary attributes), the best strategy is likely to convert_storages_, followed by replace_py2_cpickle_ (if the Python2 client might create new non-blob binary attributes). 
- wrap_storages_ (on the Python2 side) might be simpler than replace_py2_cpickle_, if the sources of non-blob binary attributes are well understood. .. _straddle_w_convert: Python2 Application Straddling Python2 / Py3k (1) +++++++++++++++++++++++++++++++++++++++++++++++++ - Application code "straddles" both Pythons using "compatible subset" dialect. - Code **must** be able to open the database from both Python2 and Py3k. - It is acceptable to run a conversion process which normalizes all active records in the database prior to testing. - For databases which are already "binary clean" (binary data exists only in blobs; the application creates no new non-blob binary attributes), the best strategy is likely ignore_compat_. - For databases which are not already "binary clean" (there may be non-blob binary attributes), the best strategy is likely to convert_storages_, followed by replace_py2_cpickle_ (if the Python2 client might create new non-blob binary attributes). - For cases where Python2 and Py3k clients may share the database for an extended period, and where disruption to the Python2 clients must be minimized, the replace_py3k_pickle_ strategy might be preferred, until convert_storages_ becomes feasible. .. _straddle_no_convert: Python2 Application Straddling Python2 / Py3k (2) +++++++++++++++++++++++++++++++++++++++++++++++++ - Application code "straddles" both Pythons using "compatible subset" dialect. - Code **must** be able to open the database from both Python2 and Py3k. - It is **not** acceptable to run a conversion process which normalizes all active records in the database prior to testing (e.g., the database is too large to convert on existing hardware, or the downtime required for conversion is unacceptable). - Because disruption to the Python2 clients must be minimized, the best strategy is likely replace_py3k_pickle_ until convert_storages_ becomes feasible. - Alternatively, wrap_storages_ might be the best strategy for the Py3k clients. Strategies ---------- .. 
_ignore_compat: Ignore compatibility ++++++++++++++++++++ Use the stdlib pickle support in its default mode. - No changes to the ``ZODB`` packages on Python2 or Py3k. - Pickles created under Python2 will be readable on Py3k; however, *all* bytes data will be coerced (via ``latin1``) to unicode. - Pickles created under Py3k will likely not be readable on Python2 (Python2 has no support for ``protocol 3``). - Easiest usage for applications which are never going to straddle. - Compatibility will only be achievable via one-time conversions (where the conversion script uses one of the other strategies or tools). .. _replace_py3k_pickle: Replace Py3k ``pickle`` +++++++++++++++++++++++ Keep pickling in the Python2 / protocol 1 way we have always done. - No changes to the ``ZODB`` packages on Python2. Storages do not need to be configured with any custom pickle support. - On Py3k, ``ZODB`` uses pickler / unpickler from the ``zodbpickle`` module, such that Python2 ``str`` objects are unpickled as ``bytes``; ``bytes`` are pickled using the ``protocol 1`` opcodes (so that Python2 will unpickle them as ``str``). .. _replace_py2_cPickle: Replace Python2 ``cPickle`` +++++++++++++++++++++++++++ Move to pickling in the new protocol 3 way (native under Py3k). - On Python2, applications which need to ensure that ``bytes`` objects unpickle correctly under Py3k must be changed to use a new type, ``zodbpickle.binary``. ``ZODB`` is configured with pickler / unpickler from ``zodbpickle``, such that objects of this type will be pickled using the ``protocol 3`` opcodes for bytes (so that Py3k will unpickle them as ``bytes``). - Existing data for the affected classes will need to be fixed up using a variation of convert_storages_. - No changes to the ``ZODB`` packages on Py3k. Storages do not need to be configured with any custom pickle support. .. 
_convert_storages: Convert Database Storages +++++++++++++++++++++++++ - Need tool(s) to identify problematic data: - Classes which mix ``str`` and ``unicode`` values for the same attribute across records / instances. - Utility which can apply per-class transforms to state pickles: - E.g., for instances of ``OFS.Image.Pdata``, convert the ``data`` attribute (which should be a Python2 ``str``) to ``zodbpickle.binary``. (Of course, these would probably be better off written out as blobs). - Or, for some application which mixes ``str`` and ``unicode`` under Python2 (either across instances or across transactions): upconvert any value of type ``str`` for the given attribute(s) to ``unicode``, using a configured encoding strategy (e.g., try ``utf8`` first, falling back to ``latin1``). - One-time converter utility would use ``copyTransactionsFrom``-style pattern, opening the existing database readonly, getting pickles for each transaction, invoking the converter utility for each instance to fix up the pickle, then writing the converted pickles into the new database. .. _wrap_storages: Wrap Database Storages ++++++++++++++++++++++ - A wrapper storage uses the converter utility (identified above) during the ``load`` operation, fixing up the object state before it is handed to the instance's ``__setstate__``. - During the ``save`` operation, the wrapper would fix up pickled instance state (after calling ``__getstate__``). - Wrappers might be applied under Python2 (e.g., for apps where the database is already converted to ``protocol 3``) as an alternative to replace_py2_cpickle_. - Wrappers might be applied under Py3k (e.g., for apps where the database is not already converted to ``protocol 3``) as an alternative to replace_py3k_pickle_. 
././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/docs/index.rst0000644000076600000240000000040114753071602016001 0ustar00m.howitzstaff:mod:`zodbpickle` documentation =============================== Historical ---------- .. toctree:: :maxdepth: 2 :caption: Contents: historical/proposal Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/docs/make.bat0000644000076600000240000000144014753071602015551 0ustar00m.howitzstaff@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. 
echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1739355010.8927577 zodbpickle-4.2/patches/0000755000076600000240000000000014753071603014645 5ustar00m.howitzstaff././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/patches/pickle_bytes_code.diff0000644000076600000240000001054214753071602021147 0ustar00m.howitzstaffdiff -r 39ddcc5c7fb9 Lib/pickle.py --- a/Lib/pickle.py Sat Feb 25 19:26:39 2012 +0200 +++ b/Lib/pickle.py Sat Mar 17 17:03:09 2012 +0100 @@ -798,7 +798,8 @@ map the old Python 2.x names to the new names used in Python 3.x. The *encoding* and *errors* tell pickle how to decode 8-bit string instances pickled by Python 2.x; these default to 'ASCII' and - 'strict', respectively. + 'strict', respectively. *encoding* can be 'bytes' to read 8-bit string + instances as bytes objects. 
""" self.readline = file.readline self.read = file.read @@ -935,6 +936,12 @@ self.append(unpack('>d', self.read(8))[0]) dispatch[BINFLOAT[0]] = load_binfloat + def decode_string(self, value): + if self.encoding == "bytes": + return value + else: + return value.decode(self.encoding, self.errors) + def load_string(self): orig = self.readline() rep = orig[:-1] @@ -946,15 +953,13 @@ break else: raise ValueError("insecure string pickle: %r" % orig) - self.append(codecs.escape_decode(rep)[0] - .decode(self.encoding, self.errors)) + self.append(self.decode_string(codecs.escape_decode(rep)[0])) dispatch[STRING[0]] = load_string def load_binstring(self): len = mloads(b'i' + self.read(4)) data = self.read(len) - value = str(data, self.encoding, self.errors) - self.append(value) + self.append(self.decode_string(data)) dispatch[BINSTRING[0]] = load_binstring def load_binbytes(self): @@ -973,9 +978,8 @@ def load_short_binstring(self): len = ord(self.read(1)) - data = bytes(self.read(len)) - value = str(data, self.encoding, self.errors) - self.append(value) + data = self.read(len) + self.append(self.decode_string(data)) dispatch[SHORT_BINSTRING[0]] = load_short_binstring def load_short_binbytes(self): diff -r 39ddcc5c7fb9 Modules/_pickle.c --- a/Modules/_pickle.c Sat Feb 25 19:26:39 2012 +0200 +++ b/Modules/_pickle.c Sat Mar 17 17:03:09 2012 +0100 @@ -4108,6 +4108,18 @@ return 0; } +/* Returns a new reference */ +static PyObject * +decode_string(UnpicklerObject *self, PyObject *value) +{ + if (strcmp(self->encoding, "bytes") == 0) { + Py_INCREF(value); + return value; + } else { + return PyUnicode_FromEncodedObject(value, self->encoding, self->errors); + } +} + static int load_string(UnpicklerObject *self) { @@ -4150,7 +4162,8 @@ free(s); if (bytes == NULL) return -1; - str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors); + + str = decode_string(self, bytes); Py_DECREF(bytes); if (str == NULL) return -1; @@ -4214,7 +4227,7 @@ static int 
load_binstring(UnpicklerObject *self) { - PyObject *str; + PyObject *bytes, *str; Py_ssize_t x; char *s; @@ -4231,8 +4244,12 @@ if (_Unpickler_Read(self, &s, x) < 0) return -1; - /* Convert Python 2.x strings to unicode. */ - str = PyUnicode_Decode(s, x, self->encoding, self->errors); + bytes = PyBytes_FromStringAndSize(s, x); + if (bytes == NULL) + return -1; + + str = decode_string(self, bytes); + Py_DECREF(bytes); if (str == NULL) return -1; @@ -4243,7 +4260,7 @@ static int load_short_binstring(UnpicklerObject *self) { - PyObject *str; + PyObject *bytes, *str; Py_ssize_t x; char *s; @@ -4255,8 +4272,12 @@ if (_Unpickler_Read(self, &s, x) < 0) return -1; - /* Convert Python 2.x strings to unicode. */ - str = PyUnicode_Decode(s, x, self->encoding, self->errors); + bytes = PyBytes_FromStringAndSize(s, x); + if (bytes == NULL) + return -1; + + str = decode_string(self, bytes); + Py_DECREF(bytes); if (str == NULL) return -1; @@ -5580,7 +5601,8 @@ "map the old Python 2.x names to the new names used in Python 3.x. The\n" "*encoding* and *errors* tell pickle how to decode 8-bit string\n" "instances pickled by Python 2.x; these default to 'ASCII' and\n" -"'strict', respectively.\n"); +"'strict', respectively. *encoding* can be 'bytes' to read 8-bit string\n" +"instances as byte objects.\n"); static int Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/patches/pickle_bytes_tests.diff0000644000076600000240000001224714753071602021403 0ustar00m.howitzstaffdiff -r 39ddcc5c7fb9 Lib/test/pickletester.py --- a/Lib/test/pickletester.py Sat Feb 25 19:26:39 2012 +0200 +++ b/Lib/test/pickletester.py Sat Mar 17 17:03:45 2012 +0100 @@ -1189,6 +1189,59 @@ dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.' 
self.assertRaises(ValueError, self.loads, dumped) +class AbstractBytestrTests(unittest.TestCase): + def unpickleEqual(self, data, unpickled): + loaded = self.loads(data, encoding="bytes") + self.assertEqual(loaded, unpickled) + + def test_load_str_protocol_0(self): + """ Test str from protocol=0 + python 2: pickle.dumps('bytestring \x00\xa0', protocol=0) """ + self.unpickleEqual( + b"S'bytestring \\x00\\xa0'\np0\n.", + b'bytestring \x00\xa0') + + def test_load_str_protocol_1(self): + """ Test str from protocol=1 + python 2: pickle.dumps('bytestring \x00\xa0', protocol=1) """ + self.unpickleEqual( + b'U\rbytestring \x00\xa0q\x00.', + b'bytestring \x00\xa0') + + def test_load_str_protocol_2(self): + """ Test str from protocol=2 + python 2: pickle.dumps('bytestring \x00\xa0', protocol=2) """ + self.unpickleEqual( + b'\x80\x02U\rbytestring \x00\xa0q\x00.', + b'bytestring \x00\xa0') + + def test_load_unicode_protocol_0(self): + """ Test unicode with protocol=0 + python 2: pickle.dumps(u"\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440", protocol=0) """ + self.unpickleEqual( + b'V\\u041a\\u043e\\u043c\\u043f\\u044c\\u044e\\u0442\\u0435\\u0440\np0\n.', + '\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440') + + def test_load_unicode_protocol_1(self): + """ Test unicode with protocol=1 + python 2: pickle.dumps(u"\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440", protocol=1) """ + self.unpickleEqual( + b'X\x12\x00\x00\x00\xd0\x9a\xd0\xbe\xd0\xbc\xd0\xbf\xd1\x8c\xd1\x8e\xd1\x82\xd0\xb5\xd1\x80q\x00.', + '\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440') + + def test_load_unicode_protocol_2(self): + """ Test unicode with protocol=1 + python 2: pickle.dumps(u"\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440", protocol=2) """ + self.unpickleEqual( + b'\x80\x02X\x12\x00\x00\x00\xd0\x9a\xd0\xbe\xd0\xbc\xd0\xbf\xd1\x8c\xd1\x8e\xd1\x82\xd0\xb5\xd1\x80q\x00.', + '\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440') + + def 
test_load_long_str_protocol_1(self): + """ Test long str with protocol=1 + python 2: pickle.dumps('x'*300, protocol=1) """ + self.unpickleEqual( + b'T,\x01\x00\x00xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxq\x00.', + b'x'*300) class BigmemPickleTests(unittest.TestCase): diff -r 39ddcc5c7fb9 Lib/test/test_pickle.py --- a/Lib/test/test_pickle.py Sat Feb 25 19:26:39 2012 +0200 +++ b/Lib/test/test_pickle.py Sat Mar 17 17:03:45 2012 +0100 @@ -8,6 +8,7 @@ from test.pickletester import AbstractPersistentPicklerTests from test.pickletester import AbstractPicklerUnpicklerObjectTests from test.pickletester import BigmemPickleTests +from test.pickletester import AbstractBytestrTests try: import _pickle @@ -19,15 +20,13 @@ class PickleTests(AbstractPickleModuleTests): pass - -class PyPicklerTests(AbstractPickleTests): - +class PyPicklerBase: pickler = pickle._Pickler unpickler = pickle._Unpickler - def dumps(self, arg, proto=None): + def dumps(self, arg, proto=None, **kwds): f = io.BytesIO() - p = self.pickler(f, proto) + p = self.pickler(f, proto, **kwds) p.dump(arg) f.seek(0) return bytes(f.read()) @@ -37,6 +36,11 @@ u = self.unpickler(f, **kwds) return u.load() +class PyPicklerTests(PyPicklerBase, AbstractPickleTests): + pass + +class PyPicklerBytestrTests(PyPicklerBase, AbstractBytestrTests): + pass class InMemoryPickleTests(AbstractPickleTests, BigmemPickleTests): @@ -85,6 +89,10 @@ pickler = _pickle.Pickler unpickler = _pickle.Unpickler + class CPicklerBytestrTests(PyPicklerBytestrTests): + pickler = _pickle.Pickler + unpickler = _pickle.Unpickler + class CPersPicklerTests(PyPersPicklerTests): pickler = _pickle.Pickler unpickler = _pickle.Unpickler @@ -103,9 +111,9 @@ def test_main(): - tests = [PickleTests, 
PyPicklerTests, PyPersPicklerTests] + tests = [PickleTests, PyPicklerTests, PyPersPicklerTests, PyPicklerBytestrTests] if has_c_implementation: - tests.extend([CPicklerTests, CPersPicklerTests, + tests.extend([CPicklerTests, CPicklerBytestrTests, CPersPicklerTests, CDumpPickle_LoadPickle, DumpPickle_CLoadPickle, PyPicklerUnpicklerObjectTests, CPicklerUnpicklerObjectTests, ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/patches/pickle_noload.patch0000644000076600000240000002612514753071602020476 0ustar00m.howitzstaff--- /tmp/Python-3.3.6/Modules/_pickle.c 2014-10-12 03:03:53.000000000 -0400 +++ src/zodbpickle/_pickle_33.c 2024-05-27 10:47:44.715349635 -0400 @@ -5476,6 +5641,475 @@ return load(self); } +/* No-load functions to support noload, which is used to + find persistent references. */ + +static int +noload_obj(UnpicklerObject *self) +{ + int i; + + if ((i = marker(self)) < 0) return -1; + return Pdata_clear(self->stack, i+1); +} + + +static int +noload_inst(UnpicklerObject *self) +{ + int i; + char *s; + + if ((i = marker(self)) < 0) return -1; + Pdata_clear(self->stack, i); + if (_Unpickler_Readline(self, &s) < 0) return -1; + if (_Unpickler_Readline(self, &s) < 0) return -1; + PDATA_APPEND(self->stack, Py_None, -1); + return 0; +} + +static int +noload_newobj(UnpicklerObject *self) +{ + PyObject *obj; + + PDATA_POP(self->stack, obj); /* pop argtuple */ + if (obj == NULL) return -1; + Py_DECREF(obj); + + PDATA_POP(self->stack, obj); /* pop cls */ + if (obj == NULL) return -1; + Py_DECREF(obj); + + PDATA_APPEND(self->stack, Py_None, -1); + return 0; +} + +static int +noload_global(UnpicklerObject *self) +{ + char *s; + + if (_Unpickler_Readline(self, &s) < 0) return -1; + if (_Unpickler_Readline(self, &s) < 0) return -1; + PDATA_APPEND(self->stack, Py_None,-1); + return 0; +} + +static int +noload_reduce(UnpicklerObject *self) +{ + + if (Py_SIZE(self->stack) < 2) return stack_underflow(); + 
Pdata_clear(self->stack, Py_SIZE(self->stack)-2); + PDATA_APPEND(self->stack, Py_None,-1); + return 0; +} + +static int +noload_build(UnpicklerObject *self) { + + if (Py_SIZE(self->stack) < 1) return stack_underflow(); + Pdata_clear(self->stack, Py_SIZE(self->stack)-1); + return 0; +} + +static int +noload_extension(UnpicklerObject *self, int nbytes) +{ + char *codebytes; + + assert(nbytes == 1 || nbytes == 2 || nbytes == 4); + if (_Unpickler_Read(self, &codebytes, nbytes) < 0) return -1; + PDATA_APPEND(self->stack, Py_None, -1); + return 0; +} + +static int +do_noload_append(UnpicklerObject *self, Py_ssize_t x) +{ + PyObject *list = 0; + Py_ssize_t len; + + len=Py_SIZE(self->stack); + if (!( len >= x && x > 0 )) return stack_underflow(); + /* nothing to do */ + if (len==x) return 0; + + list=self->stack->data[x-1]; + if (list == Py_None) { + return Pdata_clear(self->stack, x); + } + else { + return do_append(self, x); + } + +} + +static int +noload_append(UnpicklerObject *self) +{ + return do_noload_append(self, Py_SIZE(self->stack) - 1); +} + +static int +noload_appends(UnpicklerObject *self) +{ + return do_noload_append(self, marker(self)); +} + +static int +do_noload_setitems(UnpicklerObject *self, Py_ssize_t x) +{ + PyObject *dict = 0; + Py_ssize_t len; + + if (!( (len=Py_SIZE(self->stack)) >= x + && x > 0 )) return stack_underflow(); + + dict=self->stack->data[x-1]; + if (dict == Py_None) { + return Pdata_clear(self->stack, x); + } + else { + return do_setitems(self, x); + } +} + +static int +noload_setitem(UnpicklerObject *self) +{ + return do_noload_setitems(self, Py_SIZE(self->stack) - 2); +} + +static int +noload_setitems(UnpicklerObject *self) +{ + return do_noload_setitems(self, marker(self)); +} + +static PyObject * +noload(UnpicklerObject *self) +{ + PyObject *err = 0, *val = 0; + char *s; + + self->num_marks = 0; + Pdata_clear(self->stack, 0); + + while (1) { + if (_Unpickler_Read(self, &s, 1) < 0) + break; + + switch (s[0]) { + case NONE: + if 
(load_none(self) < 0) + break; + continue; + + case BININT: + if (load_binint(self) < 0) + break; + continue; + + case BININT1: + if (load_binint1(self) < 0) + break; + continue; + + case BININT2: + if (load_binint2(self) < 0) + break; + continue; + + case INT: + if (load_int(self) < 0) + break; + continue; + + case LONG: + if (load_long(self) < 0) + break; + continue; + + case LONG1: + if (load_counted_long(self, 1) < 0) + break; + continue; + + case LONG4: + if (load_counted_long(self, 4) < 0) + break; + continue; + + case FLOAT: + if (load_float(self) < 0) + break; + continue; + + case BINFLOAT: + if (load_binfloat(self) < 0) + break; + continue; + + case BINSTRING: + if (load_binstring(self) < 0) + break; + continue; + + case SHORT_BINSTRING: + if (load_short_binstring(self) < 0) + break; + continue; + + case STRING: + if (load_string(self) < 0) + break; + continue; + + case UNICODE: + if (load_unicode(self) < 0) + break; + continue; + + case BINUNICODE: + if (load_binunicode(self) < 0) + break; + continue; + + case EMPTY_TUPLE: + if (load_counted_tuple(self, 0) < 0) + break; + continue; + + case TUPLE1: + if (load_counted_tuple(self, 1) < 0) + break; + continue; + + case TUPLE2: + if (load_counted_tuple(self, 2) < 0) + break; + continue; + + case TUPLE3: + if (load_counted_tuple(self, 3) < 0) + break; + continue; + + case TUPLE: + if (load_tuple(self) < 0) + break; + continue; + + case EMPTY_LIST: + if (load_empty_list(self) < 0) + break; + continue; + + case LIST: + if (load_list(self) < 0) + break; + continue; + + case EMPTY_DICT: + if (load_empty_dict(self) < 0) + break; + continue; + + case DICT: + if (load_dict(self) < 0) + break; + continue; + + case OBJ: + if (noload_obj(self) < 0) + break; + continue; + + case INST: + if (noload_inst(self) < 0) + break; + continue; + + case NEWOBJ: + if (noload_newobj(self) < 0) + break; + continue; + + case GLOBAL: + if (noload_global(self) < 0) + break; + continue; + + case APPEND: + if (noload_append(self) < 0) + 
break; + continue; + + case APPENDS: + if (noload_appends(self) < 0) + break; + continue; + + case BUILD: + if (noload_build(self) < 0) + break; + continue; + + case DUP: + if (load_dup(self) < 0) + break; + continue; + + case BINGET: + if (load_binget(self) < 0) + break; + continue; + + case LONG_BINGET: + if (load_long_binget(self) < 0) + break; + continue; + + case GET: + if (load_get(self) < 0) + break; + continue; + + case EXT1: + if (noload_extension(self, 1) < 0) + break; + continue; + + case EXT2: + if (noload_extension(self, 2) < 0) + break; + continue; + + case EXT4: + if (noload_extension(self, 4) < 0) + break; + continue; + + case MARK: + if (load_mark(self) < 0) + break; + continue; + + case BINPUT: + if (load_binput(self) < 0) + break; + continue; + + case LONG_BINPUT: + if (load_long_binput(self) < 0) + break; + continue; + + case PUT: + if (load_put(self) < 0) + break; + continue; + + case POP: + if (load_pop(self) < 0) + break; + continue; + + case POP_MARK: + if (load_pop_mark(self) < 0) + break; + continue; + + case SETITEM: + if (noload_setitem(self) < 0) + break; + continue; + + case SETITEMS: + if (noload_setitems(self) < 0) + break; + continue; + + case STOP: + break; + + case PERSID: + if (load_persid(self) < 0) + break; + continue; + + case BINPERSID: + if (load_binpersid(self) < 0) + break; + continue; + + case REDUCE: + if (noload_reduce(self) < 0) + break; + continue; + + case PROTO: + if (load_proto(self) < 0) + break; + continue; + + case NEWTRUE: + if (load_bool(self, Py_True) < 0) + break; + continue; + + case NEWFALSE: + if (load_bool(self, Py_False) < 0) + break; + continue; + + case BINBYTES: + if (load_binbytes(self) < 0) + break; + continue; + + case SHORT_BINBYTES: + if (load_short_binbytes(self) < 0) + break; + continue; + + default: + PyErr_Format(UnpicklingError, + "invalid load key, '%c'.", s[0]); + return NULL; + } + + break; + } + + if ((err = PyErr_Occurred())) { + if (err == PyExc_EOFError) { + 
PyErr_SetNone(PyExc_EOFError); + } + return NULL; + } + + PDATA_POP(self->stack, val); + return val; +} + + +PyDoc_STRVAR(Unpickler_noload_doc, +"noload() -- not load a pickle, but go through most of the motions\n" +"\n" +"This function can be used to read past a pickle without instantiating\n" +"any objects or importing any modules. It can also be used to find all\n" +"persistent references without instantiating any objects or importing\n" +"any modules.\n"); + +static PyObject * +Unpickler_noload(UnpicklerObject *self, PyObject *unused) +{ + return noload(self); +} + /* The name of find_class() is misleading. In newer pickle protocols, this function is used for loading any global (i.e., functions), not just classes. The name is kept only for backward compatibility. */ @@ -5578,6 +6212,8 @@ static struct PyMethodDef Unpickler_methods[] = { {"load", (PyCFunction)Unpickler_load, METH_NOARGS, Unpickler_load_doc}, + {"noload", (PyCFunction)Unpickler_noload, METH_NOARGS, + Unpickler_noload_doc}, {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS, Unpickler_find_class_doc}, {NULL, NULL} /* sentinel */ ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/pyproject.toml0000644000076600000240000000130714753071602016132 0ustar00m.howitzstaff# # Generated from: # https://github.com/zopefoundation/meta/tree/master/config/c-code [build-system] requires = ["setuptools <= 75.6.0"] build-backend = "setuptools.build_meta" [tool.coverage.run] branch = true source = ["zodbpickle"] relative_files = true [tool.coverage.report] fail_under = 63 precision = 2 ignore_errors = true show_missing = true exclude_lines = ["pragma: no cover", "pragma: nocover", "except ImportError:", "raise NotImplementedError", "if __name__ == '__main__':", "self.fail", "raise AssertionError", "raise unittest.Skip"] [tool.coverage.html] directory = "parts/htmlcov" [tool.coverage.paths] source = ["src/", ".tox/*/lib/python*/site-packages/", 
".tox/pypy*/site-packages/"] ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1739355010.8963592 zodbpickle-4.2/setup.cfg0000644000076600000240000000125514753071603015042 0ustar00m.howitzstaff[zest.releaser] create-wheel = no [flake8] doctests = 1 per-file-ignores = src/zodbpickle/fastpickle.py: F401 F403 src/zodbpickle/pickle.py: F401 F403 src/zodbpickle/pickle_3.py: E221 E225 E262 F401 F403 src/zodbpickle/pickletools_3.py: E221 src/zodbpickle/slowpickle.py: F401 F403 [check-manifest] ignore = .editorconfig .meta.toml docs/_build/html/_sources/* [isort] force_single_line = True combine_as_imports = True sections = FUTURE,STDLIB,THIRDPARTY,ZOPE,FIRSTPARTY,LOCALFOLDER known_third_party = docutils, pkg_resources, pytz known_zope = known_first_party = default_section = ZOPE line_length = 79 lines_after_imports = 2 [egg_info] tag_build = tag_date = 0 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/setup.py0000644000076600000240000000564114753071602014735 0ustar00m.howitzstaff############################################################################## # # Copyright (c) 2013 Zope Foundation and Contributors. # All Rights Reserved. # # This software is subject to the provisions of the Zope Public License, # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # FOR A PARTICULAR PURPOSE. 
# ############################################################################## """Setup""" import os import platform from setuptools import Extension from setuptools import find_packages from setuptools import setup here = os.path.abspath(os.path.dirname(__file__)) def read(fname): with open(os.path.join(here, fname)) as f: return f.read() README = read('README.rst') + '\n\n' + read('CHANGES.rst') EXT = 'src/zodbpickle/_pickle_33.c' # PyPy and jython won't build the extension. py_impl = getattr(platform, 'python_implementation', lambda: None) is_pypy = py_impl() == 'PyPy' is_jython = py_impl() == 'Jython' is_pure = int(os.environ.get('PURE_PYTHON', '0')) if is_pure or is_pypy or is_jython: ext_modules = [] else: ext_modules = [Extension(name='zodbpickle._pickle', sources=[EXT])] setup( name='zodbpickle', version='4.2', description='Fork of Python 3 pickle module.', author='Python and Zope Foundation', author_email='zodb-dev@zope.dev', url='https://github.com/zopefoundation/zodbpickle', license='PSFL 2 and ZPL-2.1', long_description=README, classifiers=[ 'Development Status :: 5 - Production/Stable', 'License :: OSI Approved :: Zope Public License', 'License :: OSI Approved :: Python Software Foundation License', 'Programming Language :: Python', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Framework :: ZODB', 'Topic :: Database', 'Topic :: Software Development :: Libraries :: Python Modules', 'Operating System :: Microsoft :: Windows', 'Operating System :: Unix', 'Operating System :: MacOS :: MacOS X', ], keywords='zodb pickle', platforms=['any'], packages=find_packages('src'), package_dir={'': 'src'}, ext_modules=ext_modules, 
python_requires='>=3.9', extras_require={ 'test': ['zope.testrunner'], 'docs': ['Sphinx'], }, include_package_data=True, zip_safe=False, ) ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1739355010.8893716 zodbpickle-4.2/src/0000755000076600000240000000000014753071603014005 5ustar00m.howitzstaff././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1739355010.8939433 zodbpickle-4.2/src/zodbpickle/0000755000076600000240000000000014753071603016133 5ustar00m.howitzstaff././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/__init__.py0000644000076600000240000000005414753071602020242 0ustar00m.howitzstaff__all__ = [ 'binary', ] binary = bytes ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/_pickle_33.c0000644000076600000240000062105514753071602020222 0ustar00m.howitzstaff#include "Python.h" #include "structmember.h" PyDoc_STRVAR(pickle_module_doc, "Optimized C implementation for the Python pickle module."); // Compatibility with Visual Studio 2013 and older which don't support // the inline keyword in C (only in C++): use __inline instead. #if (defined(_MSC_VER) && _MSC_VER < 1900 \ && !defined(__cplusplus) && !defined(inline)) # define PYCAPI_COMPAT_INLINE(TYPE static __inline TYPE #else # define PYCAPI_COMPAT_STATIC_INLINE(TYPE) static inline TYPE #endif #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_SIZE) /* 3.9.0a4 */ PYCAPI_COMPAT_STATIC_INLINE(void) _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { ob->ob_size = size; } #define Py_SET_SIZE(ob, size) _Py_SET_SIZE((PyVarObject*)(ob), size) #endif #if (PY_VERSION_HEX >= 0x30D00A5) /* 3.13.0a5 */ /** * In 3.13a5 ``_PyLong_AsByteArray`` got a new ``with_exceptions`` argument. 
* See https://github.com/python/cpython/commit/7861dfd26a41e40c2b4361eb0bb1356b9b4a064b
 */
/* Wrapper with the pre-3.13 five-argument signature; it always passes
   with_exceptions=1 to the real function. */
static int
_PyLong_AsByteArray_compat(PyLongObject* v,
                           unsigned char* bytes, size_t n,
                           int little_endian, int is_signed)
{
    return _PyLong_AsByteArray(
        v, bytes, n, little_endian, is_signed,
        1 /* with_exceptions */ );
}
/* Defined AFTER the wrapper so the call inside the wrapper still reaches
   the real six-argument function. */
#define _PyLong_AsByteArray(v, bytes, n, little_endian, is_signed) _PyLong_AsByteArray_compat(v, bytes, n, little_endian, is_signed)
#endif

#if (PY_VERSION_HEX >= 0x30D00A1) /* 3.13.0a1 */
/**
 * In 3.13a1 ``_PyObject_LookupAttrId`` was removed.
 * See https://github.com/python/cpython/commit/579aa89e68a6607398317a50586af781981e89fb
 * See also the explanation for 3.10 below.
 *
 * Returns the result of ``PyObject_GetOptionalAttr`` unchanged (or -1 if
 * the identifier cannot be turned into a string); the attribute value
 * itself is discarded.
 */
static int
_PyObject_HasAttrId(PyObject* obj, void* id)
{
    int result;
    PyObject *oname = _PyUnicode_FromId(id); /* borrowed */
    if (!oname) {
        return -1;
    }
    PyObject* attr_val;
    result = PyObject_GetOptionalAttr(obj, oname, &attr_val);
    Py_XDECREF(attr_val);
    return result;
}
#elif (PY_VERSION_HEX >= 0x30A00B1) /* 3.10.0b1 */
/**
 * The ``_PyObject_LookupAttrId`` function replaces the combo of
 * ``_PyObject_HasAttrId`` followed by ``_PyObject_GetAttrId``; our code isn't
 * structured to take advantage of that, so for now we throw away the
 * resulting attribute value and continue to make the extra ``Get`` call.
 */
static int
_PyObject_HasAttrId(PyObject* obj, void* id)
{
    int result;
    PyObject* attr_val;
    result = _PyObject_LookupAttrId(obj, id, &attr_val);
    Py_XDECREF(attr_val);
    return result;
}
#endif

/*
 * This declaration was moved to the internal API only accessible for building
 * CPython itself. But the implementation is still in `Objects/longobject.c` and
 * Python's own `Modules/_pickle.c` still uses it.
 */
#if (PY_VERSION_HEX >= 0x30E00A4) /* >= 3.14.0a4 */
PyAPI_FUNC(int64_t) _PyLong_NumBits(PyObject *v);
#elif (PY_VERSION_HEX >= 0x30D00A1) /* >= 3.13.0a1 */
PyAPI_FUNC(size_t) _PyLong_NumBits(PyObject *v);
#endif

/* Before 3.11.0a7 the float pack/unpack helpers only existed under their
   underscore-prefixed names. */
#if (PY_VERSION_HEX < 0x30B00A7) /* 3.11.0a7 */
# define PyFloat_Pack8 _PyFloat_Pack8
# define PyFloat_Unpack8 _PyFloat_Unpack8
#endif

/* Bump this when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};

/* Pickle opcodes. These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C'
};

/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 */
#undef TRUE
#define TRUE  "I01\n"
#undef FALSE
#define FALSE "I00\n"

enum {
   /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
      batch_list/dict() pumps out before doing APPENDS/SETITEMS.
      Nothing will break if this gets out of synch with pickle.py, but it's
      unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the write buffer of Pickler when pickling to a
       stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16
};

/* Exception classes for pickle. These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError = NULL;
static PyObject *PicklingError = NULL;
static PyObject *UnpicklingError = NULL;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table = NULL;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry = NULL;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry = NULL;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache = NULL;

/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
static PyObject *name_mapping_2to3 = NULL;
/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
static PyObject *import_mapping_2to3 = NULL;
/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
static PyObject *name_mapping_3to2 = NULL;
static PyObject *import_mapping_3to2 = NULL;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple. */
static PyObject *empty_tuple = NULL;
/* For looking up name pairs in copyreg._extension_registry.
*/ static PyObject *two_tuple = NULL; static int stack_underflow(void) { PyErr_SetString(UnpicklingError, "unpickling stack underflow"); return -1; } /* Internal data type used as the unpickling stack. */ typedef struct { PyObject_VAR_HEAD PyObject **data; Py_ssize_t allocated; /* number of slots in data allocated */ } Pdata; static void Pdata_dealloc(Pdata *self) { Py_ssize_t i = Py_SIZE(self); while (--i >= 0) { Py_DECREF(self->data[i]); } PyMem_FREE(self->data); PyObject_Del(self); } static PyTypeObject Pdata_Type = { PyVarObject_HEAD_INIT(NULL, 0) "_pickle.Pdata", /*tp_name*/ sizeof(Pdata), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Pdata_dealloc, /*tp_dealloc*/ }; static PyObject * Pdata_New(void) { Pdata *self; if (!(self = PyObject_New(Pdata, &Pdata_Type))) return NULL; Py_SET_SIZE(self, 0); self->allocated = 8; self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *)); if (self->data) return (PyObject *)self; Py_DECREF(self); return PyErr_NoMemory(); } /* Retain only the initial clearto items. If clearto >= the current * number of items, this is a (non-erroneous) NOP. */ static int Pdata_clear(Pdata *self, Py_ssize_t clearto) { Py_ssize_t i = Py_SIZE(self); if (clearto < 0) return stack_underflow(); if (clearto >= i) return 0; while (--i >= clearto) { Py_CLEAR(self->data[i]); } Py_SET_SIZE(self, clearto); return 0; } static int Pdata_grow(Pdata *self) { PyObject **data = self->data; Py_ssize_t allocated = self->allocated; Py_ssize_t new_allocated; new_allocated = (allocated >> 3) + 6; /* check for integer overflow */ if (new_allocated > PY_SSIZE_T_MAX - allocated) goto nomemory; new_allocated += allocated; if ((size_t)new_allocated > ((size_t)PY_SSIZE_T_MAX / sizeof(PyObject *))) goto nomemory; data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *)); if (data == NULL) goto nomemory; self->data = data; self->allocated = new_allocated; return 0; nomemory: PyErr_NoMemory(); return -1; } /* D is a Pdata*. 
Pop the topmost element and store it into V, which * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError * is raised and V is set to NULL. */ static PyObject * Pdata_pop(Pdata *self) { if (Py_SIZE(self) == 0) { PyErr_SetString(UnpicklingError, "bad pickle data"); return NULL; } Py_SET_SIZE(self, Py_SIZE(self) - 1); return self->data[Py_SIZE(self)]; } #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0) static int Pdata_push(Pdata *self, PyObject *obj) { if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) { return -1; } self->data[Py_SIZE(self)] = obj; Py_SET_SIZE(self, Py_SIZE(self) + 1); return 0; } /* Push an object on stack, transferring its ownership to the stack. */ #define PDATA_PUSH(D, O, ER) do { \ if (Pdata_push((D), (O)) < 0) return (ER); } while(0) /* Push an object on stack, adding a new reference to the object. */ #define PDATA_APPEND(D, O, ER) do { \ Py_INCREF((O)); \ if (Pdata_push((D), (O)) < 0) return (ER); } while(0) static PyObject * Pdata_poptuple(Pdata *self, Py_ssize_t start) { PyObject *tuple; Py_ssize_t len, i, j; len = Py_SIZE(self) - start; tuple = PyTuple_New(len); if (tuple == NULL) return NULL; for (i = start, j = 0; j < len; i++, j++) PyTuple_SET_ITEM(tuple, j, self->data[i]); Py_SET_SIZE(self, start); return tuple; } static PyObject * Pdata_poplist(Pdata *self, Py_ssize_t start) { PyObject *list; Py_ssize_t len, i, j; len = Py_SIZE(self) - start; list = PyList_New(len); if (list == NULL) return NULL; for (i = start, j = 0; j < len; i++, j++) PyList_SET_ITEM(list, j, self->data[i]); Py_SET_SIZE(self, start); return list; } typedef struct { PyObject *me_key; Py_ssize_t me_value; } PyMemoEntry; typedef struct { Py_ssize_t mt_mask; Py_ssize_t mt_used; Py_ssize_t mt_allocated; PyMemoEntry *mt_table; } PyMemoTable; typedef struct PicklerObject { PyObject_HEAD PyMemoTable *memo; /* Memo table, keep track of the seen objects to support self-referential objects pickling. 
*/ PyObject *pers_func; /* persistent_id() method, can be NULL */ PyObject *dispatch_table; /* private dispatch_table, can be NULL */ PyObject *arg; PyObject *write; /* write() method of the output stream. */ PyObject *output_buffer; /* Write into a local bytearray buffer before flushing to the stream. */ Py_ssize_t output_len; /* Length of output_buffer. */ Py_ssize_t max_output_len; /* Allocation size of output_buffer. */ int proto; /* Pickle protocol number, >= 0 */ int bin; /* Boolean, true if proto > 0 */ Py_ssize_t buf_size; /* Size of the current buffered pickle data */ int fast; /* Enable fast mode if set to a true value. The fast mode disable the usage of memo, therefore speeding the pickling process by not generating superfluous PUT opcodes. It should not be used if with self-referential objects. */ int fast_nesting; int fix_imports; /* Indicate whether Pickler should fix the name of globals for Python 2.x. */ PyObject *fast_memo; } PicklerObject; typedef struct UnpicklerObject { PyObject_HEAD Pdata *stack; /* Pickle data stack, store unpickled objects. */ /* The unpickler memo is just an array of PyObject *s. Using a dict is unnecessary, since the keys are contiguous ints. */ PyObject **memo; Py_ssize_t memo_size; PyObject *arg; PyObject *pers_func; /* persistent_load() method, can be NULL. */ Py_buffer buffer; char *input_buffer; char *input_line; Py_ssize_t input_len; Py_ssize_t next_read_idx; Py_ssize_t prefetched_idx; /* index of first prefetched byte */ PyObject *read; /* read() method of the input stream. */ PyObject *readline; /* readline() method of the input stream. */ PyObject *peek; /* peek() method of the input stream, or NULL */ char *encoding; /* Name of the encoding to be used for decoding strings pickled using Python 2.x. The default value is "ASCII" */ char *errors; /* Name of errors handling scheme to used when decoding strings. The default value is "strict". */ Py_ssize_t *marks; /* Mark stack, used for unpickling container objects. 
*/ Py_ssize_t num_marks; /* Number of marks in the mark stack. */ Py_ssize_t marks_size; /* Current allocated size of the mark stack. */ int proto; /* Protocol of the pickle loaded. */ int fix_imports; /* Indicate whether Unpickler should fix the name of globals pickled by Python 2.x. */ } UnpicklerObject; /* Forward declarations */ static int save(PicklerObject *, PyObject *, int); static int save_reduce(PicklerObject *, PyObject *, PyObject *); static PyTypeObject Pickler_Type; static PyTypeObject Unpickler_Type; /************************************************************************* A custom hashtable mapping void* to longs. This is used by the pickler for memoization. Using a custom hashtable rather than PyDict allows us to skip a bunch of unnecessary object creation. This makes a huge performance difference. */ #define MT_MINSIZE 8 #define PERTURB_SHIFT 5 static PyMemoTable * PyMemoTable_New(void) { PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable)); if (memo == NULL) { PyErr_NoMemory(); return NULL; } memo->mt_used = 0; memo->mt_allocated = MT_MINSIZE; memo->mt_mask = MT_MINSIZE - 1; memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry)); if (memo->mt_table == NULL) { PyMem_FREE(memo); PyErr_NoMemory(); return NULL; } memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry)); return memo; } static PyMemoTable * PyMemoTable_Copy(PyMemoTable *self) { Py_ssize_t i; PyMemoTable *new = PyMemoTable_New(); if (new == NULL) return NULL; new->mt_used = self->mt_used; new->mt_allocated = self->mt_allocated; new->mt_mask = self->mt_mask; /* The table we get from _New() is probably smaller than we wanted. Free it and allocate one that's the right size. 
*/ PyMem_FREE(new->mt_table); new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry)); if (new->mt_table == NULL) { PyMem_FREE(new); return NULL; } for (i = 0; i < self->mt_allocated; i++) { Py_XINCREF(self->mt_table[i].me_key); } memcpy(new->mt_table, self->mt_table, sizeof(PyMemoEntry) * self->mt_allocated); return new; } static Py_ssize_t PyMemoTable_Size(PyMemoTable *self) { return self->mt_used; } static int PyMemoTable_Clear(PyMemoTable *self) { Py_ssize_t i = self->mt_allocated; while (--i >= 0) { Py_XDECREF(self->mt_table[i].me_key); } self->mt_used = 0; memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry)); return 0; } static void PyMemoTable_Del(PyMemoTable *self) { if (self == NULL) return; PyMemoTable_Clear(self); PyMem_FREE(self->mt_table); PyMem_FREE(self); } /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup() can be considerably simpler than dictobject.c's lookdict(). */ static PyMemoEntry * _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key) { size_t i; size_t perturb; size_t mask = (size_t)self->mt_mask; PyMemoEntry *table = self->mt_table; PyMemoEntry *entry; Py_hash_t hash = (Py_hash_t)key >> 3; i = hash & mask; entry = &table[i]; if (entry->me_key == NULL || entry->me_key == key) return entry; for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; entry = &table[i & mask]; if (entry->me_key == NULL || entry->me_key == key) return entry; } assert(0); /* Never reached */ return NULL; } /* Returns -1 on failure, 0 on success. */ static int _PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size) { PyMemoEntry *oldtable = NULL; PyMemoEntry *oldentry, *newentry; Py_ssize_t new_size = MT_MINSIZE; Py_ssize_t to_process; assert(min_size > 0); /* Find the smallest valid table size >= min_size. */ while (new_size < min_size && new_size > 0) new_size <<= 1; if (new_size <= 0) { PyErr_NoMemory(); return -1; } /* new_size needs to be a power of two. 
*/ assert((new_size & (new_size - 1)) == 0); /* Allocate new table. */ oldtable = self->mt_table; self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry)); if (self->mt_table == NULL) { PyMem_FREE(oldtable); PyErr_NoMemory(); return -1; } self->mt_allocated = new_size; self->mt_mask = new_size - 1; memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size); /* Copy entries from the old table. */ to_process = self->mt_used; for (oldentry = oldtable; to_process > 0; oldentry++) { if (oldentry->me_key != NULL) { to_process--; /* newentry is a pointer to a chunk of the new mt_table, so we're setting the key:value pair in-place. */ newentry = _PyMemoTable_Lookup(self, oldentry->me_key); newentry->me_key = oldentry->me_key; newentry->me_value = oldentry->me_value; } } /* Deallocate the old table. */ PyMem_FREE(oldtable); return 0; } /* Returns NULL on failure, a pointer to the value otherwise. */ static Py_ssize_t * PyMemoTable_Get(PyMemoTable *self, PyObject *key) { PyMemoEntry *entry = _PyMemoTable_Lookup(self, key); if (entry->me_key == NULL) return NULL; return &entry->me_value; } /* Returns -1 on failure, 0 on success. */ static int PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value) { PyMemoEntry *entry; assert(key != NULL); entry = _PyMemoTable_Lookup(self, key); if (entry->me_key != NULL) { entry->me_value = value; return 0; } Py_INCREF(key); entry->me_key = key; entry->me_value = value; self->mt_used++; /* If we added a key, we can safely resize. Otherwise just return! * If used >= 2/3 size, adjust size. Normally, this quaduples the size. * * Quadrupling the size improves average table sparseness * (reducing collisions) at the cost of some memory. It also halves * the number of expensive resize operations in a growing memo table. * * Very large memo tables (over 50K items) use doubling instead. * This may help applications with severe memory constraints. 
*/ if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2)) return 0; return _PyMemoTable_ResizeTable(self, (self->mt_used > 50000 ? 2 : 4) * self->mt_used); } #undef MT_MINSIZE #undef PERTURB_SHIFT /*************************************************************************/ /* Helpers for creating the argument tuple passed to functions. This has the performance advantage of calling PyTuple_New() only once. XXX(avassalotti): Inline directly in _Pickler_FastCall() and _Unpickler_FastCall(). */ #define ARG_TUP(self, obj) do { \ if ((self)->arg || ((self)->arg=PyTuple_New(1))) { \ Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0)); \ PyTuple_SET_ITEM((self)->arg, 0, (obj)); \ } \ else { \ Py_DECREF((obj)); \ } \ } while (0) #define FREE_ARG_TUP(self) do { \ if ((self)->arg->ob_refcnt > 1) \ Py_CLEAR((self)->arg); \ } while (0) /* A temporary cleaner API for fast single argument function call. XXX: Does caching the argument tuple provides any real performance benefits? A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then immediately DECREF it) and 1,200,000 calls when allocating brand new tuples (i.e, call PyTuple_New() and store the returned value in an array), to save one second (wall clock time). Either ways, the loading time a pickle stream large enough to generate this number of calls would be massively overwhelmed by other factors, like I/O throughput, the GC traversal and object allocation overhead. So, I really doubt these functions provide any real benefits. On the other hand, oprofile reports that pickle spends a lot of time in these functions. But, that is probably more related to the function call overhead, than the argument tuple allocation. XXX: And, what is the reference behavior of these? Steal, borrow? At first glance, it seems to steal the reference of 'arg' and borrow the reference of 'func'. 
*/ static PyObject * _Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg) { PyObject *result = NULL; ARG_TUP(self, arg); if (self->arg) { result = PyObject_Call(func, self->arg, NULL); FREE_ARG_TUP(self); } return result; } static int _Pickler_ClearBuffer(PicklerObject *self) { Py_CLEAR(self->output_buffer); self->output_buffer = PyBytes_FromStringAndSize(NULL, self->max_output_len); if (self->output_buffer == NULL) return -1; self->output_len = 0; return 0; } static PyObject * _Pickler_GetString(PicklerObject *self) { PyObject *output_buffer = self->output_buffer; assert(self->output_buffer != NULL); self->output_buffer = NULL; /* Resize down to exact size */ if (_PyBytes_Resize(&output_buffer, self->output_len) < 0) return NULL; return output_buffer; } static int _Pickler_FlushToFile(PicklerObject *self) { PyObject *output, *result; assert(self->write != NULL); output = _Pickler_GetString(self); if (output == NULL) return -1; result = _Pickler_FastCall(self, self->write, output); Py_XDECREF(result); return (result == NULL) ? -1 : 0; } static Py_ssize_t _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n) { Py_ssize_t i, required; char *buffer; assert(s != NULL); required = self->output_len + n; if (required > self->max_output_len) { if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) { /* XXX This reallocates a new buffer every time, which is a bit wasteful. */ if (_Pickler_FlushToFile(self) < 0) return -1; if (_Pickler_ClearBuffer(self) < 0) return -1; } if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) { /* we already flushed above, so the buffer is empty */ PyObject *result; /* XXX we could spare an intermediate copy and pass a memoryview instead */ PyObject *output = PyBytes_FromStringAndSize(s, n); if (s == NULL) return -1; result = _Pickler_FastCall(self, self->write, output); Py_XDECREF(result); return (result == NULL) ? 
-1 : 0; } else { if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) { PyErr_NoMemory(); return -1; } self->max_output_len = (self->output_len + n) / 2 * 3 + 1; if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0) return -1; } } buffer = PyBytes_AS_STRING(self->output_buffer); if (n < 8) { /* This is faster than memcpy when the string is short. */ for (i = 0; i < n; i++) { buffer[self->output_len + i] = s[i]; } } else { memcpy(buffer + self->output_len, s, n); } self->output_len += n; return n; } static PicklerObject * _Pickler_New(void) { PicklerObject *self; self = PyObject_GC_New(PicklerObject, &Pickler_Type); if (self == NULL) return NULL; self->pers_func = NULL; self->dispatch_table = NULL; self->arg = NULL; self->write = NULL; self->proto = 0; self->bin = 0; self->fast = 0; self->fast_nesting = 0; self->fix_imports = 0; self->fast_memo = NULL; self->memo = PyMemoTable_New(); if (self->memo == NULL) { Py_DECREF(self); return NULL; } self->max_output_len = WRITE_BUF_SIZE; self->output_len = 0; self->output_buffer = PyBytes_FromStringAndSize(NULL, self->max_output_len); if (self->output_buffer == NULL) { Py_DECREF(self); return NULL; } return self; } static int _Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj, PyObject *fix_imports_obj) { long proto = 0; int fix_imports; if (proto_obj == NULL || proto_obj == Py_None) proto = DEFAULT_PROTOCOL; else { proto = PyLong_AsLong(proto_obj); if (proto == -1 && PyErr_Occurred()) return -1; } if (proto < 0) proto = HIGHEST_PROTOCOL; if (proto > HIGHEST_PROTOCOL) { PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d", HIGHEST_PROTOCOL); return -1; } fix_imports = PyObject_IsTrue(fix_imports_obj); if (fix_imports == -1) return -1; self->proto = proto; self->bin = proto > 0; self->fix_imports = fix_imports && proto < 3; return 0; } /* Returns -1 (with an exception set) on failure, 0 on success. This may be called once on a freshly created Pickler. 
*/ static int _Pickler_SetOutputStream(PicklerObject *self, PyObject *file) { _Py_IDENTIFIER(write); assert(file != NULL); self->write = _PyObject_GetAttrId(file, &PyId_write); if (self->write == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_SetString(PyExc_TypeError, "file must have a 'write' attribute"); return -1; } return 0; } /* See documentation for _Pickler_FastCall(). */ static PyObject * _Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg) { PyObject *result = NULL; ARG_TUP(self, arg); if (self->arg) { result = PyObject_Call(func, self->arg, NULL); FREE_ARG_TUP(self); } return result; } /* Returns the size of the input on success, -1 on failure. This takes its own reference to `input`. */ static Py_ssize_t _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input) { if (self->buffer.buf != NULL) PyBuffer_Release(&self->buffer); if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0) return -1; self->input_buffer = self->buffer.buf; self->input_len = self->buffer.len; self->next_read_idx = 0; self->prefetched_idx = self->input_len; return self->input_len; } static int _Unpickler_SkipConsumed(UnpicklerObject *self) { Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx; if (consumed > 0) { PyObject *r; assert(self->peek); /* otherwise we did something wrong */ /* This makes an useless copy... */ r = PyObject_CallFunction(self->read, "n", consumed); if (r == NULL) return -1; Py_DECREF(r); self->prefetched_idx = self->next_read_idx; } return 0; } static const Py_ssize_t READ_WHOLE_LINE = -1; /* If reading from a file, we need to only pull the bytes we need, since there may be multiple pickle objects arranged contiguously in the same input buffer. If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n` bytes from the input stream/buffer. Update the unpickler's input buffer with the newly-read data. 
Returns -1 on failure; on success, returns the number of bytes read from
   the file.

   On success, self->input_len will be 0; this is intentional so that when
   unpickling from a file, the "we've run out of data" code paths will trigger,
   causing the Unpickler to go back to the file for more data. Use the returned
   size to tell you how much data you can process.

   NOTE(review): the body below installs the data through
   _Unpickler_SetStringInput(), so the statement about input_len being 0
   should be confirmed against that function. */
static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
{
    PyObject *data;
    Py_ssize_t read_size, prefetched_size = 0;

    assert(self->read != NULL);

    /* Discard from the file whatever was already consumed out of
       previously prefetched data. */
    if (_Unpickler_SkipConsumed(self) < 0)
        return -1;

    if (n == READ_WHOLE_LINE)
        data = PyObject_Call(self->readline, empty_tuple, NULL);
    else {
        PyObject *len = PyLong_FromSsize_t(n);
        if (len == NULL)
            return -1;
        /* `len` is not released here; presumably _Unpickler_FastCall()
           takes over the reference -- confirm against its macros. */
        data = _Unpickler_FastCall(self, self->read, len);
    }
    if (data == NULL)
        return -1;

    /* Prefetch some data without advancing the file pointer, if possible */
    if (self->peek) {
        PyObject *len, *prefetched;
        len = PyLong_FromSsize_t(PREFETCH);
        if (len == NULL) {
            Py_DECREF(data);
            return -1;
        }
        prefetched = _Unpickler_FastCall(self, self->peek, len);
        if (prefetched == NULL) {
            if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
                /* peek() is probably not supported by the given file object */
                PyErr_Clear();
                Py_CLEAR(self->peek);
            }
            else {
                Py_DECREF(data);
                return -1;
            }
        }
        else {
            assert(PyBytes_Check(prefetched));
            prefetched_size = PyBytes_GET_SIZE(prefetched);
            /* Appends the prefetched bytes to `data` and drops the
               reference to `prefetched`; `data` is NULL on failure. */
            PyBytes_ConcatAndDel(&data, prefetched);
            if (data == NULL)
                return -1;
        }
    }

    /* The prefetched tail is part of the buffer but does not count as
       read: subtract it from the size reported to the caller. */
    read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
    Py_DECREF(data);
    self->prefetched_idx = read_size;
    return read_size;
}

/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.
Note that when reading from a file-like object, self->next_read_idx won't be updated (it should remain at 0 for the entire unpickling process). You should use this function's return value to know how many bytes you can consume. Returns -1 (with an exception set) on failure. On success, return the number of chars read. */ static Py_ssize_t _Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n) { Py_ssize_t num_read; if (self->next_read_idx + n <= self->input_len) { *s = self->input_buffer + self->next_read_idx; self->next_read_idx += n; return n; } if (!self->read) { PyErr_Format(PyExc_EOFError, "Ran out of input"); return -1; } num_read = _Unpickler_ReadFromFile(self, n); if (num_read < 0) return -1; if (num_read < n) { PyErr_Format(PyExc_EOFError, "Ran out of input"); return -1; } *s = self->input_buffer; self->next_read_idx = n; return n; } static Py_ssize_t _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len, char **result) { char *input_line = PyMem_Realloc(self->input_line, len + 1); if (input_line == NULL) return -1; memcpy(input_line, line, len); input_line[len] = '\0'; self->input_line = input_line; *result = self->input_line; return len; } /* Read a line from the input stream/buffer. If we run off the end of the input before hitting \n, return the data we found. Returns the number of chars read, or -1 on failure. 
*/ static Py_ssize_t _Unpickler_Readline(UnpicklerObject *self, char **result) { Py_ssize_t i, num_read; for (i = self->next_read_idx; i < self->input_len; i++) { if (self->input_buffer[i] == '\n') { char *line_start = self->input_buffer + self->next_read_idx; num_read = i - self->next_read_idx + 1; self->next_read_idx = i + 1; return _Unpickler_CopyLine(self, line_start, num_read, result); } } if (self->read) { num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE); if (num_read < 0) return -1; self->next_read_idx = num_read; return _Unpickler_CopyLine(self, self->input_buffer, num_read, result); } /* If we get here, we've run off the end of the input string. Return the remaining string and let the caller figure it out. */ *result = self->input_buffer + self->next_read_idx; num_read = i - self->next_read_idx; self->next_read_idx = i; return num_read; } /* Returns -1 (with an exception set) on failure, 0 on success. The memo array will be modified in place. */ static int _Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size) { Py_ssize_t i; PyObject **memo; assert(new_size > self->memo_size); memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *)); if (memo == NULL) { PyErr_NoMemory(); return -1; } self->memo = memo; for (i = self->memo_size; i < new_size; i++) self->memo[i] = NULL; self->memo_size = new_size; return 0; } /* Returns NULL if idx is out of bounds. */ static PyObject * _Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx) { if (idx < 0 || idx >= self->memo_size) return NULL; return self->memo[idx]; } /* Returns -1 (with an exception set) on failure, 0 on success. This takes its own reference to `value`. 
*/ static int _Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value) { PyObject *old_item; if (idx >= self->memo_size) { if (_Unpickler_ResizeMemoList(self, idx * 2) < 0) return -1; assert(idx < self->memo_size); } Py_INCREF(value); old_item = self->memo[idx]; self->memo[idx] = value; Py_XDECREF(old_item); return 0; } static PyObject ** _Unpickler_NewMemo(Py_ssize_t new_size) { PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *)); if (memo == NULL) return NULL; memset(memo, 0, new_size * sizeof(PyObject *)); return memo; } /* Free the unpickler's memo, taking care to decref any items left in it. */ static void _Unpickler_MemoCleanup(UnpicklerObject *self) { Py_ssize_t i; PyObject **memo = self->memo; if (self->memo == NULL) return; self->memo = NULL; i = self->memo_size; while (--i >= 0) { Py_XDECREF(memo[i]); } PyMem_FREE(memo); } static UnpicklerObject * _Unpickler_New(void) { UnpicklerObject *self; self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type); if (self == NULL) return NULL; self->stack = (Pdata *)Pdata_New(); if (self->stack == NULL) { Py_DECREF(self); return NULL; } memset(&self->buffer, 0, sizeof(Py_buffer)); self->memo_size = 32; self->memo = _Unpickler_NewMemo(self->memo_size); if (self->memo == NULL) { Py_DECREF(self); return NULL; } self->arg = NULL; self->pers_func = NULL; self->input_buffer = NULL; self->input_line = NULL; self->input_len = 0; self->next_read_idx = 0; self->prefetched_idx = 0; self->read = NULL; self->readline = NULL; self->peek = NULL; self->encoding = NULL; self->errors = NULL; self->marks = NULL; self->num_marks = 0; self->marks_size = 0; self->proto = 0; self->fix_imports = 0; return self; } /* Returns -1 (with an exception set) on failure, 0 on success. This may be called once on a freshly created Pickler. 
*/
static int
_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
{
    _Py_IDENTIFIER(peek);
    _Py_IDENTIFIER(read);
    _Py_IDENTIFIER(readline);

    /* peek() is optional: a missing attribute is not an error, anything
       else raised by the lookup is. */
    self->peek = _PyObject_GetAttrId(file, &PyId_peek);
    if (self->peek == NULL) {
        if (!PyErr_ExceptionMatches(PyExc_AttributeError))
            return -1;
        PyErr_Clear();
    }

    /* read() and readline() are both mandatory. */
    self->read = _PyObject_GetAttrId(file, &PyId_read);
    self->readline = _PyObject_GetAttrId(file, &PyId_readline);
    if (self->read != NULL && self->readline != NULL)
        return 0;

    if (PyErr_ExceptionMatches(PyExc_AttributeError))
        PyErr_SetString(PyExc_TypeError,
                        "file must have 'read' and 'readline' attributes");
    /* Drop whatever was looked up successfully. */
    Py_CLEAR(self->read);
    Py_CLEAR(self->readline);
    Py_CLEAR(self->peek);
    return -1;
}

/* Store copies of the encoding and error-handler names on the unpickler.
   A NULL `encoding` means "ASCII", a NULL `errors` means "strict".

   Returns -1 (with an exception set) on failure, 0 on success. This may
   be called once on a freshly created Unpickler. */
static int
_Unpickler_SetInputEncoding(UnpicklerObject *self,
                            const char *encoding,
                            const char *errors)
{
    const char *encoding_name = (encoding != NULL) ? encoding : "ASCII";
    const char *errors_name = (errors != NULL) ? errors : "strict";

    self->encoding = strdup(encoding_name);
    self->errors = strdup(errors_name);
    if (self->encoding != NULL && self->errors != NULL)
        return 0;

    PyErr_NoMemory();
    return -1;
}

/* Generate a GET opcode for an object stored in the memo.
*/
static int
memo_get(PicklerObject *self, PyObject *key)
{
    Py_ssize_t *slot;
    Py_ssize_t memo_id;
    Py_ssize_t oplen;
    char opbuf[30];

    slot = PyMemoTable_Get(self->memo, key);
    if (slot == NULL) {
        PyErr_SetObject(PyExc_KeyError, key);
        return -1;
    }
    memo_id = *slot;

    if (self->bin) {
        if (memo_id < 256) {
            /* One-byte memo id. */
            opbuf[0] = BINGET;
            opbuf[1] = (unsigned char)(memo_id & 0xff);
            oplen = 2;
        }
        else if (memo_id <= 0xffffffffL) {
            /* Four-byte memo id, least significant byte first. */
            opbuf[0] = LONG_BINGET;
            opbuf[1] = (unsigned char)(memo_id & 0xff);
            opbuf[2] = (unsigned char)((memo_id >> 8) & 0xff);
            opbuf[3] = (unsigned char)((memo_id >> 16) & 0xff);
            opbuf[4] = (unsigned char)((memo_id >> 24) & 0xff);
            oplen = 5;
        }
        else { /* unlikely */
            PyErr_SetString(PicklingError,
                            "memo id too large for LONG_BINGET");
            return -1;
        }
    }
    else {
        /* Text form: opcode, decimal memo id, newline. */
        opbuf[0] = GET;
        PyOS_snprintf(opbuf + 1, sizeof(opbuf) - 1,
                      "%" PY_FORMAT_SIZE_T "d\n", memo_id);
        oplen = strlen(opbuf);
    }

    return (_Pickler_Write(self, opbuf, oplen) < 0) ? -1 : 0;
}

/* Store an object in the memo, assign it a new unique ID based on the number
   of objects currently stored in the memo and generate a PUT opcode.
*/ static int memo_put(PicklerObject *self, PyObject *obj) { Py_ssize_t x; char pdata[30]; Py_ssize_t len; int status = 0; if (self->fast) return 0; x = PyMemoTable_Size(self->memo); if (PyMemoTable_Set(self->memo, obj, x) < 0) goto error; if (!self->bin) { pdata[0] = PUT; PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%" PY_FORMAT_SIZE_T "d\n", x); len = strlen(pdata); } else { if (x < 256) { pdata[0] = BINPUT; pdata[1] = (unsigned char)x; len = 2; } else if (x <= 0xffffffffL) { pdata[0] = LONG_BINPUT; pdata[1] = (unsigned char)(x & 0xff); pdata[2] = (unsigned char)((x >> 8) & 0xff); pdata[3] = (unsigned char)((x >> 16) & 0xff); pdata[4] = (unsigned char)((x >> 24) & 0xff); len = 5; } else { /* unlikely */ PyErr_SetString(PicklingError, "memo id too large for LONG_BINPUT"); return -1; } } if (_Pickler_Write(self, pdata, len) < 0) goto error; if (0) { error: status = -1; } return status; } static PyObject * whichmodule(PyObject *global, PyObject *global_name) { Py_ssize_t i, j; static PyObject *module_str = NULL; static PyObject *main_str = NULL; PyObject *module_name; PyObject *modules_dict; PyObject *module; PyObject *obj; if (module_str == NULL) { module_str = PyUnicode_InternFromString("__module__"); if (module_str == NULL) return NULL; main_str = PyUnicode_InternFromString("__main__"); if (main_str == NULL) return NULL; } module_name = PyObject_GetAttr(global, module_str); /* In some rare cases (e.g., bound methods of extension types), __module__ can be None. If it is so, then search sys.modules for the module of global. 
*/ if (module_name == Py_None) { Py_DECREF(module_name); goto search; } if (module_name) { return module_name; } if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); else return NULL; search: modules_dict = PySys_GetObject("modules"); if (modules_dict == NULL) return NULL; i = 0; module_name = NULL; while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) { if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1) continue; obj = PyObject_GetAttr(module, global_name); if (obj == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); else return NULL; continue; } if (obj != global) { Py_DECREF(obj); continue; } Py_DECREF(obj); break; } /* If no module is found, use __main__. */ if (!j) { module_name = main_str; } Py_INCREF(module_name); return module_name; } /* fast_save_enter() and fast_save_leave() are guards against recursive objects when Pickler is used with the "fast mode" (i.e., with object memoization disabled). If the nesting of a list or dict object exceed FAST_NESTING_LIMIT, these guards will start keeping an internal reference to the seen list or dict objects and check whether these objects are recursive. These are not strictly necessary, since save() has a hard-coded recursion limit, but they give a nicer error message than the typical RuntimeError. */ static int fast_save_enter(PicklerObject *self, PyObject *obj) { /* if fast_nesting < 0, we're doing an error exit. 
*/ if (++self->fast_nesting >= FAST_NESTING_LIMIT) { PyObject *key = NULL; if (self->fast_memo == NULL) { self->fast_memo = PyDict_New(); if (self->fast_memo == NULL) { self->fast_nesting = -1; return 0; } } key = PyLong_FromVoidPtr(obj); if (key == NULL) return 0; if (PyDict_GetItem(self->fast_memo, key)) { Py_DECREF(key); PyErr_Format(PyExc_ValueError, "fast mode: can't pickle cyclic objects " "including object type %.200s at %p", obj->ob_type->tp_name, obj); self->fast_nesting = -1; return 0; } if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) { Py_DECREF(key); self->fast_nesting = -1; return 0; } Py_DECREF(key); } return 1; } static int fast_save_leave(PicklerObject *self, PyObject *obj) { if (self->fast_nesting-- >= FAST_NESTING_LIMIT) { PyObject *key = PyLong_FromVoidPtr(obj); if (key == NULL) return 0; if (PyDict_DelItem(self->fast_memo, key) < 0) { Py_DECREF(key); return 0; } Py_DECREF(key); } return 1; } static int save_none(PicklerObject *self, PyObject *obj) { const char none_op = NONE; if (_Pickler_Write(self, &none_op, 1) < 0) return -1; return 0; } static int save_bool(PicklerObject *self, PyObject *obj) { static const char *buf[2] = { FALSE, TRUE }; const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1}; int p = (obj == Py_True); if (self->proto >= 2) { const char bool_op = p ? NEWTRUE : NEWFALSE; if (_Pickler_Write(self, &bool_op, 1) < 0) return -1; } else if (_Pickler_Write(self, buf[p], len[p]) < 0) return -1; return 0; } static int save_int(PicklerObject *self, long x) { char pdata[32]; Py_ssize_t len = 0; if (!self->bin #if SIZEOF_LONG > 4 || x > 0x7fffffffL || x < -0x80000000L #endif ) { /* Text-mode pickle, or long too big to fit in the 4-byte * signed BININT format: store as a string. */ pdata[0] = LONG; /* use LONG for consistency with pickle.py */ PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x); if (_Pickler_Write(self, pdata, strlen(pdata)) < 0) return -1; } else { /* Binary pickle and x fits in a signed 4-byte int. 
*/ pdata[1] = (unsigned char)(x & 0xff); pdata[2] = (unsigned char)((x >> 8) & 0xff); pdata[3] = (unsigned char)((x >> 16) & 0xff); pdata[4] = (unsigned char)((x >> 24) & 0xff); if ((pdata[4] == 0) && (pdata[3] == 0)) { if (pdata[2] == 0) { pdata[0] = BININT1; len = 2; } else { pdata[0] = BININT2; len = 3; } } else { pdata[0] = BININT; len = 5; } if (_Pickler_Write(self, pdata, len) < 0) return -1; } return 0; } static int save_long(PicklerObject *self, PyObject *obj) { PyObject *repr = NULL; Py_ssize_t size; long val = PyLong_AsLong(obj); int status = 0; const char long_op = LONG; if (val == -1 && PyErr_Occurred()) { /* out of range for int pickling */ PyErr_Clear(); } else #if SIZEOF_LONG > 4 if (val <= 0x7fffffffL && val >= -0x80000000L) #endif return save_int(self, val); if (self->proto >= 2) { /* Linear-time pickling. */ size_t nbits; size_t nbytes; unsigned char *pdata; char header[5]; int i; int sign = _PyLong_Sign(obj); if (sign == 0) { header[0] = LONG1; header[1] = 0; /* It's 0 -- an empty bytestring. */ if (_Pickler_Write(self, header, 2) < 0) goto error; return 0; } nbits = _PyLong_NumBits(obj); if (nbits == (size_t)-1 && PyErr_Occurred()) goto error; /* How many bytes do we need? There are nbits >> 3 full * bytes of data, and nbits & 7 leftover bits. If there * are any leftover bits, then we clearly need another * byte. Wnat's not so obvious is that we *probably* * need another byte even if there aren't any leftovers: * the most-significant bit of the most-significant byte * acts like a sign bit, and it's usually got a sense * opposite of the one we need. The exception is longs * of the form -(2**(8*j-1)) for j > 0. Such a long is * its own 256's-complement, so has the right sign bit * even without the extra byte. That's a pain to check * for in advance, though, so we always grab an extra * byte at the start, and cut it back later if possible. 
*/ nbytes = (nbits >> 3) + 1; if (nbytes > 0x7fffffffL) { PyErr_SetString(PyExc_OverflowError, "long too large to pickle"); goto error; } repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes); if (repr == NULL) goto error; pdata = (unsigned char *)PyBytes_AS_STRING(repr); i = _PyLong_AsByteArray((PyLongObject *)obj, pdata, nbytes, 1 /* little endian */ , 1 /* signed */ ); if (i < 0) goto error; /* If the long is negative, this may be a byte more than * needed. This is so iff the MSB is all redundant sign * bits. */ if (sign < 0 && nbytes > 1 && pdata[nbytes - 1] == 0xff && (pdata[nbytes - 2] & 0x80) != 0) { nbytes--; } if (nbytes < 256) { header[0] = LONG1; header[1] = (unsigned char)nbytes; size = 2; } else { header[0] = LONG4; size = (Py_ssize_t) nbytes; for (i = 1; i < 5; i++) { header[i] = (unsigned char)(size & 0xff); size >>= 8; } size = 5; } if (_Pickler_Write(self, header, size) < 0 || _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0) goto error; } else { const char *string; /* proto < 2: write the repr and newline. This is quadratic-time (in the number of digits), in both directions. We add a trailing 'L' to the repr, for compatibility with Python 2.x. 
*/ repr = PyObject_Repr(obj); if (repr == NULL) goto error; string = PyUnicode_AsUTF8AndSize(repr, &size); if (string == NULL) goto error; if (_Pickler_Write(self, &long_op, 1) < 0 || _Pickler_Write(self, string, size) < 0 || _Pickler_Write(self, "L\n", 2) < 0) goto error; } if (0) { error: status = -1; } Py_XDECREF(repr); return status; } static int save_float(PicklerObject *self, PyObject *obj) { double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj); if (self->bin) { char pdata[9]; pdata[0] = BINFLOAT; if (PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0) return -1; if (_Pickler_Write(self, pdata, 9) < 0) return -1; } else { int result = -1; char *buf = NULL; char op = FLOAT; if (_Pickler_Write(self, &op, 1) < 0) goto done; buf = PyOS_double_to_string(x, 'g', 17, 0, NULL); if (!buf) { PyErr_NoMemory(); goto done; } if (_Pickler_Write(self, buf, strlen(buf)) < 0) goto done; if (_Pickler_Write(self, "\n", 1) < 0) goto done; result = 0; done: PyMem_Free(buf); return result; } return 0; } /* Essentially PyObject_Repr(obj) for bytes, but it returns bytes, doesn't add the b prefix nor the quotes. 
*/ static PyObject * raw_bytes_escape(PyObject *obj) { PyObject *repr, *result; Py_ssize_t i, size; char *data, *p; size = PyBytes_GET_SIZE(obj); data = PyBytes_AS_STRING(obj); if (size > PY_SSIZE_T_MAX / 4) return PyErr_NoMemory(); repr = PyByteArray_FromStringAndSize(NULL, size * 4); if (repr == NULL) return NULL; if (size == 0) goto done; p = PyByteArray_AS_STRING(repr); for (i=0; i < size; i++) { char ch = data[i]; /* Map control characters, non-ASCII characters, apostrophe and * backslash to '\xXX' */ if (ch < 0x20 || ch >= 0x80 || ch == '\'' || ch == '\\') { *p++ = '\\'; *p++ = 'x'; *p++ = Py_hexdigits[(ch >> 4) & 0xf]; *p++ = Py_hexdigits[ch & 0xf]; } /* Copy everything else as-is */ else *p++ = ch; } size = p - PyByteArray_AS_STRING(repr); done: result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size); Py_DECREF(repr); return result; } static int save_bytes(PicklerObject *self, PyObject *obj) { if (self->proto < 3) { /* Older pickle protocols do not have an opcode for pickling bytes objects. Therefore, we need to fake the copy protocol (i.e., the __reduce__ method) to permit bytes object unpickling. Here we use a hack to be compatible with Python 2. Since in Python 2 'bytes' is just an alias for 'str' (which has different parameters than the actual bytes object), we use codecs.encode to create the appropriate 'str' object when unpickled using Python 2 *and* the appropriate 'bytes' object when unpickled using Python 3. Again this is a hack and we don't need to do this with newer protocols. 
*/ static PyObject *codecs_encode = NULL; PyObject *reduce_value = NULL; int status; if (codecs_encode == NULL) { PyObject *codecs_module = PyImport_ImportModule("codecs"); if (codecs_module == NULL) { return -1; } codecs_encode = PyObject_GetAttrString(codecs_module, "encode"); Py_DECREF(codecs_module); if (codecs_encode == NULL) { return -1; } } if (PyBytes_GET_SIZE(obj) == 0) { reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type); } else { static PyObject *latin1 = NULL; PyObject *unicode_str = PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), "strict"); if (unicode_str == NULL) return -1; if (latin1 == NULL) { latin1 = PyUnicode_InternFromString("latin1"); if (latin1 == NULL) return -1; } reduce_value = Py_BuildValue("(O(OO))", codecs_encode, unicode_str, latin1); Py_DECREF(unicode_str); } if (reduce_value == NULL) return -1; /* save_reduce() will memoize the object automatically. */ status = save_reduce(self, reduce_value, obj); Py_DECREF(reduce_value); return status; } else if (!self->bin) { const char string_op = STRING; PyObject *encoded = NULL; Py_ssize_t size; encoded = raw_bytes_escape(obj); if (encoded == NULL) goto error; if (_Pickler_Write(self, &string_op, 1) < 0) goto error; if (_Pickler_Write(self, "'", 1) < 0) goto error; size = PyBytes_GET_SIZE(encoded); if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) goto error; if (_Pickler_Write(self, "'\n", 2) < 0) goto error; Py_DECREF(encoded); return 0; error: Py_XDECREF(encoded); return -1; } else { Py_ssize_t size; char header[5]; Py_ssize_t len; size = PyBytes_GET_SIZE(obj); if (size < 0) return -1; if (size < 256) { header[0] = SHORT_BINBYTES; header[1] = (unsigned char)size; len = 2; } else if (size <= 0xffffffffL) { header[0] = BINBYTES; header[1] = (unsigned char)(size & 0xff); header[2] = (unsigned char)((size >> 8) & 0xff); header[3] = (unsigned char)((size >> 16) & 0xff); header[4] = (unsigned char)((size >> 24) & 0xff); len = 5; } else { 
PyErr_SetString(PyExc_OverflowError, "cannot serialize a bytes object larger than 4 GiB"); return -1; /* string too large */ } if (_Pickler_Write(self, header, len) < 0) return -1; if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0) return -1; if (memo_put(self, obj) < 0) return -1; return 0; } } /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates backslash and newline characters to \uXXXX escapes. */ static PyObject * raw_unicode_escape(PyObject *obj) { PyObject *repr, *result; char *p; Py_ssize_t i, size, expandsize; void *data; unsigned int kind; if (PyUnicode_READY(obj)) return NULL; size = PyUnicode_GET_LENGTH(obj); data = PyUnicode_DATA(obj); kind = PyUnicode_KIND(obj); if (kind == PyUnicode_4BYTE_KIND) expandsize = 10; else expandsize = 6; if (size > PY_SSIZE_T_MAX / expandsize) return PyErr_NoMemory(); repr = PyByteArray_FromStringAndSize(NULL, expandsize * size); if (repr == NULL) return NULL; if (size == 0) goto done; p = PyByteArray_AS_STRING(repr); for (i=0; i < size; i++) { Py_UCS4 ch = PyUnicode_READ(kind, data, i); /* Map 32-bit characters to '\Uxxxxxxxx' */ if (ch >= 0x10000) { *p++ = '\\'; *p++ = 'U'; *p++ = Py_hexdigits[(ch >> 28) & 0xf]; *p++ = Py_hexdigits[(ch >> 24) & 0xf]; *p++ = Py_hexdigits[(ch >> 20) & 0xf]; *p++ = Py_hexdigits[(ch >> 16) & 0xf]; *p++ = Py_hexdigits[(ch >> 12) & 0xf]; *p++ = Py_hexdigits[(ch >> 8) & 0xf]; *p++ = Py_hexdigits[(ch >> 4) & 0xf]; *p++ = Py_hexdigits[ch & 15]; } /* Map 16-bit characters to '\uxxxx' */ else if (ch >= 256 || ch == '\\' || ch == '\n') { *p++ = '\\'; *p++ = 'u'; *p++ = Py_hexdigits[(ch >> 12) & 0xf]; *p++ = Py_hexdigits[(ch >> 8) & 0xf]; *p++ = Py_hexdigits[(ch >> 4) & 0xf]; *p++ = Py_hexdigits[ch & 15]; } /* Copy everything else as-is */ else *p++ = (char) ch; } size = p - PyByteArray_AS_STRING(repr); done: result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size); Py_DECREF(repr); return result; } static int save_unicode(PicklerObject *self, PyObject *obj) { 
Py_ssize_t size; PyObject *encoded = NULL; if (self->bin) { char pdata[5]; encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass"); if (encoded == NULL) goto error; size = PyBytes_GET_SIZE(encoded); if (size > 0xffffffffL) { PyErr_SetString(PyExc_OverflowError, "cannot serialize a string larger than 4 GiB"); goto error; /* string too large */ } pdata[0] = BINUNICODE; pdata[1] = (unsigned char)(size & 0xff); pdata[2] = (unsigned char)((size >> 8) & 0xff); pdata[3] = (unsigned char)((size >> 16) & 0xff); pdata[4] = (unsigned char)((size >> 24) & 0xff); if (_Pickler_Write(self, pdata, 5) < 0) goto error; if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) goto error; } else { const char unicode_op = UNICODE; encoded = raw_unicode_escape(obj); if (encoded == NULL) goto error; if (_Pickler_Write(self, &unicode_op, 1) < 0) goto error; size = PyBytes_GET_SIZE(encoded); if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) goto error; if (_Pickler_Write(self, "\n", 1) < 0) goto error; } if (memo_put(self, obj) < 0) goto error; Py_DECREF(encoded); return 0; error: Py_XDECREF(encoded); return -1; } /* A helper for save_tuple. Push the len elements in tuple t on the stack. */ static int store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len) { Py_ssize_t i; assert(PyTuple_Size(t) == len); for (i = 0; i < len; i++) { PyObject *element = PyTuple_GET_ITEM(t, i); if (element == NULL) return -1; if (save(self, element, 0) < 0) return -1; } return 0; } /* Tuples are ubiquitous in the pickle protocols, so many techniques are * used across protocols to minimize the space needed to pickle them. * Tuples are also the only builtin immutable type that can be recursive * (a tuple can be reached from itself), and that requires some subtle * magic so that it works in all cases. IOW, this is a long routine. 
*/
/* Returns 0 on success, -1 on failure. */
static int
save_tuple(PicklerObject *self, PyObject *obj)
{
    Py_ssize_t len, i;

    const char mark_op = MARK;
    const char tuple_op = TUPLE;
    const char pop_op = POP;
    const char pop_mark_op = POP_MARK;
    /* Indexed by tuple length (0..3). */
    const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};

    if ((len = PyTuple_Size(obj)) < 0)
        return -1;

    if (len == 0) {
        char pdata[2];

        /* Empty tuple: EMPTY_TUPLE for any non-zero protocol, otherwise
           MARK immediately followed by TUPLE.  `len` is reused here as the
           number of opcode bytes to write.  Nothing is memoized. */
        if (self->proto) {
            pdata[0] = EMPTY_TUPLE;
            len = 1;
        }
        else {
            pdata[0] = MARK;
            pdata[1] = TUPLE;
            len = 2;
        }
        if (_Pickler_Write(self, pdata, len) < 0)
            return -1;
        return 0;
    }

    /* The tuple isn't in the memo now.  If it shows up there after
     * saving the tuple elements, the tuple must be recursive, in
     * which case we'll pop everything we put on the stack, and fetch
     * its value from the memo.
     */
    if (len <= 3 && self->proto >= 2) {
        /* Use TUPLE{1,2,3} opcodes. */
        if (store_tuple_elements(self, obj, len) < 0)
            return -1;

        if (PyMemoTable_Get(self->memo, obj)) {
            /* pop the len elements */
            for (i = 0; i < len; i++)
                if (_Pickler_Write(self, &pop_op, 1) < 0)
                    return -1;
            /* fetch from memo */
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else { /* Not recursive. */
            if (_Pickler_Write(self, len2opcode + len, 1) < 0)
                return -1;
        }
        goto memoize;
    }

    /* proto < 2 and len > 0, or proto >= 2 and len > 3.
     * Generate MARK e1 e2 ... TUPLE
     */
    if (_Pickler_Write(self, &mark_op, 1) < 0)
        return -1;

    if (store_tuple_elements(self, obj, len) < 0)
        return -1;

    if (PyMemoTable_Get(self->memo, obj)) {
        /* pop the stack stuff we pushed */
        if (self->bin) {
            if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
                return -1;
        }
        else {
            /* Note that we pop one more than len, to remove
             * the MARK too.
             */
            for (i = 0; i <= len; i++)
                if (_Pickler_Write(self, &pop_op, 1) < 0)
                    return -1;
        }
        /* fetch from memo */
        if (memo_get(self, obj) < 0)
            return -1;

        return 0;
    }
    else { /* Not recursive. */
        if (_Pickler_Write(self, &tuple_op, 1) < 0)
            return -1;
    }

  memoize:
    /* Reached only on the non-recursive paths. */
    if (memo_put(self, obj) < 0)
        return -1;

    return 0;
}

/* iter is an iterator giving items, and we batch up chunks of
 *     MARK item item ...
item APPENDS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty list, or list-like object, for the APPENDS to operate on.
 * Returns 0 on success, <0 on error.
 */
static int
batch_list(PicklerObject *self, PyObject *iter)
{
    PyObject *obj = NULL;
    PyObject *firstitem = NULL;
    int i, n;

    const char mark_op = MARK;
    const char append_op = APPEND;
    const char appends_op = APPENDS;

    assert(iter != NULL);

    /* XXX: I think this function could be made faster by avoiding the
       iterator interface and fetching objects directly from list using
       PyList_GET_ITEM.
    */

    if (self->proto == 0) {
        /* APPENDS isn't available; do one at a time. */
        for (;;) {
            obj = PyIter_Next(iter);
            if (obj == NULL) {
                /* NULL means either exhaustion or an error. */
                if (PyErr_Occurred())
                    return -1;
                break;
            }
            i = save(self, obj, 0);
            Py_DECREF(obj);
            if (i < 0)
                return -1;
            if (_Pickler_Write(self, &append_op, 1) < 0)
                return -1;
        }
        return 0;
    }

    /* proto > 0:  write in batches of BATCHSIZE. */
    do {
        /* Get first item */
        firstitem = PyIter_Next(iter);
        if (firstitem == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* nothing more to add */
            break;
        }

        /* Try to get a second item */
        obj = PyIter_Next(iter);
        if (obj == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* Only one item to write */
            if (save(self, firstitem, 0) < 0)
                goto error;
            if (_Pickler_Write(self, &append_op, 1) < 0)
                goto error;
            Py_CLEAR(firstitem);
            break;
        }

        /* More than one item to write */

        /* Pump out MARK, items, APPENDS. */
        if (_Pickler_Write(self, &mark_op, 1) < 0)
            goto error;

        if (save(self, firstitem, 0) < 0)
            goto error;
        Py_CLEAR(firstitem);
        n = 1;

        /* Fetch and save up to BATCHSIZE items */
        while (obj) {
            if (save(self, obj, 0) < 0)
                goto error;
            Py_CLEAR(obj);
            n += 1;

            if (n == BATCHSIZE)
                break;

            obj = PyIter_Next(iter);
            if (obj == NULL) {
                if (PyErr_Occurred())
                    goto error;
                break;
            }
        }

        if (_Pickler_Write(self, &appends_op, 1) < 0)
            goto error;

    /* A full batch means the iterator may have more items. */
    } while (n == BATCHSIZE);
    return 0;

  error:
    /* Both are NULL unless a reference is still held. */
    Py_XDECREF(firstitem);
    Py_XDECREF(obj);
    return -1;
}

/* This is a variant of batch_list() above, specialized for lists (with no
 * support for list subclasses). Like batch_list(), we batch up chunks of
 *     MARK item item ... item APPENDS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty list, or list-like object, for the APPENDS to operate on.
 * Returns 0 on success, -1 on error.
 *
 * This version is considerably faster than batch_list(), if less general.
 *
 * Note that this only works for protocols > 0.
 */
static int
batch_list_exact(PicklerObject *self, PyObject *obj)
{
    PyObject *item = NULL;
    Py_ssize_t this_batch, total;

    const char append_op = APPEND;
    const char appends_op = APPENDS;
    const char mark_op = MARK;

    assert(obj != NULL);
    assert(self->proto > 0);
    assert(PyList_CheckExact(obj));

    /* A single item is written with APPEND, without MARK ... APPENDS. */
    if (PyList_GET_SIZE(obj) == 1) {
        item = PyList_GET_ITEM(obj, 0);
        if (save(self, item, 0) < 0)
            return -1;
        if (_Pickler_Write(self, &append_op, 1) < 0)
            return -1;
        return 0;
    }

    /* Write in batches of BATCHSIZE. */
    total = 0;
    do {
        this_batch = 0;
        if (_Pickler_Write(self, &mark_op, 1) < 0)
            return -1;
        /* The size is re-read on every pass rather than cached. */
        while (total < PyList_GET_SIZE(obj)) {
            item = PyList_GET_ITEM(obj, total);
            if (save(self, item, 0) < 0)
                return -1;
            total++;
            if (++this_batch == BATCHSIZE)
                break;
        }
        if (_Pickler_Write(self, &appends_op, 1) < 0)
            return -1;

    } while (total < PyList_GET_SIZE(obj));

    return 0;
}

/* Write a list: the opcode(s) creating an empty list, a memo entry, then
   the items in batches.  In fast mode the call is bracketed by
   fast_save_enter()/fast_save_leave().
   Returns 0 on success, -1 on failure. */
static int
save_list(PicklerObject *self, PyObject *obj)
{
    char header[3];
    Py_ssize_t len;
    int status = 0;

    if (self->fast && !fast_save_enter(self, obj))
        goto error;

    /* Create an empty list. */
    if (self->bin) {
        header[0] = EMPTY_LIST;
        len = 1;
    }
    else {
        header[0] = MARK;
        header[1] = LIST;
        len = 2;
    }

    if (_Pickler_Write(self, header, len) < 0)
        goto error;

    /* Get list length, and bow out early if empty. */
    if ((len = PyList_Size(obj)) < 0)
        goto error;

    /* The list is memoized before its items are saved. */
    if (memo_put(self, obj) < 0)
        goto error;

    if (len != 0) {
        /* Materialize the list elements. */
        if (PyList_CheckExact(obj) && self->proto > 0) {
            if (Py_EnterRecursiveCall(" while pickling an object"))
                goto error;
            status = batch_list_exact(self, obj);
            Py_LeaveRecursiveCall();
        } else {
            PyObject *iter = PyObject_GetIter(obj);
            if (iter == NULL)
                goto error;

            if (Py_EnterRecursiveCall(" while pickling an object")) {
                Py_DECREF(iter);
                goto error;
            }
            status = batch_list(self, iter);
            Py_LeaveRecursiveCall();
            Py_DECREF(iter);
        }
    }
    if (0) {
  error:
        status = -1;
    }

    /* Runs on the error path too, so the fast-mode nesting counter is
       always adjusted. */
    if (self->fast && !fast_save_leave(self, obj))
        status = -1;

    return status;
}

/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
 *     MARK key value ... key value SETITEMS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty dict, or dict-like object, for the SETITEMS to operate on.
 * Returns 0 on success, <0 on error.
 *
 * This is very much like batch_list().  The difference between saving
 * elements directly, and picking apart two-tuples, is so long-winded at
 * the C level, though, that attempts to combine these routines were too
 * ugly to bear.
*/
/* Reference handling: every item fetched with PyIter_Next() is a new
 * reference owned by this function.  In the protocol 0 loop it is released
 * right after its key and value have been saved, and also when the item is
 * rejected for not being a 2-tuple.
 */
static int
batch_dict(PicklerObject *self, PyObject *iter)
{
    PyObject *obj = NULL;
    PyObject *firstitem = NULL;
    int i, n;

    const char mark_op = MARK;
    const char setitem_op = SETITEM;
    const char setitems_op = SETITEMS;

    assert(iter != NULL);

    if (self->proto == 0) {
        /* SETITEMS isn't available; do one at a time. */
        for (;;) {
            obj = PyIter_Next(iter);
            if (obj == NULL) {
                /* NULL without an exception set means the iterator is
                 * exhausted. */
                if (PyErr_Occurred())
                    return -1;
                break;
            }
            if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
                PyErr_SetString(PyExc_TypeError, "dict items "
                                "iterator must return 2-tuples");
                /* Drop our reference to the rejected item; returning
                 * without this leaked it. */
                Py_DECREF(obj);
                return -1;
            }
            i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
            if (i >= 0)
                i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
            Py_DECREF(obj);
            if (i < 0)
                return -1;
            if (_Pickler_Write(self, &setitem_op, 1) < 0)
                return -1;
        }
        return 0;
    }

    /* proto > 0:  write in batches of BATCHSIZE. */
    do {
        /* Get first item */
        firstitem = PyIter_Next(iter);
        if (firstitem == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* nothing more to add */
            break;
        }
        if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
            PyErr_SetString(PyExc_TypeError, "dict items "
                                "iterator must return 2-tuples");
            goto error;
        }

        /* Try to get a second item */
        obj = PyIter_Next(iter);
        if (obj == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* Only one item to write */
            if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
                goto error;
            if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
                goto error;
            if (_Pickler_Write(self, &setitem_op, 1) < 0)
                goto error;
            Py_CLEAR(firstitem);
            break;
        }

        /* More than one item to write */

        /* Pump out MARK, items, SETITEMS.
*/ if (_Pickler_Write(self, &mark_op, 1) < 0) goto error; if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0) goto error; if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0) goto error; Py_CLEAR(firstitem); n = 1; /* Fetch and save up to BATCHSIZE items */ while (obj) { if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) { PyErr_SetString(PyExc_TypeError, "dict items " "iterator must return 2-tuples"); goto error; } if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 || save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0) goto error; Py_CLEAR(obj); n += 1; if (n == BATCHSIZE) break; obj = PyIter_Next(iter); if (obj == NULL) { if (PyErr_Occurred()) goto error; break; } } if (_Pickler_Write(self, &setitems_op, 1) < 0) goto error; } while (n == BATCHSIZE); return 0; error: Py_XDECREF(firstitem); Py_XDECREF(obj); return -1; } /* This is a variant of batch_dict() above that specializes for dicts, with no * support for dict subclasses. Like batch_dict(), we batch up chunks of * MARK key value ... key value SETITEMS * opcode sequences. Calling code should have arranged to first create an * empty dict, or dict-like object, for the SETITEMS to operate on. * Returns 0 on success, -1 on error. * * Note that this currently doesn't work for protocol 0. */ static int batch_dict_exact(PicklerObject *self, PyObject *obj) { PyObject *key = NULL, *value = NULL; int i; Py_ssize_t dict_size, ppos = 0; const char mark_op = MARK; const char setitem_op = SETITEM; const char setitems_op = SETITEMS; assert(obj != NULL); assert(self->proto > 0); dict_size = PyDict_Size(obj); /* Special-case len(d) == 1 to save space. */ if (dict_size == 1) { PyDict_Next(obj, &ppos, &key, &value); if (save(self, key, 0) < 0) return -1; if (save(self, value, 0) < 0) return -1; if (_Pickler_Write(self, &setitem_op, 1) < 0) return -1; return 0; } /* Write in batches of BATCHSIZE. 
*/ do { i = 0; if (_Pickler_Write(self, &mark_op, 1) < 0) return -1; while (PyDict_Next(obj, &ppos, &key, &value)) { if (save(self, key, 0) < 0) return -1; if (save(self, value, 0) < 0) return -1; if (++i == BATCHSIZE) break; } if (_Pickler_Write(self, &setitems_op, 1) < 0) return -1; if (PyDict_Size(obj) != dict_size) { PyErr_Format( PyExc_RuntimeError, "dictionary changed size during iteration"); return -1; } } while (i == BATCHSIZE); return 0; } static int save_dict(PicklerObject *self, PyObject *obj) { PyObject *items, *iter; char header[3]; Py_ssize_t len; int status = 0; if (self->fast && !fast_save_enter(self, obj)) goto error; /* Create an empty dict. */ if (self->bin) { header[0] = EMPTY_DICT; len = 1; } else { header[0] = MARK; header[1] = DICT; len = 2; } if (_Pickler_Write(self, header, len) < 0) goto error; /* Get dict size, and bow out early if empty. */ if ((len = PyDict_Size(obj)) < 0) goto error; if (memo_put(self, obj) < 0) goto error; if (len != 0) { /* Save the dict items. */ if (PyDict_CheckExact(obj) && self->proto > 0) { /* We can take certain shortcuts if we know this is a dict and not a dict subclass. 
*/ if (Py_EnterRecursiveCall(" while pickling an object")) goto error; status = batch_dict_exact(self, obj); Py_LeaveRecursiveCall(); } else { _Py_IDENTIFIER(items); items = _PyObject_CallMethodId(obj, &PyId_items, "()"); if (items == NULL) goto error; iter = PyObject_GetIter(items); Py_DECREF(items); if (iter == NULL) goto error; if (Py_EnterRecursiveCall(" while pickling an object")) { Py_DECREF(iter); goto error; } status = batch_dict(self, iter); Py_LeaveRecursiveCall(); Py_DECREF(iter); } } if (0) { error: status = -1; } if (self->fast && !fast_save_leave(self, obj)) status = -1; return status; } static int save_global(PicklerObject *self, PyObject *obj, PyObject *name) { static PyObject *name_str = NULL; PyObject *global_name = NULL; PyObject *module_name = NULL; PyObject *module = NULL; PyObject *cls; int status = 0; const char global_op = GLOBAL; if (name_str == NULL) { name_str = PyUnicode_InternFromString("__name__"); if (name_str == NULL) goto error; } if (name) { global_name = name; Py_INCREF(global_name); } else { global_name = PyObject_GetAttr(obj, name_str); if (global_name == NULL) goto error; } module_name = whichmodule(obj, global_name); if (module_name == NULL) goto error; /* XXX: Change to use the import C API directly with level=0 to disallow relative imports. XXX: PyImport_ImportModuleLevel could be used. However, this bypasses builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore custom import functions (IMHO, this would be a nice security feature). The import C API would need to be extended to support the extra parameters of __import__ to fix that. 
*/ module = PyImport_Import(module_name); if (module == NULL) { PyErr_Format(PicklingError, "Can't pickle %R: import of module %R failed", obj, module_name); goto error; } cls = PyObject_GetAttr(module, global_name); if (cls == NULL) { PyErr_Format(PicklingError, "Can't pickle %R: attribute lookup %S.%S failed", obj, module_name, global_name); goto error; } if (cls != obj) { Py_DECREF(cls); PyErr_Format(PicklingError, "Can't pickle %R: it's not the same object as %S.%S", obj, module_name, global_name); goto error; } Py_DECREF(cls); if (self->proto >= 2) { /* See whether this is in the extension registry, and if * so generate an EXT opcode. */ PyObject *code_obj; /* extension code as Python object */ long code; /* extension code as C value */ char pdata[5]; Py_ssize_t n; PyTuple_SET_ITEM(two_tuple, 0, module_name); PyTuple_SET_ITEM(two_tuple, 1, global_name); code_obj = PyDict_GetItem(extension_registry, two_tuple); /* The object is not registered in the extension registry. This is the most likely code path. */ if (code_obj == NULL) goto gen_global; /* XXX: pickle.py doesn't check neither the type, nor the range of the value returned by the extension_registry. It should for consistency. */ /* Verify code_obj has the right type and value. */ if (!PyLong_Check(code_obj)) { PyErr_Format(PicklingError, "Can't pickle %R: extension code %R isn't an integer", obj, code_obj); goto error; } code = PyLong_AS_LONG(code_obj); if (code <= 0 || code > 0x7fffffffL) { if (!PyErr_Occurred()) PyErr_Format(PicklingError, "Can't pickle %R: extension code %ld is out of range", obj, code); goto error; } /* Generate an EXT opcode. 
*/
        /* The code is written least-significant byte first, using the
           smallest of EXT1 (1 byte), EXT2 (2 bytes) or EXT4 (4 bytes) that
           can hold it. */
        if (code <= 0xff) {
            pdata[0] = EXT1;
            pdata[1] = (unsigned char)code;
            n = 2;
        }
        else if (code <= 0xffff) {
            pdata[0] = EXT2;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            n = 3;
        }
        else {
            pdata[0] = EXT4;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            pdata[3] = (unsigned char)((code >> 16) & 0xff);
            pdata[4] = (unsigned char)((code >> 24) & 0xff);
            n = 5;
        }

        if (_Pickler_Write(self, pdata, n) < 0)
            goto error;
    }
    else {
        /* Generate a normal global opcode if we are using a pickle
           protocol < 2, or if the object is not registered in the
           extension registry. */
        PyObject *encoded;
        PyObject *(*unicode_encoder)(PyObject *);

        /* Also entered by goto from the extension-registry branch when no
           extension code is registered for the object. */
  gen_global:
        if (_Pickler_Write(self, &global_op, 1) < 0)
            goto error;

        /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
           the module name and the global name using UTF-8. We do so only when
           we are using pickle protocol 3 or newer; for older protocols the
           names are encoded as ASCII. This is to ensure compatibility with
           older Unpickler running on Python 2.x. */
        if (self->proto >= 3) {
            unicode_encoder = PyUnicode_AsUTF8String;
        }
        else {
            unicode_encoder = PyUnicode_AsASCIIString;
        }

        /* For protocol < 3 and if the user didn't request against doing so,
           we convert module names to the old 2.x module names.
*/ if (self->fix_imports) { PyObject *key; PyObject *item; key = PyTuple_Pack(2, module_name, global_name); if (key == NULL) goto error; item = PyDict_GetItemWithError(name_mapping_3to2, key); Py_DECREF(key); if (item) { if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { PyErr_Format(PyExc_RuntimeError, "_compat_pickle.REVERSE_NAME_MAPPING values " "should be 2-tuples, not %.200s", Py_TYPE(item)->tp_name); goto error; } Py_CLEAR(module_name); Py_CLEAR(global_name); module_name = PyTuple_GET_ITEM(item, 0); global_name = PyTuple_GET_ITEM(item, 1); if (!PyUnicode_Check(module_name) || !PyUnicode_Check(global_name)) { PyErr_Format(PyExc_RuntimeError, "_compat_pickle.REVERSE_NAME_MAPPING values " "should be pairs of str, not (%.200s, %.200s)", Py_TYPE(module_name)->tp_name, Py_TYPE(global_name)->tp_name); goto error; } Py_INCREF(module_name); Py_INCREF(global_name); } else if (PyErr_Occurred()) { goto error; } item = PyDict_GetItemWithError(import_mapping_3to2, module_name); if (item) { if (!PyUnicode_Check(item)) { PyErr_Format(PyExc_RuntimeError, "_compat_pickle.REVERSE_IMPORT_MAPPING values " "should be strings, not %.200s", Py_TYPE(item)->tp_name); goto error; } Py_CLEAR(module_name); module_name = item; Py_INCREF(module_name); } else if (PyErr_Occurred()) { goto error; } } /* Save the name of the module. */ encoded = unicode_encoder(module_name); if (encoded == NULL) { if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) PyErr_Format(PicklingError, "can't pickle module identifier '%S' using " "pickle protocol %i", module_name, self->proto); goto error; } if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), PyBytes_GET_SIZE(encoded)) < 0) { Py_DECREF(encoded); goto error; } Py_DECREF(encoded); if(_Pickler_Write(self, "\n", 1) < 0) goto error; /* Save the name of the module. 
*/ encoded = unicode_encoder(global_name); if (encoded == NULL) { if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) PyErr_Format(PicklingError, "can't pickle global identifier '%S' using " "pickle protocol %i", global_name, self->proto); goto error; } if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), PyBytes_GET_SIZE(encoded)) < 0) { Py_DECREF(encoded); goto error; } Py_DECREF(encoded); if(_Pickler_Write(self, "\n", 1) < 0) goto error; /* Memoize the object. */ if (memo_put(self, obj) < 0) goto error; } if (0) { error: status = -1; } Py_XDECREF(module_name); Py_XDECREF(global_name); Py_XDECREF(module); return status; } static int save_ellipsis(PicklerObject *self, PyObject *obj) { PyObject *str = PyUnicode_FromString("Ellipsis"); int res; if (str == NULL) return -1; res = save_global(self, Py_Ellipsis, str); Py_DECREF(str); return res; } static int save_notimplemented(PicklerObject *self, PyObject *obj) { PyObject *str = PyUnicode_FromString("NotImplemented"); int res; if (str == NULL) return -1; res = save_global(self, Py_NotImplemented, str); Py_DECREF(str); return res; } static int save_pers(PicklerObject *self, PyObject *obj, PyObject *func) { PyObject *pid = NULL; int status = 0; const char persid_op = PERSID; const char binpersid_op = BINPERSID; Py_INCREF(obj); pid = _Pickler_FastCall(self, func, obj); if (pid == NULL) return -1; if (pid != Py_None) { if (self->bin) { if (save(self, pid, 1) < 0 || _Pickler_Write(self, &binpersid_op, 1) < 0) goto error; } else { PyObject *pid_str = NULL; const char *pid_ascii_bytes; Py_ssize_t size; pid_str = PyObject_Str(pid); if (pid_str == NULL) goto error; /* XXX: Should it check whether the persistent id only contains ASCII characters? And what if the pid contains embedded newlines? 
*/ pid_ascii_bytes = PyUnicode_AsUTF8AndSize(pid_str, &size); Py_DECREF(pid_str); if (pid_ascii_bytes == NULL) goto error; if (_Pickler_Write(self, &persid_op, 1) < 0 || _Pickler_Write(self, pid_ascii_bytes, size) < 0 || _Pickler_Write(self, "\n", 1) < 0) goto error; } status = 1; } if (0) { error: status = -1; } Py_XDECREF(pid); return status; } static PyObject * get_class(PyObject *obj) { PyObject *cls; static PyObject *str_class; if (str_class == NULL) { str_class = PyUnicode_InternFromString("__class__"); if (str_class == NULL) return NULL; } cls = PyObject_GetAttr(obj, str_class); if (cls == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) { PyErr_Clear(); cls = (PyObject *) Py_TYPE(obj); Py_INCREF(cls); } } return cls; } /* We're saving obj, and args is the 2-thru-5 tuple returned by the * appropriate __reduce__ method for obj. */ static int save_reduce(PicklerObject *self, PyObject *args, PyObject *obj) { PyObject *callable; PyObject *argtup; PyObject *state = NULL; PyObject *listitems = Py_None; PyObject *dictitems = Py_None; Py_ssize_t size; int use_newobj = self->proto >= 2; const char reduce_op = REDUCE; const char build_op = BUILD; const char newobj_op = NEWOBJ; size = PyTuple_Size(args); if (size < 2 || size > 5) { PyErr_SetString(PicklingError, "tuple returned by " "__reduce__ must contain 2 through 5 elements"); return -1; } if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5, &callable, &argtup, &state, &listitems, &dictitems)) return -1; if (!PyCallable_Check(callable)) { PyErr_SetString(PicklingError, "first item of the tuple " "returned by __reduce__ must be callable"); return -1; } if (!PyTuple_Check(argtup)) { PyErr_SetString(PicklingError, "second item of the tuple " "returned by __reduce__ must be a tuple"); return -1; } if (state == Py_None) state = NULL; if (listitems == Py_None) listitems = NULL; else if (!PyIter_Check(listitems)) { PyErr_Format(PicklingError, "fourth element of the tuple " "returned by __reduce__ must be an 
iterator, not %s", Py_TYPE(listitems)->tp_name); return -1; } if (dictitems == Py_None) dictitems = NULL; else if (!PyIter_Check(dictitems)) { PyErr_Format(PicklingError, "fifth element of the tuple " "returned by __reduce__ must be an iterator, not %s", Py_TYPE(dictitems)->tp_name); return -1; } /* Protocol 2 special case: if callable's name is __newobj__, use NEWOBJ. */ if (use_newobj) { static PyObject *newobj_str = NULL, *name_str = NULL; PyObject *name; if (newobj_str == NULL) { newobj_str = PyUnicode_InternFromString("__newobj__"); name_str = PyUnicode_InternFromString("__name__"); if (newobj_str == NULL || name_str == NULL) return -1; } name = PyObject_GetAttr(callable, name_str); if (name == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); else return -1; use_newobj = 0; } else { use_newobj = PyUnicode_Check(name) && PyUnicode_Compare(name, newobj_str) == 0; Py_DECREF(name); } } if (use_newobj) { PyObject *cls; PyObject *newargtup; PyObject *obj_class; int p; /* Sanity checks. */ if (Py_SIZE(argtup) < 1) { PyErr_SetString(PicklingError, "__newobj__ arglist is empty"); return -1; } cls = PyTuple_GET_ITEM(argtup, 0); if (!PyType_Check(cls)) { PyErr_SetString(PicklingError, "args[0] from " "__newobj__ args is not a type"); return -1; } if (obj != NULL) { obj_class = get_class(obj); p = obj_class != cls; /* true iff a problem */ Py_DECREF(obj_class); if (p) { PyErr_SetString(PicklingError, "args[0] from " "__newobj__ args has the wrong class"); return -1; } } /* XXX: These calls save() are prone to infinite recursion. Imagine what happen if the value returned by the __reduce__() method of some extension type contains another object of the same type. Ouch! Here is a quick example, that I ran into, to illustrate what I mean: >>> import pickle, copyreg >>> copyreg.dispatch_table.pop(complex) >>> pickle.dumps(1+2j) Traceback (most recent call last): ... 
RuntimeError: maximum recursion depth exceeded Removing the complex class from copyreg.dispatch_table made the __reduce_ex__() method emit another complex object: >>> (1+1j).__reduce_ex__(2) (, (, (1+1j)), None, None, None) Thus when save() was called on newargstup (the 2nd item) recursion ensued. Of course, the bug was in the complex class which had a broken __getnewargs__() that emitted another complex object. But, the point, here, is it is quite easy to end up with a broken reduce function. */ /* Save the class and its __new__ arguments. */ if (save(self, cls, 0) < 0) return -1; newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup)); if (newargtup == NULL) return -1; p = save(self, newargtup, 0); Py_DECREF(newargtup); if (p < 0) return -1; /* Add NEWOBJ opcode. */ if (_Pickler_Write(self, &newobj_op, 1) < 0) return -1; } else { /* Not using NEWOBJ. */ if (save(self, callable, 0) < 0 || save(self, argtup, 0) < 0 || _Pickler_Write(self, &reduce_op, 1) < 0) return -1; } /* obj can be NULL when save_reduce() is used directly. A NULL obj means the caller do not want to memoize the object. Not particularly useful, but that is to mimic the behavior save_reduce() in pickle.py when obj is None. */ if (obj && memo_put(self, obj) < 0) return -1; if (listitems && batch_list(self, listitems) < 0) return -1; if (dictitems && batch_dict(self, dictitems) < 0) return -1; if (state) { if (save(self, state, 0) < 0 || _Pickler_Write(self, &build_op, 1) < 0) return -1; } return 0; } static int save(PicklerObject *self, PyObject *obj, int pers_save) { PyTypeObject *type; PyObject *reduce_func = NULL; PyObject *reduce_value = NULL; int status = 0; if (Py_EnterRecursiveCall(" while pickling an object")) return -1; /* The extra pers_save argument is necessary to avoid calling save_pers() on its returned object. */ if (!pers_save && self->pers_func) { /* save_pers() returns: -1 to signal an error; 0 if it did nothing successfully; 1 if a persistent id was saved. 
*/ if ((status = save_pers(self, obj, self->pers_func)) != 0) goto done; } type = Py_TYPE(obj); /* The old cPickle had an optimization that used switch-case statement dispatching on the first letter of the type name. This has was removed since benchmarks shown that this optimization was actually slowing things down. */ /* Atom types; these aren't memoized, so don't check the memo. */ if (obj == Py_None) { status = save_none(self, obj); goto done; } else if (obj == Py_Ellipsis) { status = save_ellipsis(self, obj); goto done; } else if (obj == Py_NotImplemented) { status = save_notimplemented(self, obj); goto done; } else if (obj == Py_False || obj == Py_True) { status = save_bool(self, obj); goto done; } else if (type == &PyLong_Type) { status = save_long(self, obj); goto done; } else if (type == &PyFloat_Type) { status = save_float(self, obj); goto done; } /* Check the memo to see if it has the object. If so, generate a GET (or BINGET) opcode, instead of pickling the object once again. */ if (PyMemoTable_Get(self->memo, obj)) { if (memo_get(self, obj) < 0) goto error; goto done; } if (type == &PyBytes_Type) { status = save_bytes(self, obj); goto done; } else if (type == &PyUnicode_Type) { status = save_unicode(self, obj); goto done; } else if (type == &PyDict_Type) { status = save_dict(self, obj); goto done; } else if (type == &PyList_Type) { status = save_list(self, obj); goto done; } else if (type == &PyTuple_Type) { status = save_tuple(self, obj); goto done; } else if (type == &PyType_Type) { status = save_global(self, obj, NULL); goto done; } else if (type == &PyFunction_Type) { status = save_global(self, obj, NULL); if (status < 0 && PyErr_ExceptionMatches(PickleError)) { /* fall back to reduce */ PyErr_Clear(); } else { goto done; } } else if (type == &PyCFunction_Type) { status = save_global(self, obj, NULL); goto done; } /* XXX: This part needs some unit tests. */ /* Get a reduction callable, and call it. 
This may come from * self.dispatch_table, copyreg.dispatch_table, the object's * __reduce_ex__ method, or the object's __reduce__ method. */ if (self->dispatch_table == NULL) { reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type); /* PyDict_GetItem() unlike PyObject_GetItem() and PyObject_GetAttr() returns a borrowed ref */ Py_XINCREF(reduce_func); } else { reduce_func = PyObject_GetItem(self->dispatch_table, (PyObject *)type); if (reduce_func == NULL) { if (PyErr_ExceptionMatches(PyExc_KeyError)) PyErr_Clear(); else goto error; } } if (reduce_func != NULL) { Py_INCREF(obj); reduce_value = _Pickler_FastCall(self, reduce_func, obj); } else if (PyType_IsSubtype(type, &PyType_Type)) { status = save_global(self, obj, NULL); goto done; } else { static PyObject *reduce_str = NULL; static PyObject *reduce_ex_str = NULL; /* Cache the name of the reduce methods. */ if (reduce_str == NULL) { reduce_str = PyUnicode_InternFromString("__reduce__"); if (reduce_str == NULL) goto error; reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__"); if (reduce_ex_str == NULL) goto error; } /* XXX: If the __reduce__ method is defined, __reduce_ex__ is automatically defined as __reduce__. While this is convenient, this make it impossible to know which method was actually called. Of course, this is not a big deal. But still, it would be nice to let the user know which method was called when something go wrong. Incidentally, this means if __reduce_ex__ is not defined, we don't actually have to check for a __reduce__ method. */ /* Check for a __reduce_ex__ method. */ reduce_func = PyObject_GetAttr(obj, reduce_ex_str); if (reduce_func != NULL) { PyObject *proto; proto = PyLong_FromLong(self->proto); if (proto != NULL) { reduce_value = _Pickler_FastCall(self, reduce_func, proto); } } else { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); else goto error; /* Check for a __reduce__ method. 
*/ reduce_func = PyObject_GetAttr(obj, reduce_str); if (reduce_func != NULL) { reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL); } else { PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R", type->tp_name, obj); goto error; } } } if (reduce_value == NULL) goto error; if (PyUnicode_Check(reduce_value)) { status = save_global(self, obj, reduce_value); goto done; } if (!PyTuple_Check(reduce_value)) { PyErr_SetString(PicklingError, "__reduce__ must return a string or tuple"); goto error; } status = save_reduce(self, reduce_value, obj); if (0) { error: status = -1; } done: Py_LeaveRecursiveCall(); Py_XDECREF(reduce_func); Py_XDECREF(reduce_value); return status; } static int dump(PicklerObject *self, PyObject *obj) { const char stop_op = STOP; if (self->proto >= 2) { char header[2]; header[0] = PROTO; assert(self->proto >= 0 && self->proto < 256); header[1] = (unsigned char)self->proto; if (_Pickler_Write(self, header, 2) < 0) return -1; } if (save(self, obj, 0) < 0 || _Pickler_Write(self, &stop_op, 1) < 0) return -1; return 0; } PyDoc_STRVAR(Pickler_clear_memo_doc, "clear_memo() -> None. Clears the pickler's \"memo\"." "\n" "The memo is the data structure that remembers which objects the\n" "pickler has already seen, so that shared or recursive objects are\n" "pickled by reference and not by value. This method is useful when\n" "re-using picklers."); static PyObject * Pickler_clear_memo(PicklerObject *self) { if (self->memo) PyMemoTable_Clear(self->memo); Py_RETURN_NONE; } PyDoc_STRVAR(Pickler_dump_doc, "dump(obj) -> None. Write a pickled representation of obj to the open file."); static PyObject * Pickler_dump(PicklerObject *self, PyObject *args) { PyObject *obj; /* Check whether the Pickler was initialized correctly (issue3664). Developers often forget to call __init__() in their subclasses, which would trigger a segfault without this check. 
*/ if (self->write == NULL) { PyErr_Format(PicklingError, "Pickler.__init__() was not called by %s.__init__()", Py_TYPE(self)->tp_name); return NULL; } if (!PyArg_ParseTuple(args, "O:dump", &obj)) return NULL; if (_Pickler_ClearBuffer(self) < 0) return NULL; if (dump(self, obj) < 0) return NULL; if (_Pickler_FlushToFile(self) < 0) return NULL; Py_RETURN_NONE; } static struct PyMethodDef Pickler_methods[] = { {"dump", (PyCFunction)Pickler_dump, METH_VARARGS, Pickler_dump_doc}, {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS, Pickler_clear_memo_doc}, {NULL, NULL} /* sentinel */ }; static void Pickler_dealloc(PicklerObject *self) { PyObject_GC_UnTrack(self); Py_XDECREF(self->output_buffer); Py_XDECREF(self->write); Py_XDECREF(self->pers_func); Py_XDECREF(self->dispatch_table); Py_XDECREF(self->arg); Py_XDECREF(self->fast_memo); PyMemoTable_Del(self->memo); Py_TYPE(self)->tp_free((PyObject *)self); } static int Pickler_traverse(PicklerObject *self, visitproc visit, void *arg) { Py_VISIT(self->write); Py_VISIT(self->pers_func); Py_VISIT(self->dispatch_table); Py_VISIT(self->arg); Py_VISIT(self->fast_memo); return 0; } static int Pickler_clear(PicklerObject *self) { Py_CLEAR(self->output_buffer); Py_CLEAR(self->write); Py_CLEAR(self->pers_func); Py_CLEAR(self->dispatch_table); Py_CLEAR(self->arg); Py_CLEAR(self->fast_memo); if (self->memo != NULL) { PyMemoTable *memo = self->memo; self->memo = NULL; PyMemoTable_Del(memo); } return 0; } PyDoc_STRVAR(Pickler_doc, "Pickler(file, protocol=None)" "\n" "This takes a binary file for writing a pickle data stream.\n" "\n" "The optional protocol argument tells the pickler to use the\n" "given protocol; supported protocols are 0, 1, 2, 3. The default\n" "protocol is 3; a backward-incompatible protocol designed for\n" "Python 3.0.\n" "\n" "Specifying a negative protocol version selects the highest\n" "protocol version supported. 
The higher the protocol used, the\n" "more recent the version of Python needed to read the pickle\n" "produced.\n" "\n" "The file argument must have a write() method that accepts a single\n" "bytes argument. It can thus be a file object opened for binary\n" "writing, a io.BytesIO instance, or any other custom object that\n" "meets this interface.\n" "\n" "If fix_imports is True and protocol is less than 3, pickle will try to\n" "map the new Python 3.x names to the old module names used in Python\n" "2.x, so that the pickle data stream is readable with Python 2.x.\n"); static int Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"file", "protocol", "fix_imports", 0}; PyObject *file; PyObject *proto_obj = NULL; PyObject *fix_imports = Py_True; _Py_IDENTIFIER(persistent_id); _Py_IDENTIFIER(dispatch_table); if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler", kwlist, &file, &proto_obj, &fix_imports)) return -1; /* In case of multiple __init__() calls, clear previous content. 
*/ if (self->write != NULL) (void)Pickler_clear(self); if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0) return -1; if (_Pickler_SetOutputStream(self, file) < 0) return -1; /* memo and output_buffer may have already been created in _Pickler_New */ if (self->memo == NULL) { self->memo = PyMemoTable_New(); if (self->memo == NULL) return -1; } self->output_len = 0; if (self->output_buffer == NULL) { self->max_output_len = WRITE_BUF_SIZE; self->output_buffer = PyBytes_FromStringAndSize(NULL, self->max_output_len); if (self->output_buffer == NULL) return -1; } self->arg = NULL; self->fast = 0; self->fast_nesting = 0; self->fast_memo = NULL; self->pers_func = NULL; if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) { self->pers_func = _PyObject_GetAttrId((PyObject *)self, &PyId_persistent_id); if (self->pers_func == NULL) return -1; } self->dispatch_table = NULL; if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) { self->dispatch_table = _PyObject_GetAttrId((PyObject *)self, &PyId_dispatch_table); if (self->dispatch_table == NULL) return -1; } return 0; } /* Define a proxy object for the Pickler's internal memo object. This is to * avoid breaking code like: * pickler.memo.clear() * and * pickler.memo = saved_memo * Is this a good idea? Not really, but we don't want to break code that uses * it. Note that we don't implement the entire mapping API here. This is * intentional, as these should be treated as black-box implementation details. */ typedef struct { PyObject_HEAD PicklerObject *pickler; /* Pickler whose memo table we're proxying. */ } PicklerMemoProxyObject; PyDoc_STRVAR(pmp_clear_doc, "memo.clear() -> None. Remove all items from memo."); static PyObject * pmp_clear(PicklerMemoProxyObject *self) { if (self->pickler->memo) PyMemoTable_Clear(self->pickler->memo); Py_RETURN_NONE; } PyDoc_STRVAR(pmp_copy_doc, "memo.copy() -> new_memo. 
Copy the memo to a new object."); static PyObject * pmp_copy(PicklerMemoProxyObject *self) { Py_ssize_t i; PyMemoTable *memo; PyObject *new_memo = PyDict_New(); if (new_memo == NULL) return NULL; memo = self->pickler->memo; for (i = 0; i < memo->mt_allocated; ++i) { PyMemoEntry entry = memo->mt_table[i]; if (entry.me_key != NULL) { int status; PyObject *key, *value; key = PyLong_FromVoidPtr(entry.me_key); value = Py_BuildValue("nO", entry.me_value, entry.me_key); if (key == NULL || value == NULL) { Py_XDECREF(key); Py_XDECREF(value); goto error; } status = PyDict_SetItem(new_memo, key, value); Py_DECREF(key); Py_DECREF(value); if (status < 0) goto error; } } return new_memo; error: Py_XDECREF(new_memo); return NULL; } PyDoc_STRVAR(pmp_reduce_doc, "memo.__reduce__(). Pickling support."); static PyObject * pmp_reduce(PicklerMemoProxyObject *self, PyObject *args) { PyObject *reduce_value, *dict_args; PyObject *contents = pmp_copy(self); if (contents == NULL) return NULL; reduce_value = PyTuple_New(2); if (reduce_value == NULL) { Py_DECREF(contents); return NULL; } dict_args = PyTuple_New(1); if (dict_args == NULL) { Py_DECREF(contents); Py_DECREF(reduce_value); return NULL; } PyTuple_SET_ITEM(dict_args, 0, contents); Py_INCREF((PyObject *)&PyDict_Type); PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type); PyTuple_SET_ITEM(reduce_value, 1, dict_args); return reduce_value; } static PyMethodDef picklerproxy_methods[] = { {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc}, {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc}, {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc}, {NULL, NULL} /* sentinel */ }; static void PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self) { PyObject_GC_UnTrack(self); Py_XDECREF(self->pickler); PyObject_GC_Del((PyObject *)self); } static int PicklerMemoProxy_traverse(PicklerMemoProxyObject *self, visitproc visit, void *arg) { Py_VISIT(self->pickler); return 0; } static int 
PicklerMemoProxy_clear(PicklerMemoProxyObject *self) { Py_CLEAR(self->pickler); return 0; } static PyTypeObject PicklerMemoProxyType = { PyVarObject_HEAD_INIT(NULL, 0) "_pickle.PicklerMemoProxy", /*tp_name*/ sizeof(PicklerMemoProxyObject), /*tp_basicsize*/ 0, (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ PyObject_HashNotImplemented, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ PyObject_GenericSetAttr, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 0, /* tp_doc */ (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */ (inquiry)PicklerMemoProxy_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ picklerproxy_methods, /* tp_methods */ }; static PyObject * PicklerMemoProxy_New(PicklerObject *pickler) { PicklerMemoProxyObject *self; self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType); if (self == NULL) return NULL; Py_INCREF(pickler); self->pickler = pickler; PyObject_GC_Track(self); return (PyObject *)self; } /*****************************************************************************/ static PyObject * Pickler_get_memo(PicklerObject *self) { return PicklerMemoProxy_New(self); } static int Pickler_set_memo(PicklerObject *self, PyObject *obj) { PyMemoTable *new_memo = NULL; if (obj == NULL) { PyErr_SetString(PyExc_TypeError, "attribute deletion is not supported"); return -1; } if (Py_TYPE(obj) == &PicklerMemoProxyType) { PicklerObject *pickler = ((PicklerMemoProxyObject *)obj)->pickler; new_memo = PyMemoTable_Copy(pickler->memo); if (new_memo == NULL) return -1; } else if (PyDict_Check(obj)) { Py_ssize_t i = 0; PyObject *key, *value; new_memo = PyMemoTable_New(); if (new_memo == NULL) return -1; while (PyDict_Next(obj, 
&i, &key, &value)) { Py_ssize_t memo_id; PyObject *memo_obj; if (!PyTuple_Check(value) || Py_SIZE(value) != 2) { PyErr_SetString(PyExc_TypeError, "'memo' values must be 2-item tuples"); goto error; } memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0)); if (memo_id == -1 && PyErr_Occurred()) goto error; memo_obj = PyTuple_GET_ITEM(value, 1); if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0) goto error; } } else { PyErr_Format(PyExc_TypeError, "'memo' attribute must be an PicklerMemoProxy object" "or dict, not %.200s", Py_TYPE(obj)->tp_name); return -1; } PyMemoTable_Del(self->memo); self->memo = new_memo; return 0; error: if (new_memo) PyMemoTable_Del(new_memo); return -1; } static PyObject * Pickler_get_persid(PicklerObject *self) { if (self->pers_func == NULL) PyErr_SetString(PyExc_AttributeError, "persistent_id"); else Py_INCREF(self->pers_func); return self->pers_func; } static int Pickler_set_persid(PicklerObject *self, PyObject *value) { PyObject *tmp; if (value == NULL) { PyErr_SetString(PyExc_TypeError, "attribute deletion is not supported"); return -1; } if (!PyCallable_Check(value)) { PyErr_SetString(PyExc_TypeError, "persistent_id must be a callable taking one argument"); return -1; } tmp = self->pers_func; Py_INCREF(value); self->pers_func = value; Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. 
*/ return 0; } static PyMemberDef Pickler_members[] = { {"bin", T_INT, offsetof(PicklerObject, bin)}, {"fast", T_INT, offsetof(PicklerObject, fast)}, {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)}, {NULL} }; static PyGetSetDef Pickler_getsets[] = { {"memo", (getter)Pickler_get_memo, (setter)Pickler_set_memo}, {"persistent_id", (getter)Pickler_get_persid, (setter)Pickler_set_persid}, {NULL} }; static PyTypeObject Pickler_Type = { PyVarObject_HEAD_INIT(NULL, 0) "_pickle.Pickler" , /*tp_name*/ sizeof(PicklerObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Pickler_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_reserved*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, Pickler_doc, /*tp_doc*/ (traverseproc)Pickler_traverse, /*tp_traverse*/ (inquiry)Pickler_clear, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ Pickler_methods, /*tp_methods*/ Pickler_members, /*tp_members*/ Pickler_getsets, /*tp_getset*/ 0, /*tp_base*/ 0, /*tp_dict*/ 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ 0, /*tp_dictoffset*/ (initproc)Pickler_init, /*tp_init*/ PyType_GenericAlloc, /*tp_alloc*/ PyType_GenericNew, /*tp_new*/ PyObject_GC_Del, /*tp_free*/ 0, /*tp_is_gc*/ }; /* Temporary helper for calling self.find_class(). XXX: It would be nice to able to avoid Python function call overhead, by using directly the C version of find_class(), when find_class() is not overridden by a subclass. Although, this could become rather hackish. A simpler optimization would be to call the C function when self is not a subclass instance. 
*/ static PyObject * find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name) { _Py_IDENTIFIER(find_class); return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO", module_name, global_name); } static Py_ssize_t marker(UnpicklerObject *self) { if (self->num_marks < 1) { PyErr_SetString(UnpicklingError, "could not find MARK"); return -1; } return self->marks[--self->num_marks]; } static int load_none(UnpicklerObject *self) { PDATA_APPEND(self->stack, Py_None, -1); return 0; } static int bad_readline(void) { PyErr_SetString(UnpicklingError, "pickle data was truncated"); return -1; } static int load_int(UnpicklerObject *self) { PyObject *value; char *endptr, *s; Py_ssize_t len; long x; if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; if (len < 2) return bad_readline(); errno = 0; /* XXX: Should the base argument of strtol() be explicitly set to 10? XXX(avassalotti): Should this uses PyOS_strtol()? */ x = strtol(s, &endptr, 0); if (errno || (*endptr != '\n' && *endptr != '\0')) { /* Hm, maybe we've got something long. Let's try reading * it as a Python long object. */ errno = 0; /* XXX: Same thing about the base here. */ value = PyLong_FromString(s, NULL, 0); if (value == NULL) { PyErr_SetString(PyExc_ValueError, "could not convert string to int"); return -1; } } else { if (len == 3 && (x == 0 || x == 1)) { if ((value = PyBool_FromLong(x)) == NULL) return -1; } else { if ((value = PyLong_FromLong(x)) == NULL) return -1; } } PDATA_PUSH(self->stack, value, -1); return 0; } static int load_bool(UnpicklerObject *self, PyObject *boolean) { assert(boolean == Py_True || boolean == Py_False); PDATA_APPEND(self->stack, boolean, -1); return 0; } /* s contains x bytes of an unsigned little-endian integer. Return its value * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX. 
*/ static Py_ssize_t calc_binsize(char *bytes, int size) { unsigned char *s = (unsigned char *)bytes; size_t x = 0; assert(size == 4); x = (size_t) s[0]; x |= (size_t) s[1] << 8; x |= (size_t) s[2] << 16; x |= (size_t) s[3] << 24; if (x > PY_SSIZE_T_MAX) return -1; else return (Py_ssize_t) x; } /* s contains x bytes of a little-endian integer. Return its value as a * C int. Obscure: when x is 1 or 2, this is an unsigned little-endian * int, but when x is 4 it's a signed one. This is an historical source * of x-platform bugs. */ static long calc_binint(char *bytes, int size) { unsigned char *s = (unsigned char *)bytes; int i = size; long x = 0; for (i = 0; i < size; i++) { x |= (long)s[i] << (i * 8); } /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) * is signed, so on a box with longs bigger than 4 bytes we need * to extend a BININT's sign bit to the full width. */ if (SIZEOF_LONG > 4 && size == 4) { x |= -(x & (1L << 31)); } return x; } static int load_binintx(UnpicklerObject *self, char *s, int size) { PyObject *value; long x; x = calc_binint(s, size); if ((value = PyLong_FromLong(x)) == NULL) return -1; PDATA_PUSH(self->stack, value, -1); return 0; } static int load_binint(UnpicklerObject *self) { char *s; if (_Unpickler_Read(self, &s, 4) < 0) return -1; return load_binintx(self, s, 4); } static int load_binint1(UnpicklerObject *self) { char *s; if (_Unpickler_Read(self, &s, 1) < 0) return -1; return load_binintx(self, s, 1); } static int load_binint2(UnpicklerObject *self) { char *s; if (_Unpickler_Read(self, &s, 2) < 0) return -1; return load_binintx(self, s, 2); } static int load_long(UnpicklerObject *self) { PyObject *value; char *s; Py_ssize_t len; if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; if (len < 2) return bad_readline(); /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove the 'L' before calling PyLong_FromString. 
In order to maintain compatibility with Python 3.0.0, we don't actually *require* the 'L' to be present. */ if (s[len-2] == 'L') s[len-2] = '\0'; /* XXX: Should the base argument explicitly set to 10? */ value = PyLong_FromString(s, NULL, 0); if (value == NULL) return -1; PDATA_PUSH(self->stack, value, -1); return 0; } /* 'size' bytes contain the # of bytes of little-endian 256's-complement * data following. */ static int load_counted_long(UnpicklerObject *self, int size) { PyObject *value; char *nbytes; char *pdata; assert(size == 1 || size == 4); if (_Unpickler_Read(self, &nbytes, size) < 0) return -1; size = calc_binint(nbytes, size); if (size < 0) { /* Corrupt or hostile pickle -- we never write one like this */ PyErr_SetString(UnpicklingError, "LONG pickle has negative byte count"); return -1; } if (size == 0) value = PyLong_FromLong(0L); else { /* Read the raw little-endian bytes and convert. */ if (_Unpickler_Read(self, &pdata, size) < 0) return -1; value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size, 1 /* little endian */ , 1 /* signed */ ); } if (value == NULL) return -1; PDATA_PUSH(self->stack, value, -1); return 0; } static int load_float(UnpicklerObject *self) { PyObject *value; char *endptr, *s; Py_ssize_t len; double d; if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; if (len < 2) return bad_readline(); errno = 0; d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError); if (d == -1.0 && PyErr_Occurred()) return -1; if ((endptr[0] != '\n') && (endptr[0] != '\0')) { PyErr_SetString(PyExc_ValueError, "could not convert string to float"); return -1; } value = PyFloat_FromDouble(d); if (value == NULL) return -1; PDATA_PUSH(self->stack, value, -1); return 0; } static int load_binfloat(UnpicklerObject *self) { PyObject *value; double x; char *s; if (_Unpickler_Read(self, &s, 8) < 0) return -1; x = PyFloat_Unpack8((unsigned char *)s, 0); if (x == -1.0 && PyErr_Occurred()) return -1; if ((value = PyFloat_FromDouble(x)) == NULL) 
return -1; PDATA_PUSH(self->stack, value, -1); return 0; } /* Returns a new reference */ static PyObject * decode_string(UnpicklerObject *self, PyObject *value) { if (strcmp(self->encoding, "bytes") == 0) { Py_INCREF(value); return value; } else if (strcmp(self->errors, "bytes") == 0) { PyObject *decoded = PyUnicode_FromEncodedObject(value, self->encoding, "strict"); if (decoded == NULL) { PyErr_Clear(); Py_INCREF(value); return value; } else { return decoded; } } else { return PyUnicode_FromEncodedObject(value, self->encoding, self->errors); } } static int load_string(UnpicklerObject *self) { PyObject *bytes; PyObject *str = NULL; Py_ssize_t len; char *s, *p; if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; if (len < 2) return bad_readline(); if ((s = strdup(s)) == NULL) { PyErr_NoMemory(); return -1; } /* Strip outermost quotes */ while (len > 0 && s[len - 1] <= ' ') len--; if (len > 1 && s[0] == '"' && s[len - 1] == '"') { s[len - 1] = '\0'; p = s + 1; len -= 2; } else if (len > 1 && s[0] == '\'' && s[len - 1] == '\'') { s[len - 1] = '\0'; p = s + 1; len -= 2; } else { free(s); PyErr_SetString(PyExc_ValueError, "insecure string pickle"); return -1; } /* Use the PyBytes API to decode the string, since that is what is used to encode, and then coerce the result to Unicode. 
*/ bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL); free(s); if (bytes == NULL) return -1; str = decode_string(self, bytes); Py_DECREF(bytes); if (str == NULL) return -1; PDATA_PUSH(self->stack, str, -1); return 0; } static int load_binbytes(UnpicklerObject *self) { PyObject *bytes; Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 4) < 0) return -1; x = calc_binsize(s, 4); if (x < 0) { PyErr_Format(PyExc_OverflowError, "BINBYTES exceeds system's maximum size of %zd bytes", PY_SSIZE_T_MAX ); return -1; } if (_Unpickler_Read(self, &s, x) < 0) return -1; bytes = PyBytes_FromStringAndSize(s, x); if (bytes == NULL) return -1; PDATA_PUSH(self->stack, bytes, -1); return 0; } static int load_short_binbytes(UnpicklerObject *self) { PyObject *bytes; Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 1) < 0) return -1; x = (unsigned char)s[0]; if (_Unpickler_Read(self, &s, x) < 0) return -1; bytes = PyBytes_FromStringAndSize(s, x); if (bytes == NULL) return -1; PDATA_PUSH(self->stack, bytes, -1); return 0; } static int load_binstring(UnpicklerObject *self) { PyObject *bytes, *str; Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 4) < 0) return -1; x = calc_binint(s, 4); if (x < 0) { PyErr_SetString(UnpicklingError, "BINSTRING pickle has negative byte count"); return -1; } if (_Unpickler_Read(self, &s, x) < 0) return -1; /* Convert Python 2.x strings to unicode or bytes. */ bytes = PyBytes_FromStringAndSize(s, x); if (bytes == NULL) return -1; str = decode_string(self, bytes); Py_DECREF(bytes); if (str == NULL) return -1; PDATA_PUSH(self->stack, str, -1); return 0; } static int load_short_binstring(UnpicklerObject *self) { PyObject *bytes, *str; Py_ssize_t x; char *s; if (_Unpickler_Read(self, &s, 1) < 0) return -1; x = (unsigned char)s[0]; if (_Unpickler_Read(self, &s, x) < 0) return -1; /* Convert Python 2.x strings to unicode or bytes. 
*/ bytes = PyBytes_FromStringAndSize(s, x); if (bytes == NULL) return -1; str = decode_string(self, bytes); Py_DECREF(bytes); if (str == NULL) return -1; PDATA_PUSH(self->stack, str, -1); return 0; } static int load_unicode(UnpicklerObject *self) { PyObject *str; Py_ssize_t len; char *s; if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; if (len < 1) return bad_readline(); str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL); if (str == NULL) return -1; PDATA_PUSH(self->stack, str, -1); return 0; } static int load_binunicode(UnpicklerObject *self) { PyObject *str; Py_ssize_t size; char *s; if (_Unpickler_Read(self, &s, 4) < 0) return -1; size = calc_binsize(s, 4); if (size < 0) { PyErr_Format(PyExc_OverflowError, "BINUNICODE exceeds system's maximum size of %zd bytes", PY_SSIZE_T_MAX ); return -1; } if (_Unpickler_Read(self, &s, size) < 0) return -1; str = PyUnicode_DecodeUTF8(s, size, "surrogatepass"); if (str == NULL) return -1; PDATA_PUSH(self->stack, str, -1); return 0; } static int load_tuple(UnpicklerObject *self) { PyObject *tuple; Py_ssize_t i; if ((i = marker(self)) < 0) return -1; tuple = Pdata_poptuple(self->stack, i); if (tuple == NULL) return -1; PDATA_PUSH(self->stack, tuple, -1); return 0; } static int load_counted_tuple(UnpicklerObject *self, int len) { PyObject *tuple; tuple = PyTuple_New(len); if (tuple == NULL) return -1; while (--len >= 0) { PyObject *item; PDATA_POP(self->stack, item); if (item == NULL) return -1; PyTuple_SET_ITEM(tuple, len, item); } PDATA_PUSH(self->stack, tuple, -1); return 0; } static int load_empty_list(UnpicklerObject *self) { PyObject *list; if ((list = PyList_New(0)) == NULL) return -1; PDATA_PUSH(self->stack, list, -1); return 0; } static int load_empty_dict(UnpicklerObject *self) { PyObject *dict; if ((dict = PyDict_New()) == NULL) return -1; PDATA_PUSH(self->stack, dict, -1); return 0; } static int load_list(UnpicklerObject *self) { PyObject *list; Py_ssize_t i; if ((i = marker(self)) < 0) return -1; list 
= Pdata_poplist(self->stack, i); if (list == NULL) return -1; PDATA_PUSH(self->stack, list, -1); return 0; } static int load_dict(UnpicklerObject *self) { PyObject *dict, *key, *value; Py_ssize_t i, j, k; if ((i = marker(self)) < 0) return -1; j = Py_SIZE(self->stack); if ((dict = PyDict_New()) == NULL) return -1; for (k = i + 1; k < j; k += 2) { key = self->stack->data[k - 1]; value = self->stack->data[k]; if (PyDict_SetItem(dict, key, value) < 0) { Py_DECREF(dict); return -1; } } Pdata_clear(self->stack, i); PDATA_PUSH(self->stack, dict, -1); return 0; } static PyObject * instantiate(PyObject *cls, PyObject *args) { PyObject *result = NULL; _Py_IDENTIFIER(__getinitargs__); /* Caller must assure args are a tuple. Normally, args come from Pdata_poptuple which packs objects from the top of the stack into a newly created tuple. */ assert(PyTuple_Check(args)); if (Py_SIZE(args) > 0 || !PyType_Check(cls) || _PyObject_HasAttrId(cls, &PyId___getinitargs__)) { result = PyObject_CallObject(cls, args); } else { _Py_IDENTIFIER(__new__); result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls); } return result; } static int load_obj(UnpicklerObject *self) { PyObject *cls, *args, *obj = NULL; Py_ssize_t i; if ((i = marker(self)) < 0) return -1; args = Pdata_poptuple(self->stack, i + 1); if (args == NULL) return -1; PDATA_POP(self->stack, cls); if (cls) { obj = instantiate(cls, args); Py_DECREF(cls); } Py_DECREF(args); if (obj == NULL) return -1; PDATA_PUSH(self->stack, obj, -1); return 0; } static int load_inst(UnpicklerObject *self) { PyObject *cls = NULL; PyObject *args = NULL; PyObject *obj = NULL; PyObject *module_name; PyObject *class_name; Py_ssize_t len; Py_ssize_t i; char *s; if ((i = marker(self)) < 0) return -1; if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; if (len < 2) return bad_readline(); /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII identifiers are permitted in Python 3.0, since the INST opcode is only supported by 
older protocols on Python 2.x. */ module_name = PyUnicode_DecodeASCII(s, len - 1, "strict"); if (module_name == NULL) return -1; if ((len = _Unpickler_Readline(self, &s)) >= 0) { if (len < 2) return bad_readline(); class_name = PyUnicode_DecodeASCII(s, len - 1, "strict"); if (class_name != NULL) { cls = find_class(self, module_name, class_name); Py_DECREF(class_name); } } Py_DECREF(module_name); if (cls == NULL) return -1; if ((args = Pdata_poptuple(self->stack, i)) != NULL) { obj = instantiate(cls, args); Py_DECREF(args); } Py_DECREF(cls); if (obj == NULL) return -1; PDATA_PUSH(self->stack, obj, -1); return 0; } static int load_newobj(UnpicklerObject *self) { PyObject *args = NULL; PyObject *clsraw = NULL; PyTypeObject *cls; /* clsraw cast to its true type */ PyObject *obj; /* Stack is ... cls argtuple, and we want to call * cls.__new__(cls, *argtuple). */ PDATA_POP(self->stack, args); if (args == NULL) goto error; if (!PyTuple_Check(args)) { PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple."); goto error; } PDATA_POP(self->stack, clsraw); cls = (PyTypeObject *)clsraw; if (cls == NULL) goto error; if (!PyType_Check(cls)) { PyErr_SetString(UnpicklingError, "NEWOBJ class argument " "isn't a type object"); goto error; } if (cls->tp_new == NULL) { PyErr_SetString(UnpicklingError, "NEWOBJ class argument " "has NULL tp_new"); goto error; } /* Call __new__. 
*/ obj = cls->tp_new(cls, args, NULL); if (obj == NULL) goto error; Py_DECREF(args); Py_DECREF(clsraw); PDATA_PUSH(self->stack, obj, -1); return 0; error: Py_XDECREF(args); Py_XDECREF(clsraw); return -1; } static int load_global(UnpicklerObject *self) { PyObject *global = NULL; PyObject *module_name; PyObject *global_name; Py_ssize_t len; char *s; if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; if (len < 2) return bad_readline(); module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict"); if (!module_name) return -1; if ((len = _Unpickler_Readline(self, &s)) >= 0) { if (len < 2) { Py_DECREF(module_name); return bad_readline(); } global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict"); if (global_name) { global = find_class(self, module_name, global_name); Py_DECREF(global_name); } } Py_DECREF(module_name); if (global == NULL) return -1; PDATA_PUSH(self->stack, global, -1); return 0; } static int load_persid(UnpicklerObject *self) { PyObject *pid; Py_ssize_t len; char *s; if (self->pers_func) { if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; if (len < 2) return bad_readline(); pid = PyBytes_FromStringAndSize(s, len - 1); if (pid == NULL) return -1; /* Ugh... this does not leak since _Unpickler_FastCall() steals the reference to pid first. */ pid = _Unpickler_FastCall(self, self->pers_func, pid); if (pid == NULL) return -1; PDATA_PUSH(self->stack, pid, -1); return 0; } else { PyErr_SetString(UnpicklingError, "A load persistent id instruction was encountered,\n" "but no persistent_load function was specified."); return -1; } } static int load_binpersid(UnpicklerObject *self) { PyObject *pid; if (self->pers_func) { PDATA_POP(self->stack, pid); if (pid == NULL) return -1; /* Ugh... this does not leak since _Unpickler_FastCall() steals the reference to pid first. 
*/ pid = _Unpickler_FastCall(self, self->pers_func, pid); if (pid == NULL) return -1; PDATA_PUSH(self->stack, pid, -1); return 0; } else { PyErr_SetString(UnpicklingError, "A load persistent id instruction was encountered,\n" "but no persistent_load function was specified."); return -1; } } static int load_pop(UnpicklerObject *self) { Py_ssize_t len = Py_SIZE(self->stack); /* Note that we split the (pickle.py) stack into two stacks, * an object stack and a mark stack. We have to be clever and * pop the right one. We do this by looking at the top of the * mark stack first, and only signalling a stack underflow if * the object stack is empty and the mark stack doesn't match * our expectations. */ if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) { self->num_marks--; } else if (len > 0) { len--; Py_DECREF(self->stack->data[len]); Py_SET_SIZE(self->stack, len); } else { return stack_underflow(); } return 0; } static int load_pop_mark(UnpicklerObject *self) { Py_ssize_t i; if ((i = marker(self)) < 0) return -1; Pdata_clear(self->stack, i); return 0; } static int load_dup(UnpicklerObject *self) { PyObject *last; Py_ssize_t len; if ((len = Py_SIZE(self->stack)) <= 0) return stack_underflow(); last = self->stack->data[len - 1]; PDATA_APPEND(self->stack, last, -1); return 0; } static int load_get(UnpicklerObject *self) { PyObject *key, *value; Py_ssize_t idx; Py_ssize_t len; char *s; if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; if (len < 2) return bad_readline(); key = PyLong_FromString(s, NULL, 10); if (key == NULL) return -1; idx = PyLong_AsSsize_t(key); if (idx == -1 && PyErr_Occurred()) { Py_DECREF(key); return -1; } value = _Unpickler_MemoGet(self, idx); if (value == NULL) { if (!PyErr_Occurred()) PyErr_SetObject(PyExc_KeyError, key); Py_DECREF(key); return -1; } Py_DECREF(key); PDATA_APPEND(self->stack, value, -1); return 0; } static int load_binget(UnpicklerObject *self) { PyObject *value; Py_ssize_t idx; char *s; if 
(_Unpickler_Read(self, &s, 1) < 0) return -1; idx = Py_CHARMASK(s[0]); value = _Unpickler_MemoGet(self, idx); if (value == NULL) { PyObject *key = PyLong_FromSsize_t(idx); if (!PyErr_Occurred()) PyErr_SetObject(PyExc_KeyError, key); Py_DECREF(key); return -1; } PDATA_APPEND(self->stack, value, -1); return 0; } static int load_long_binget(UnpicklerObject *self) { PyObject *value; Py_ssize_t idx; char *s; if (_Unpickler_Read(self, &s, 4) < 0) return -1; idx = calc_binsize(s, 4); value = _Unpickler_MemoGet(self, idx); if (value == NULL) { PyObject *key = PyLong_FromSsize_t(idx); if (!PyErr_Occurred()) PyErr_SetObject(PyExc_KeyError, key); Py_DECREF(key); return -1; } PDATA_APPEND(self->stack, value, -1); return 0; } /* Push an object from the extension registry (EXT[124]). nbytes is * the number of bytes following the opcode, holding the index (code) value. */ static int load_extension(UnpicklerObject *self, int nbytes) { char *codebytes; /* the nbytes bytes after the opcode */ long code; /* calc_binint returns long */ PyObject *py_code; /* code as a Python int */ PyObject *obj; /* the object to push */ PyObject *pair; /* (module_name, class_name) */ PyObject *module_name, *class_name; assert(nbytes == 1 || nbytes == 2 || nbytes == 4); if (_Unpickler_Read(self, &codebytes, nbytes) < 0) return -1; code = calc_binint(codebytes, nbytes); if (code <= 0) { /* note that 0 is forbidden */ /* Corrupt or hostile pickle. */ PyErr_SetString(UnpicklingError, "EXT specifies code <= 0"); return -1; } /* Look for the code in the cache. */ py_code = PyLong_FromLong(code); if (py_code == NULL) return -1; obj = PyDict_GetItem(extension_cache, py_code); if (obj != NULL) { /* Bingo. */ Py_DECREF(py_code); PDATA_APPEND(self->stack, obj, -1); return 0; } /* Look up the (module_name, class_name) pair. 
*/ pair = PyDict_GetItem(inverted_registry, py_code); if (pair == NULL) { Py_DECREF(py_code); PyErr_Format(PyExc_ValueError, "unregistered extension " "code %ld", code); return -1; } /* Since the extension registry is manipulable via Python code, * confirm that pair is really a 2-tuple of strings. */ if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 || !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) || !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) { Py_DECREF(py_code); PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] " "isn't a 2-tuple of strings", code); return -1; } /* Load the object. */ obj = find_class(self, module_name, class_name); if (obj == NULL) { Py_DECREF(py_code); return -1; } /* Cache code -> obj. */ code = PyDict_SetItem(extension_cache, py_code, obj); Py_DECREF(py_code); if (code < 0) { Py_DECREF(obj); return -1; } PDATA_PUSH(self->stack, obj, -1); return 0; } static int load_put(UnpicklerObject *self) { PyObject *key, *value; Py_ssize_t idx; Py_ssize_t len; char *s; if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; if (len < 2) return bad_readline(); if (Py_SIZE(self->stack) <= 0) return stack_underflow(); value = self->stack->data[Py_SIZE(self->stack) - 1]; key = PyLong_FromString(s, NULL, 10); if (key == NULL) return -1; idx = PyLong_AsSsize_t(key); Py_DECREF(key); if (idx < 0) { if (!PyErr_Occurred()) PyErr_SetString(PyExc_ValueError, "negative PUT argument"); return -1; } return _Unpickler_MemoPut(self, idx, value); } static int load_binput(UnpicklerObject *self) { PyObject *value; Py_ssize_t idx; char *s; if (_Unpickler_Read(self, &s, 1) < 0) return -1; if (Py_SIZE(self->stack) <= 0) return stack_underflow(); value = self->stack->data[Py_SIZE(self->stack) - 1]; idx = Py_CHARMASK(s[0]); return _Unpickler_MemoPut(self, idx, value); } static int load_long_binput(UnpicklerObject *self) { PyObject *value; Py_ssize_t idx; char *s; if (_Unpickler_Read(self, &s, 4) < 0) return -1; if (Py_SIZE(self->stack) <= 0) 
return stack_underflow(); value = self->stack->data[Py_SIZE(self->stack) - 1]; idx = calc_binsize(s, 4); if (idx < 0) { PyErr_SetString(PyExc_ValueError, "negative LONG_BINPUT argument"); return -1; } return _Unpickler_MemoPut(self, idx, value); } static int do_append(UnpicklerObject *self, Py_ssize_t x) { PyObject *value; PyObject *list; Py_ssize_t len, i; len = Py_SIZE(self->stack); if (x > len || x <= 0) return stack_underflow(); if (len == x) /* nothing to do */ return 0; list = self->stack->data[x - 1]; if (PyList_Check(list)) { PyObject *slice; Py_ssize_t list_len; int ret; slice = Pdata_poplist(self->stack, x); if (!slice) return -1; list_len = PyList_GET_SIZE(list); ret = PyList_SetSlice(list, list_len, list_len, slice); Py_DECREF(slice); return ret; } else { PyObject *append_func; _Py_IDENTIFIER(append); append_func = _PyObject_GetAttrId(list, &PyId_append); if (append_func == NULL) return -1; for (i = x; i < len; i++) { PyObject *result; value = self->stack->data[i]; result = _Unpickler_FastCall(self, append_func, value); if (result == NULL) { Pdata_clear(self->stack, i + 1); Py_SET_SIZE(self->stack, x); Py_DECREF(append_func); return -1; } Py_DECREF(result); } Py_SET_SIZE(self->stack, x); Py_DECREF(append_func); } return 0; } static int load_append(UnpicklerObject *self) { return do_append(self, Py_SIZE(self->stack) - 1); } static int load_appends(UnpicklerObject *self) { return do_append(self, marker(self)); } static int do_setitems(UnpicklerObject *self, Py_ssize_t x) { PyObject *value, *key; PyObject *dict; Py_ssize_t len, i; int status = 0; len = Py_SIZE(self->stack); if (x > len || x <= 0) return stack_underflow(); if (len == x) /* nothing to do */ return 0; if ((len - x) % 2 != 0) { /* Currupt or hostile pickle -- we never write one like this. */ PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS"); return -1; } /* Here, dict does not actually need to be a PyDict; it could be anything that supports the __setitem__ attribute. 
*/ dict = self->stack->data[x - 1]; for (i = x + 1; i < len; i += 2) { key = self->stack->data[i - 1]; value = self->stack->data[i]; if (PyObject_SetItem(dict, key, value) < 0) { status = -1; break; } } Pdata_clear(self->stack, x); return status; } static int load_setitem(UnpicklerObject *self) { return do_setitems(self, Py_SIZE(self->stack) - 2); } static int load_setitems(UnpicklerObject *self) { return do_setitems(self, marker(self)); } static int load_build(UnpicklerObject *self) { PyObject *state, *inst, *slotstate; PyObject *setstate; int status = 0; _Py_IDENTIFIER(__setstate__); /* Stack is ... instance, state. We want to leave instance at * the stack top, possibly mutated via instance.__setstate__(state). */ if (Py_SIZE(self->stack) < 2) return stack_underflow(); PDATA_POP(self->stack, state); if (state == NULL) return -1; inst = self->stack->data[Py_SIZE(self->stack) - 1]; setstate = _PyObject_GetAttrId(inst, &PyId___setstate__); if (setstate == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); else { Py_DECREF(state); return -1; } } else { PyObject *result; /* The explicit __setstate__ is responsible for everything. */ /* Ugh... this does not leak since _Unpickler_FastCall() steals the reference to state first. */ result = _Unpickler_FastCall(self, setstate, state); Py_DECREF(setstate); if (result == NULL) return -1; Py_DECREF(result); return 0; } /* A default __setstate__. First see whether state embeds a * slot state dict too (a proto 2 addition). */ if (PyTuple_Check(state) && Py_SIZE(state) == 2) { PyObject *tmp = state; state = PyTuple_GET_ITEM(tmp, 0); slotstate = PyTuple_GET_ITEM(tmp, 1); Py_INCREF(state); Py_INCREF(slotstate); Py_DECREF(tmp); } else slotstate = NULL; /* Set inst.__dict__ from the state dict (if any). 
*/ if (state != Py_None) { PyObject *dict; PyObject *d_key, *d_value; Py_ssize_t i; _Py_IDENTIFIER(__dict__); if (!PyDict_Check(state)) { PyErr_SetString(UnpicklingError, "state is not a dictionary"); goto error; } dict = _PyObject_GetAttrId(inst, &PyId___dict__); if (dict == NULL) goto error; i = 0; while (PyDict_Next(state, &i, &d_key, &d_value)) { /* normally the keys for instance attributes are interned. we should try to do that here. */ Py_INCREF(d_key); if (PyUnicode_CheckExact(d_key)) PyUnicode_InternInPlace(&d_key); if (PyObject_SetItem(dict, d_key, d_value) < 0) { Py_DECREF(d_key); goto error; } Py_DECREF(d_key); } Py_DECREF(dict); } /* Also set instance attributes from the slotstate dict (if any). */ if (slotstate != NULL) { PyObject *d_key, *d_value; Py_ssize_t i; if (!PyDict_Check(slotstate)) { PyErr_SetString(UnpicklingError, "slot state is not a dictionary"); goto error; } i = 0; while (PyDict_Next(slotstate, &i, &d_key, &d_value)) { if (PyObject_SetAttr(inst, d_key, d_value) < 0) goto error; } } if (0) { error: status = -1; } Py_DECREF(state); Py_XDECREF(slotstate); return status; } static int load_mark(UnpicklerObject *self) { /* Note that we split the (pickle.py) stack into two stacks, an * object stack and a mark stack. Here we push a mark onto the * mark stack. */ if ((self->num_marks + 1) >= self->marks_size) { size_t alloc; Py_ssize_t *marks; /* Use the size_t type to check for overflow. 
*/
        alloc = ((size_t)self->num_marks << 1) + 20;
        if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
            alloc <= ((size_t)self->num_marks + 1)) {
            PyErr_NoMemory();
            return -1;
        }

        if (self->marks == NULL)
            marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
        else
            marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
                                                 alloc * sizeof(Py_ssize_t));
        if (marks == NULL) {
            PyErr_NoMemory();
            return -1;
        }
        self->marks = marks;
        self->marks_size = (Py_ssize_t)alloc;
    }

    self->marks[self->num_marks++] = Py_SIZE(self->stack);

    return 0;
}

/* Handler for the REDUCE opcode.
 *
 * Pops an argument tuple and then a callable from the unpickling stack,
 * calls callable(*argtup) and pushes the result.
 *
 * Returns 0 on success and -1 on failure.  The NULL checks after PDATA_POP
 * suggest the macro leaves its target NULL (presumably with an exception
 * set) when the stack is empty -- confirm against the macro definition.
 */
static int
load_reduce(UnpicklerObject *self)
{
    PyObject *callable = NULL;
    PyObject *argtup = NULL;
    PyObject *obj = NULL;

    PDATA_POP(self->stack, argtup);
    if (argtup == NULL)
        return -1;
    PDATA_POP(self->stack, callable);
    if (callable) {
        obj = PyObject_CallObject(callable, argtup);
        Py_DECREF(callable);
    }
    /* argtup is released whether or not the call happened or succeeded. */
    Py_DECREF(argtup);

    if (obj == NULL)
        return -1;

    /* The trailing -1 is presumably the value PDATA_PUSH makes this
       function return on failure -- confirm against the macro. */
    PDATA_PUSH(self->stack, obj, -1);
    return 0;
}

/* Just raises an error if we don't know the protocol specified.  PROTO
 * is the first opcode for protocols >= 2.
 *
 * Reads one byte, interprets it as an unsigned protocol number and stores
 * it in self->proto when it does not exceed HIGHEST_PROTOCOL.  Returns 0 on
 * success; returns -1 if the read fails or, with ValueError set, if the
 * protocol is unsupported.
 */
static int
load_proto(UnpicklerObject *self)
{
    char *s;
    int i;

    if (_Unpickler_Read(self, &s, 1) < 0)
        return -1;

    /* Go through unsigned char so bytes >= 0x80 do not become negative. */
    i = (unsigned char)s[0];
    if (i <= HIGHEST_PROTOCOL) {
        self->proto = i;
        return 0;
    }

    PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
    return -1;
}

/* Main unpickling loop.
 *
 * Resets the mark count, clears any leftover stack contents, then reads one
 * opcode byte at a time and dispatches it to the matching load_* handler.
 * A handler returning < 0 leaves the loop, as does the STOP opcode; an
 * unknown opcode sets an exception and returns NULL immediately.
 *
 * Returns the object popped from the top of the stack, or NULL if
 * _Unpickler_SkipConsumed() fails or an exception is pending after the loop.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *err;
    PyObject *value = NULL;
    char *s;

    self->num_marks = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, &s, 1) < 0)
            break;

        /* In each OP/OP_ARG arm, `break` leaves the switch (and then the
           loop via the `break` after it); `continue` fetches the next
           opcode. */
        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP(BINBYTES, load_binbytes)
        OP(SHORT_BINBYTES, load_short_binbytes)
        OP(BINSTRING, load_binstring)
        OP(SHORT_BINSTRING, load_short_binstring)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP(BINUNICODE, load_binunicode)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(GLOBAL, load_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        case STOP:
            break;

        default:
            /* A NUL opcode byte is reported as EOFError, any other unknown
               byte as UnpicklingError. */
            if (s[0] == '\0')
                PyErr_SetNone(PyExc_EOFError);
            else
                PyErr_Format(UnpicklingError,
                             "invalid load key, '%c'.", s[0]);
            return NULL;
        }

        break;                  /* and we are done! */
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    /* XXX: It is not clear what this is actually for. */
    if ((err = PyErr_Occurred())) {
        if (err == PyExc_EOFError) {
            PyErr_SetNone(PyExc_EOFError);
        }
        return NULL;
    }

    PDATA_POP(self->stack, value);
    return value;
}

PyDoc_STRVAR(Unpickler_load_doc,
"load() -> object. Load a pickle."
"\n"
"Read a pickled object representation from the open file object given in\n"
"the constructor, and return the reconstituted object hierarchy specified\n"
"therein.\n");

/* Python-visible Unpickler.load(): validates that the instance was
 * initialized, then delegates to load().  Raises UnpicklingError and returns
 * NULL when self->read is NULL.
 */
static PyObject *
Unpickler_load(UnpicklerObject *self)
{
    /* Check whether the Unpickler was initialized correctly. This prevents
       segfaulting if a subclass overrode __init__ with a function that does
       not call Unpickler.__init__(). Here, we simply ensure that self->read
       is not NULL. */
    if (self->read == NULL) {
        PyErr_Format(UnpicklingError,
                     "Unpickler.__init__() was not called by %s.__init__()",
                     Py_TYPE(self)->tp_name);
        return NULL;
    }

    return load(self);
}

/* No-load functions to support noload, which is used to
   find persistent references.
*/ static int noload_obj(UnpicklerObject *self) { int i; if ((i = marker(self)) < 0) return -1; return Pdata_clear(self->stack, i+1); } static int noload_inst(UnpicklerObject *self) { int i; char *s; if ((i = marker(self)) < 0) return -1; Pdata_clear(self->stack, i); if (_Unpickler_Readline(self, &s) < 0) return -1; if (_Unpickler_Readline(self, &s) < 0) return -1; PDATA_APPEND(self->stack, Py_None, -1); return 0; } static int noload_newobj(UnpicklerObject *self) { PyObject *obj; PDATA_POP(self->stack, obj); /* pop argtuple */ if (obj == NULL) return -1; Py_DECREF(obj); PDATA_POP(self->stack, obj); /* pop cls */ if (obj == NULL) return -1; Py_DECREF(obj); PDATA_APPEND(self->stack, Py_None, -1); return 0; } static int noload_global(UnpicklerObject *self) { char *s; if (_Unpickler_Readline(self, &s) < 0) return -1; if (_Unpickler_Readline(self, &s) < 0) return -1; PDATA_APPEND(self->stack, Py_None,-1); return 0; } static int noload_reduce(UnpicklerObject *self) { if (Py_SIZE(self->stack) < 2) return stack_underflow(); Pdata_clear(self->stack, Py_SIZE(self->stack)-2); PDATA_APPEND(self->stack, Py_None,-1); return 0; } static int noload_build(UnpicklerObject *self) { if (Py_SIZE(self->stack) < 1) return stack_underflow(); Pdata_clear(self->stack, Py_SIZE(self->stack)-1); return 0; } static int noload_extension(UnpicklerObject *self, int nbytes) { char *codebytes; assert(nbytes == 1 || nbytes == 2 || nbytes == 4); if (_Unpickler_Read(self, &codebytes, nbytes) < 0) return -1; PDATA_APPEND(self->stack, Py_None, -1); return 0; } static int do_noload_append(UnpicklerObject *self, Py_ssize_t x) { PyObject *list = 0; Py_ssize_t len; len=Py_SIZE(self->stack); if (!( len >= x && x > 0 )) return stack_underflow(); /* nothing to do */ if (len==x) return 0; list=self->stack->data[x-1]; if (list == Py_None) { return Pdata_clear(self->stack, x); } else { return do_append(self, x); } } static int noload_append(UnpicklerObject *self) { return do_noload_append(self, 
Py_SIZE(self->stack) - 1); } static int noload_appends(UnpicklerObject *self) { return do_noload_append(self, marker(self)); } static int do_noload_setitems(UnpicklerObject *self, Py_ssize_t x) { PyObject *dict = 0; Py_ssize_t len; if (!( (len=Py_SIZE(self->stack)) >= x && x > 0 )) return stack_underflow(); dict=self->stack->data[x-1]; if (dict == Py_None) { return Pdata_clear(self->stack, x); } else { return do_setitems(self, x); } } static int noload_setitem(UnpicklerObject *self) { return do_noload_setitems(self, Py_SIZE(self->stack) - 2); } static int noload_setitems(UnpicklerObject *self) { return do_noload_setitems(self, marker(self)); } static PyObject * noload(UnpicklerObject *self) { PyObject *err = 0, *val = 0; char *s; self->num_marks = 0; Pdata_clear(self->stack, 0); while (1) { if (_Unpickler_Read(self, &s, 1) < 0) break; switch (s[0]) { case NONE: if (load_none(self) < 0) break; continue; case BININT: if (load_binint(self) < 0) break; continue; case BININT1: if (load_binint1(self) < 0) break; continue; case BININT2: if (load_binint2(self) < 0) break; continue; case INT: if (load_int(self) < 0) break; continue; case LONG: if (load_long(self) < 0) break; continue; case LONG1: if (load_counted_long(self, 1) < 0) break; continue; case LONG4: if (load_counted_long(self, 4) < 0) break; continue; case FLOAT: if (load_float(self) < 0) break; continue; case BINFLOAT: if (load_binfloat(self) < 0) break; continue; case BINSTRING: if (load_binstring(self) < 0) break; continue; case SHORT_BINSTRING: if (load_short_binstring(self) < 0) break; continue; case STRING: if (load_string(self) < 0) break; continue; case UNICODE: if (load_unicode(self) < 0) break; continue; case BINUNICODE: if (load_binunicode(self) < 0) break; continue; case EMPTY_TUPLE: if (load_counted_tuple(self, 0) < 0) break; continue; case TUPLE1: if (load_counted_tuple(self, 1) < 0) break; continue; case TUPLE2: if (load_counted_tuple(self, 2) < 0) break; continue; case TUPLE3: if 
(load_counted_tuple(self, 3) < 0) break; continue; case TUPLE: if (load_tuple(self) < 0) break; continue; case EMPTY_LIST: if (load_empty_list(self) < 0) break; continue; case LIST: if (load_list(self) < 0) break; continue; case EMPTY_DICT: if (load_empty_dict(self) < 0) break; continue; case DICT: if (load_dict(self) < 0) break; continue; case OBJ: if (noload_obj(self) < 0) break; continue; case INST: if (noload_inst(self) < 0) break; continue; case NEWOBJ: if (noload_newobj(self) < 0) break; continue; case GLOBAL: if (noload_global(self) < 0) break; continue; case APPEND: if (noload_append(self) < 0) break; continue; case APPENDS: if (noload_appends(self) < 0) break; continue; case BUILD: if (noload_build(self) < 0) break; continue; case DUP: if (load_dup(self) < 0) break; continue; case BINGET: if (load_binget(self) < 0) break; continue; case LONG_BINGET: if (load_long_binget(self) < 0) break; continue; case GET: if (load_get(self) < 0) break; continue; case EXT1: if (noload_extension(self, 1) < 0) break; continue; case EXT2: if (noload_extension(self, 2) < 0) break; continue; case EXT4: if (noload_extension(self, 4) < 0) break; continue; case MARK: if (load_mark(self) < 0) break; continue; case BINPUT: if (load_binput(self) < 0) break; continue; case LONG_BINPUT: if (load_long_binput(self) < 0) break; continue; case PUT: if (load_put(self) < 0) break; continue; case POP: if (load_pop(self) < 0) break; continue; case POP_MARK: if (load_pop_mark(self) < 0) break; continue; case SETITEM: if (noload_setitem(self) < 0) break; continue; case SETITEMS: if (noload_setitems(self) < 0) break; continue; case STOP: break; case PERSID: if (load_persid(self) < 0) break; continue; case BINPERSID: if (load_binpersid(self) < 0) break; continue; case REDUCE: if (noload_reduce(self) < 0) break; continue; case PROTO: if (load_proto(self) < 0) break; continue; case NEWTRUE: if (load_bool(self, Py_True) < 0) break; continue; case NEWFALSE: if (load_bool(self, Py_False) < 0) break; 
continue; case BINBYTES: if (load_binbytes(self) < 0) break; continue; case SHORT_BINBYTES: if (load_short_binbytes(self) < 0) break; continue; default: PyErr_Format(UnpicklingError, "invalid load key, '%c'.", s[0]); return NULL; } break; } if ((err = PyErr_Occurred())) { if (err == PyExc_EOFError) { PyErr_SetNone(PyExc_EOFError); } return NULL; } PDATA_POP(self->stack, val); return val; } PyDoc_STRVAR(Unpickler_noload_doc, "noload() -- not load a pickle, but go through most of the motions\n" "\n" "This function can be used to read past a pickle without instantiating\n" "any objects or importing any modules. It can also be used to find all\n" "persistent references without instantiating any objects or importing\n" "any modules.\n"); static PyObject * Unpickler_noload(UnpicklerObject *self, PyObject *unused) { return noload(self); } /* The name of find_class() is misleading. In newer pickle protocols, this function is used for loading any global (i.e., functions), not just classes. The name is kept only for backward compatibility. */ PyDoc_STRVAR(Unpickler_find_class_doc, "find_class(module_name, global_name) -> object.\n" "\n" "Return an object from a specified module, importing the module if\n" "necessary. Subclasses may override this method (e.g. to restrict\n" "unpickling of arbitrary classes and functions).\n" "\n" "This method is called whenever a class or a function object is\n" "needed. Both arguments passed are str objects.\n"); static PyObject * Unpickler_find_class(UnpicklerObject *self, PyObject *args) { PyObject *global; PyObject *modules_dict; PyObject *module; PyObject *module_name, *global_name; if (!PyArg_UnpackTuple(args, "find_class", 2, 2, &module_name, &global_name)) return NULL; /* Try to map the old names used in Python 2.x to the new ones used in Python 3.x. We do this only with old pickle protocols and when the user has not disabled the feature. 
*/ if (self->proto < 3 && self->fix_imports) { PyObject *key; PyObject *item; /* Check if the global (i.e., a function or a class) was renamed or moved to another module. */ key = PyTuple_Pack(2, module_name, global_name); if (key == NULL) return NULL; item = PyDict_GetItemWithError(name_mapping_2to3, key); Py_DECREF(key); if (item) { if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { PyErr_Format(PyExc_RuntimeError, "_compat_pickle.NAME_MAPPING values should be " "2-tuples, not %.200s", Py_TYPE(item)->tp_name); return NULL; } module_name = PyTuple_GET_ITEM(item, 0); global_name = PyTuple_GET_ITEM(item, 1); if (!PyUnicode_Check(module_name) || !PyUnicode_Check(global_name)) { PyErr_Format(PyExc_RuntimeError, "_compat_pickle.NAME_MAPPING values should be " "pairs of str, not (%.200s, %.200s)", Py_TYPE(module_name)->tp_name, Py_TYPE(global_name)->tp_name); return NULL; } } else if (PyErr_Occurred()) { return NULL; } /* Check if the module was renamed. */ item = PyDict_GetItemWithError(import_mapping_2to3, module_name); if (item) { if (!PyUnicode_Check(item)) { PyErr_Format(PyExc_RuntimeError, "_compat_pickle.IMPORT_MAPPING values should be " "strings, not %.200s", Py_TYPE(item)->tp_name); return NULL; } module_name = item; } else if (PyErr_Occurred()) { return NULL; } } modules_dict = PySys_GetObject("modules"); if (modules_dict == NULL) return NULL; module = PyDict_GetItemWithError(modules_dict, module_name); if (module == NULL) { if (PyErr_Occurred()) return NULL; module = PyImport_Import(module_name); if (module == NULL) return NULL; global = PyObject_GetAttr(module, global_name); Py_DECREF(module); } else { global = PyObject_GetAttr(module, global_name); } return global; } static struct PyMethodDef Unpickler_methods[] = { {"load", (PyCFunction)Unpickler_load, METH_NOARGS, Unpickler_load_doc}, {"noload", (PyCFunction)Unpickler_noload, METH_NOARGS, Unpickler_noload_doc}, {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS, 
Unpickler_find_class_doc}, {NULL, NULL} /* sentinel */ }; static void Unpickler_dealloc(UnpicklerObject *self) { PyObject_GC_UnTrack((PyObject *)self); Py_XDECREF(self->readline); Py_XDECREF(self->read); Py_XDECREF(self->peek); Py_XDECREF(self->stack); Py_XDECREF(self->pers_func); Py_XDECREF(self->arg); if (self->buffer.buf != NULL) { PyBuffer_Release(&self->buffer); self->buffer.buf = NULL; } _Unpickler_MemoCleanup(self); PyMem_Free(self->marks); PyMem_Free(self->input_line); free(self->encoding); free(self->errors); Py_TYPE(self)->tp_free((PyObject *)self); } static int Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg) { Py_VISIT(self->readline); Py_VISIT(self->read); Py_VISIT(self->peek); Py_VISIT(self->stack); Py_VISIT(self->pers_func); Py_VISIT(self->arg); return 0; } static int Unpickler_clear(UnpicklerObject *self) { Py_CLEAR(self->readline); Py_CLEAR(self->read); Py_CLEAR(self->peek); Py_CLEAR(self->stack); Py_CLEAR(self->pers_func); Py_CLEAR(self->arg); if (self->buffer.buf != NULL) { PyBuffer_Release(&self->buffer); self->buffer.buf = NULL; } _Unpickler_MemoCleanup(self); PyMem_Free(self->marks); self->marks = NULL; PyMem_Free(self->input_line); self->input_line = NULL; free(self->encoding); self->encoding = NULL; free(self->errors); self->errors = NULL; return 0; } PyDoc_STRVAR(Unpickler_doc, "Unpickler(file, *, encoding='ASCII', errors='strict')" "\n" "This takes a binary file for reading a pickle data stream.\n" "\n" "The protocol version of the pickle is detected automatically, so no\n" "proto argument is needed.\n" "\n" "The file-like object must have two methods, a read() method\n" "that takes an integer argument, and a readline() method that\n" "requires no arguments. 
Both methods should return bytes.\n" "Thus file-like object can be a binary file object opened for\n" "reading, a BytesIO object, or any other custom object that\n" "meets this interface.\n" "\n" "Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n" "which are used to control compatiblity support for pickle stream\n" "generated by Python 2.x. If *fix_imports* is True, pickle will try to\n" "map the old Python 2.x names to the new names used in Python 3.x. The\n" "*encoding* and *errors* tell pickle how to decode 8-bit string\n" "instances pickled by Python 2.x; these default to 'ASCII' and\n" "'strict', respectively. *encoding* can be 'bytes' to read 8-bit string\n" "instances as byte objects.\n"); static int Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0}; PyObject *file; PyObject *fix_imports = Py_True; char *encoding = NULL; char *errors = NULL; _Py_IDENTIFIER(persistent_load); /* XXX: That is an horrible error message. But, I don't know how to do better... */ if (Py_SIZE(args) != 1) { PyErr_Format(PyExc_TypeError, "%s takes exactly one positional argument (%zd given)", Py_TYPE(self)->tp_name, Py_SIZE(args)); return -1; } /* Arguments parsing needs to be done in the __init__() method to allow subclasses to define their own __init__() method, which may (or may not) support Unpickler arguments. However, this means we need to be extra careful in the other Unpickler methods, since a subclass could forget to call Unpickler.__init__() thus breaking our internal invariants. */ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist, &file, &fix_imports, &encoding, &errors)) return -1; /* In case of multiple __init__() calls, clear previous content. 
*/ if (self->read != NULL) (void)Unpickler_clear(self); if (_Unpickler_SetInputStream(self, file) < 0) return -1; if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0) return -1; self->fix_imports = PyObject_IsTrue(fix_imports); if (self->fix_imports == -1) return -1; if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) { self->pers_func = _PyObject_GetAttrId((PyObject *)self, &PyId_persistent_load); if (self->pers_func == NULL) return -1; } else { self->pers_func = NULL; } self->stack = (Pdata *)Pdata_New(); if (self->stack == NULL) return -1; self->memo_size = 32; self->memo = _Unpickler_NewMemo(self->memo_size); if (self->memo == NULL) return -1; self->arg = NULL; self->proto = 0; return 0; } /* Define a proxy object for the Unpickler's internal memo object. This is to * avoid breaking code like: * unpickler.memo.clear() * and * unpickler.memo = saved_memo * Is this a good idea? Not really, but we don't want to break code that uses * it. Note that we don't implement the entire mapping API here. This is * intentional, as these should be treated as black-box implementation details. * * We do, however, have to implement pickling/unpickling support because of * real-world code like cvs2svn. */ typedef struct { PyObject_HEAD UnpicklerObject *unpickler; } UnpicklerMemoProxyObject; PyDoc_STRVAR(ump_clear_doc, "memo.clear() -> None. Remove all items from memo."); static PyObject * ump_clear(UnpicklerMemoProxyObject *self) { _Unpickler_MemoCleanup(self->unpickler); self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size); if (self->unpickler->memo == NULL) return NULL; Py_RETURN_NONE; } PyDoc_STRVAR(ump_copy_doc, "memo.copy() -> new_memo. 
Copy the memo to a new object."); static PyObject * ump_copy(UnpicklerMemoProxyObject *self) { Py_ssize_t i; PyObject *new_memo = PyDict_New(); if (new_memo == NULL) return NULL; for (i = 0; i < self->unpickler->memo_size; i++) { int status; PyObject *key, *value; value = self->unpickler->memo[i]; if (value == NULL) continue; key = PyLong_FromSsize_t(i); if (key == NULL) goto error; status = PyDict_SetItem(new_memo, key, value); Py_DECREF(key); if (status < 0) goto error; } return new_memo; error: Py_DECREF(new_memo); return NULL; } PyDoc_STRVAR(ump_reduce_doc, "memo.__reduce__(). Pickling support."); static PyObject * ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args) { PyObject *reduce_value; PyObject *constructor_args; PyObject *contents = ump_copy(self); if (contents == NULL) return NULL; reduce_value = PyTuple_New(2); if (reduce_value == NULL) { Py_DECREF(contents); return NULL; } constructor_args = PyTuple_New(1); if (constructor_args == NULL) { Py_DECREF(contents); Py_DECREF(reduce_value); return NULL; } PyTuple_SET_ITEM(constructor_args, 0, contents); Py_INCREF((PyObject *)&PyDict_Type); PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type); PyTuple_SET_ITEM(reduce_value, 1, constructor_args); return reduce_value; } static PyMethodDef unpicklerproxy_methods[] = { {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc}, {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc}, {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc}, {NULL, NULL} /* sentinel */ }; static void UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self) { PyObject_GC_UnTrack(self); Py_XDECREF(self->unpickler); PyObject_GC_Del((PyObject *)self); } static int UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self, visitproc visit, void *arg) { Py_VISIT(self->unpickler); return 0; } static int UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self) { Py_CLEAR(self->unpickler); return 0; } static PyTypeObject UnpicklerMemoProxyType = { 
PyVarObject_HEAD_INIT(NULL, 0) "_pickle.UnpicklerMemoProxy", /*tp_name*/ sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/ 0, (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ PyObject_HashNotImplemented, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ PyObject_GenericSetAttr, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 0, /* tp_doc */ (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */ (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ unpicklerproxy_methods, /* tp_methods */ }; static PyObject * UnpicklerMemoProxy_New(UnpicklerObject *unpickler) { UnpicklerMemoProxyObject *self; self = PyObject_GC_New(UnpicklerMemoProxyObject, &UnpicklerMemoProxyType); if (self == NULL) return NULL; Py_INCREF(unpickler); self->unpickler = unpickler; PyObject_GC_Track(self); return (PyObject *)self; } /*****************************************************************************/ static PyObject * Unpickler_get_memo(UnpicklerObject *self) { return UnpicklerMemoProxy_New(self); } static int Unpickler_set_memo(UnpicklerObject *self, PyObject *obj) { PyObject **new_memo; Py_ssize_t new_memo_size = 0; Py_ssize_t i; if (obj == NULL) { PyErr_SetString(PyExc_TypeError, "attribute deletion is not supported"); return -1; } if (Py_TYPE(obj) == &UnpicklerMemoProxyType) { UnpicklerObject *unpickler = ((UnpicklerMemoProxyObject *)obj)->unpickler; new_memo_size = unpickler->memo_size; new_memo = _Unpickler_NewMemo(new_memo_size); if (new_memo == NULL) return -1; for (i = 0; i < new_memo_size; i++) { Py_XINCREF(unpickler->memo[i]); new_memo[i] = unpickler->memo[i]; } } else if (PyDict_Check(obj)) { Py_ssize_t i = 0; PyObject *key, 
*value; new_memo_size = PyDict_Size(obj); new_memo = _Unpickler_NewMemo(new_memo_size); if (new_memo == NULL) return -1; while (PyDict_Next(obj, &i, &key, &value)) { Py_ssize_t idx; if (!PyLong_Check(key)) { PyErr_SetString(PyExc_TypeError, "memo key must be integers"); goto error; } idx = PyLong_AsSsize_t(key); if (idx == -1 && PyErr_Occurred()) goto error; if (_Unpickler_MemoPut(self, idx, value) < 0) goto error; } } else { PyErr_Format(PyExc_TypeError, "'memo' attribute must be an UnpicklerMemoProxy object" "or dict, not %.200s", Py_TYPE(obj)->tp_name); return -1; } _Unpickler_MemoCleanup(self); self->memo_size = new_memo_size; self->memo = new_memo; return 0; error: if (new_memo_size) { i = new_memo_size; while (--i >= 0) { Py_XDECREF(new_memo[i]); } PyMem_FREE(new_memo); } return -1; } static PyObject * Unpickler_get_persload(UnpicklerObject *self) { if (self->pers_func == NULL) PyErr_SetString(PyExc_AttributeError, "persistent_load"); else Py_INCREF(self->pers_func); return self->pers_func; } static int Unpickler_set_persload(UnpicklerObject *self, PyObject *value) { PyObject *tmp; if (value == NULL) { PyErr_SetString(PyExc_TypeError, "attribute deletion is not supported"); return -1; } if (!PyCallable_Check(value)) { PyErr_SetString(PyExc_TypeError, "persistent_load must be a callable taking " "one argument"); return -1; } tmp = self->pers_func; Py_INCREF(value); self->pers_func = value; Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. 
*/ return 0; } static PyGetSetDef Unpickler_getsets[] = { {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo}, {"persistent_load", (getter)Unpickler_get_persload, (setter)Unpickler_set_persload}, {NULL} }; static PyTypeObject Unpickler_Type = { PyVarObject_HEAD_INIT(NULL, 0) "_pickle.Unpickler", /*tp_name*/ sizeof(UnpicklerObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Unpickler_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_reserved*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, Unpickler_doc, /*tp_doc*/ (traverseproc)Unpickler_traverse, /*tp_traverse*/ (inquiry)Unpickler_clear, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ Unpickler_methods, /*tp_methods*/ 0, /*tp_members*/ Unpickler_getsets, /*tp_getset*/ 0, /*tp_base*/ 0, /*tp_dict*/ 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ 0, /*tp_dictoffset*/ (initproc)Unpickler_init, /*tp_init*/ PyType_GenericAlloc, /*tp_alloc*/ PyType_GenericNew, /*tp_new*/ PyObject_GC_Del, /*tp_free*/ 0, /*tp_is_gc*/ }; PyDoc_STRVAR(pickle_dump_doc, "dump(obj, file, protocol=None, *, fix_imports=True) -> None\n" "\n" "Write a pickled representation of obj to the open file object file. This\n" "is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n" "efficient.\n" "\n" "The optional protocol argument tells the pickler to use the given protocol;\n" "supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n" "backward-incompatible protocol designed for Python 3.0.\n" "\n" "Specifying a negative protocol version selects the highest protocol version\n" "supported. 
The higher the protocol used, the more recent the version of\n" "Python needed to read the pickle produced.\n" "\n" "The file argument must have a write() method that accepts a single bytes\n" "argument. It can thus be a file object opened for binary writing, a\n" "io.BytesIO instance, or any other custom object that meets this interface.\n" "\n" "If fix_imports is True and protocol is less than 3, pickle will try to\n" "map the new Python 3.x names to the old module names used in Python 2.x,\n" "so that the pickle data stream is readable with Python 2.x.\n"); static PyObject * pickle_dump(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0}; PyObject *obj; PyObject *file; PyObject *proto = NULL; PyObject *fix_imports = Py_True; PicklerObject *pickler; /* fix_imports is a keyword-only argument. */ if (Py_SIZE(args) > 3) { PyErr_Format(PyExc_TypeError, "pickle.dump() takes at most 3 positional " "argument (%zd given)", Py_SIZE(args)); return NULL; } if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist, &obj, &file, &proto, &fix_imports)) return NULL; pickler = _Pickler_New(); if (pickler == NULL) return NULL; if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0) goto error; if (_Pickler_SetOutputStream(pickler, file) < 0) goto error; if (dump(pickler, obj) < 0) goto error; if (_Pickler_FlushToFile(pickler) < 0) goto error; Py_DECREF(pickler); Py_RETURN_NONE; error: Py_XDECREF(pickler); return NULL; } PyDoc_STRVAR(pickle_dumps_doc, "dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n" "\n" "Return the pickled representation of the object as a bytes\n" "object, instead of writing it to a file.\n" "\n" "The optional protocol argument tells the pickler to use the given protocol;\n" "supported protocols are 0, 1, 2, 3. 
The default protocol is 3; a\n" "backward-incompatible protocol designed for Python 3.0.\n" "\n" "Specifying a negative protocol version selects the highest protocol version\n" "supported. The higher the protocol used, the more recent the version of\n" "Python needed to read the pickle produced.\n" "\n" "If fix_imports is True and *protocol* is less than 3, pickle will try to\n" "map the new Python 3.x names to the old module names used in Python 2.x,\n" "so that the pickle data stream is readable with Python 2.x.\n"); static PyObject * pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"obj", "protocol", "fix_imports", 0}; PyObject *obj; PyObject *proto = NULL; PyObject *result; PyObject *fix_imports = Py_True; PicklerObject *pickler; /* fix_imports is a keyword-only argument. */ if (Py_SIZE(args) > 2) { PyErr_Format(PyExc_TypeError, "pickle.dumps() takes at most 2 positional " "argument (%zd given)", Py_SIZE(args)); return NULL; } if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist, &obj, &proto, &fix_imports)) return NULL; pickler = _Pickler_New(); if (pickler == NULL) return NULL; if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0) goto error; if (dump(pickler, obj) < 0) goto error; result = _Pickler_GetString(pickler); Py_DECREF(pickler); return result; error: Py_XDECREF(pickler); return NULL; } PyDoc_STRVAR(pickle_load_doc, "load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n" "\n" "Read a pickled object representation from the open file object file and\n" "return the reconstituted object hierarchy specified therein. This is\n" "equivalent to ``Unpickler(file).load()``, but may be more efficient.\n" "\n" "The protocol version of the pickle is detected automatically, so no protocol\n" "argument is needed. 
Bytes past the pickled object's representation are\n" "ignored.\n" "\n" "The argument file must have two methods, a read() method that takes an\n" "integer argument, and a readline() method that requires no arguments. Both\n" "methods should return bytes. Thus *file* can be a binary file object opened\n" "for reading, a BytesIO object, or any other custom object that meets this\n" "interface.\n" "\n" "Optional keyword arguments are fix_imports, encoding and errors,\n" "which are used to control compatiblity support for pickle stream generated\n" "by Python 2.x. If fix_imports is True, pickle will try to map the old\n" "Python 2.x names to the new names used in Python 3.x. The encoding and\n" "errors tell pickle how to decode 8-bit string instances pickled by Python\n" "2.x; these default to 'ASCII' and 'strict', respectively.\n"); static PyObject * pickle_load(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0}; PyObject *file; PyObject *fix_imports = Py_True; PyObject *result; char *encoding = NULL; char *errors = NULL; UnpicklerObject *unpickler; /* fix_imports, encoding and errors are a keyword-only argument. 
*/ if (Py_SIZE(args) != 1) { PyErr_Format(PyExc_TypeError, "pickle.load() takes exactly one positional " "argument (%zd given)", Py_SIZE(args)); return NULL; } if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist, &file, &fix_imports, &encoding, &errors)) return NULL; unpickler = _Unpickler_New(); if (unpickler == NULL) return NULL; if (_Unpickler_SetInputStream(unpickler, file) < 0) goto error; if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0) goto error; unpickler->fix_imports = PyObject_IsTrue(fix_imports); if (unpickler->fix_imports == -1) goto error; result = load(unpickler); Py_DECREF(unpickler); return result; error: Py_XDECREF(unpickler); return NULL; } PyDoc_STRVAR(pickle_loads_doc, "loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n" "\n" "Read a pickled object hierarchy from a bytes object and return the\n" "reconstituted object hierarchy specified therein\n" "\n" "The protocol version of the pickle is detected automatically, so no protocol\n" "argument is needed. Bytes past the pickled object's representation are\n" "ignored.\n" "\n" "Optional keyword arguments are fix_imports, encoding and errors, which\n" "are used to control compatiblity support for pickle stream generated\n" "by Python 2.x. If fix_imports is True, pickle will try to map the old\n" "Python 2.x names to the new names used in Python 3.x. The encoding and\n" "errors tell pickle how to decode 8-bit string instances pickled by Python\n" "2.x; these default to 'ASCII' and 'strict', respectively.\n"); static PyObject * pickle_loads(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0}; PyObject *input; PyObject *fix_imports = Py_True; PyObject *result; char *encoding = NULL; char *errors = NULL; UnpicklerObject *unpickler; /* fix_imports, encoding and errors are a keyword-only argument. 
*/ if (Py_SIZE(args) != 1) { PyErr_Format(PyExc_TypeError, "pickle.loads() takes exactly one positional " "argument (%zd given)", Py_SIZE(args)); return NULL; } if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist, &input, &fix_imports, &encoding, &errors)) return NULL; unpickler = _Unpickler_New(); if (unpickler == NULL) return NULL; if (_Unpickler_SetStringInput(unpickler, input) < 0) goto error; if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0) goto error; unpickler->fix_imports = PyObject_IsTrue(fix_imports); if (unpickler->fix_imports == -1) goto error; result = load(unpickler); Py_DECREF(unpickler); return result; error: Py_XDECREF(unpickler); return NULL; } static struct PyMethodDef pickle_methods[] = { {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS, pickle_dump_doc}, {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS, pickle_dumps_doc}, {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS, pickle_load_doc}, {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS, pickle_loads_doc}, {NULL, NULL} /* sentinel */ }; static int initmodule(void) { PyObject *copyreg = NULL; PyObject *compat_pickle = NULL; /* XXX: We should ensure that the types of the dictionaries imported are exactly PyDict objects. Otherwise, it is possible to crash the pickle since we use the PyDict API directly to access these dictionaries. 
*/ copyreg = PyImport_ImportModule("copyreg"); if (!copyreg) goto error; dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table"); if (!dispatch_table) goto error; extension_registry = \ PyObject_GetAttrString(copyreg, "_extension_registry"); if (!extension_registry) goto error; inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry"); if (!inverted_registry) goto error; extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache"); if (!extension_cache) goto error; Py_CLEAR(copyreg); /* Load the 2.x -> 3.x stdlib module mapping tables */ compat_pickle = PyImport_ImportModule("_compat_pickle"); if (!compat_pickle) goto error; name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING"); if (!name_mapping_2to3) goto error; if (!PyDict_CheckExact(name_mapping_2to3)) { PyErr_Format(PyExc_RuntimeError, "_compat_pickle.NAME_MAPPING should be a dict, not %.200s", Py_TYPE(name_mapping_2to3)->tp_name); goto error; } import_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING"); if (!import_mapping_2to3) goto error; if (!PyDict_CheckExact(import_mapping_2to3)) { PyErr_Format(PyExc_RuntimeError, "_compat_pickle.IMPORT_MAPPING should be a dict, " "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name); goto error; } /* ... 
and the 3.x -> 2.x mapping tables */ name_mapping_3to2 = PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING"); if (!name_mapping_3to2) goto error; if (!PyDict_CheckExact(name_mapping_3to2)) { PyErr_Format(PyExc_RuntimeError, "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, " "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name); goto error; } import_mapping_3to2 = PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING"); if (!import_mapping_3to2) goto error; if (!PyDict_CheckExact(import_mapping_3to2)) { PyErr_Format(PyExc_RuntimeError, "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, " "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name); goto error; } Py_CLEAR(compat_pickle); empty_tuple = PyTuple_New(0); if (empty_tuple == NULL) goto error; two_tuple = PyTuple_New(2); if (two_tuple == NULL) goto error; /* We use this temp container with no regard to refcounts, or to * keeping containees alive. Exempt from GC, because we don't * want anything looking at two_tuple() by magic. */ PyObject_GC_UnTrack(two_tuple); return 0; error: Py_CLEAR(copyreg); Py_CLEAR(dispatch_table); Py_CLEAR(extension_registry); Py_CLEAR(inverted_registry); Py_CLEAR(extension_cache); Py_CLEAR(compat_pickle); Py_CLEAR(name_mapping_2to3); Py_CLEAR(import_mapping_2to3); Py_CLEAR(name_mapping_3to2); Py_CLEAR(import_mapping_3to2); Py_CLEAR(empty_tuple); Py_CLEAR(two_tuple); return -1; } static struct PyModuleDef _picklemodule = { PyModuleDef_HEAD_INIT, "_pickle", pickle_module_doc, -1, pickle_methods, NULL, NULL, NULL, NULL }; PyMODINIT_FUNC PyInit__pickle(void) { PyObject *m; if (PyType_Ready(&Unpickler_Type) < 0) return NULL; if (PyType_Ready(&Pickler_Type) < 0) return NULL; if (PyType_Ready(&Pdata_Type) < 0) return NULL; if (PyType_Ready(&PicklerMemoProxyType) < 0) return NULL; if (PyType_Ready(&UnpicklerMemoProxyType) < 0) return NULL; /* Create the module and add the functions. 
*/ m = PyModule_Create(&_picklemodule); if (m == NULL) return NULL; Py_INCREF(&Pickler_Type); if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0) return NULL; Py_INCREF(&Unpickler_Type); if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0) return NULL; /* Initialize the exceptions. */ PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL); if (PickleError == NULL) return NULL; PicklingError = \ PyErr_NewException("_pickle.PicklingError", PickleError, NULL); if (PicklingError == NULL) return NULL; UnpicklingError = \ PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL); if (UnpicklingError == NULL) return NULL; if (PyModule_AddObject(m, "PickleError", PickleError) < 0) return NULL; if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0) return NULL; if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0) return NULL; if (initmodule() < 0) return NULL; return m; } ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/fastpickle.py0000644000076600000240000000177014753071602020636 0ustar00m.howitzstaff''' The zodbpickle.pickle module exposes the standard behavior of the pickle module. This is backward compatible, but has the effect that by default, on Python3 you get the fast implementation, while on Python2 you get the slow implementation. This module is a version that always exposes the fast implementation of pickling and avoids the need to explicitly touch internals. Note: We are intentionally using "import *" in this context. The imported modules define an __all__ variable, which contains all the names that it wants to export. So this is a rare case where 'import *' is exactly the right thing to do. 
''' import sys import warnings from .pickle_3 import * # do not share the globals with a slow version del sys.modules['zodbpickle.pickle_3'] # isort: off # also make sure that we really have the fast version if is_pure: # noqa: F405 warnings.warn("fastpickle imported under 'PURE_PYTHON' environment") else: from ._pickle import * # noqa: E402 module level import not at top of file ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/pickle.py0000644000076600000240000000003014753071602017744 0ustar00m.howitzstafffrom .pickle_3 import * ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/pickle_3.py0000644000076600000240000014631214753071602020204 0ustar00m.howitzstaff"""Create portable serialized representations of Python objects. See module copyreg for a mechanism for registering custom picklers. See module pickletools source for extensive comments. Classes: Pickler Unpickler Functions: dump(object, file) dumps(object) -> string load(file) -> object loads(string) -> object Misc variables: __version__ format_version compatible_formats """ import codecs import io import marshal import os import re import struct import sys from copyreg import _extension_cache from copyreg import _extension_registry from copyreg import _inverted_registry from copyreg import dispatch_table from types import BuiltinFunctionType from types import FunctionType import _compat_pickle __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", "Unpickler", "dump", "dumps", "load", "loads", "is_pure"] is_pure = int(os.environ.get('PURE_PYTHON', '0')) # Shortcut for use in isinstance testing bytes_types = (bytes, bytearray) __all__.append('bytes_types') # These are purely informational; no code uses these. 
format_version = "3.0"  # File format version we write
compatible_formats = ["1.0",  # Original protocol 0
                      "1.1",  # Protocol 0 with INST added
                      "1.2",  # Original protocol 1
                      "1.3",  # Protocol 1 with BINFLOAT added
                      "2.0",  # Protocol 2
                      "3.0",  # Protocol 3
                      ]  # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 3

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# We intentionally write a protocol that Python 2.x cannot read;
# there are too many issues with that.
DEFAULT_PROTOCOL = 3

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()!
mloads = marshal.loads


class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
    pass


class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
    pass


class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
    pass


# An instance of _Stop is raised by Unpickler.load_stop() in response to
# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    def __init__(self, value):
        self.value = value


# Jython has PyStringMap; it's a dict subclass with string keys.
# Catch ImportError rather than only its ModuleNotFoundError subclass, so
# that a present-but-unusable "org.python.core" cannot break importing
# this module either.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

# autopep8: off
MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   # push persistent object; id is taken from stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   # push string; counted binary string < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   # push Unicode string; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   # push item from memo on stack; index is 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   # store stack top in memo; index is 1-byte arg
LONG_BINPUT    = b'r'   # store stack top in memo; index is 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   # push bytes; counted binary string < 256 bytes
# autopep8: on

# Export every all-uppercase name defined so far (the opcodes and the
# protocol constants above).
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])

# Pickling machinery


class _Pickler:

    def __init__(self, file, protocol=None, *, fix_imports=True):
        """This takes a binary file for writing a pickle data stream.

        The optional protocol argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3.  The default
        protocol is 3; a backward-incompatible protocol designed for
        Python 3.0.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The file argument must have a write() method that accepts a single
        bytes argument. It can thus be a file object opened for binary
        writing, a io.BytesIO instance, or any other custom object that
        meets this interface.

        If fix_imports is True and protocol is less than 3, pickle will try to
        map the new Python 3.x names to the old module names used in Python
        2.x, so that the pickle data stream is readable with Python 2.x.

        Raises ValueError for a protocol above HIGHEST_PROTOCOL and
        TypeError when file has no 'write' attribute.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError(
                "pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        try:
            self.write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        self.memo = {}
        self.proto = int(protocol)
        # Every protocol except 0 uses the binary opcodes.
        self.bin = protocol >= 1
        self.fast = 0
        # Name remapping only applies to pickles Python 2.x could read.
        self.fix_imports = fix_imports and protocol < 3

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects are
        pickled by reference and not by value.  This method is useful when
        re-using picklers.

        """
        self.memo.clear()

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Check whether Pickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Pickler.dump().
        if not hasattr(self, "write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        if self.proto >= 2:
            self.write(PROTO + bytes([self.proto]))
        self.save(obj)
        self.write(STOP)

    def memoize(self, obj):
        """Store an object in the memo."""

        # The Pickler memo is a dictionary mapping object ids to 2-tuples
        # that contain the Unpickler memo key and the object being memoized.
        # The memo key is written to the pickle and will become
        # the key in the Unpickler's memo.  The object is stored in the
        # Pickler memo so that transient objects are kept alive during
        # pickling.

        # The use of the Unpickler memo length as the memo key is just a
        # convention.  The only requirement is that the memo values be unique.
        # But there appears no advantage to any other scheme, and this
        # scheme allows the Unpickler memo to be implemented as a plain (but
        # growable) array, indexed by memo key.
if self.fast: return assert id(obj) not in self.memo memo_len = len(self.memo) self.write(self.put(memo_len)) self.memo[id(obj)] = memo_len, obj # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i. def put(self, i, pack=struct.pack): if self.bin: if i < 256: return BINPUT + bytes([i]) else: return LONG_BINPUT + pack("= 2 and getattr(func, "__name__", "") == "__newobj__": # A __reduce__ implementation can direct protocol 2 to # use the more efficient NEWOBJ opcode, while still # allowing protocol 0 and 1 to work normally. For this to # work, the function returned by __reduce__ should be # called __newobj__, and its first argument should be a # class. The implementation for __newobj__ # should be as follows, although pickle has no way to # verify this: # # def __newobj__(cls, *args): # return cls.__new__(cls, *args) # # Protocols 0 and 1 will pickle a reference to __newobj__, # while protocol 2 (and above) will pickle a reference to # cls, the remaining args tuple, and the NEWOBJ code, # which calls cls.__new__(cls, *args) at unpickling time # (see load_newobj below). If __reduce__ returns a # three-tuple, the state from the third tuple item will be # pickled regardless of the protocol, calling __setstate__ # at unpickling time (see load_build below). # # Note that no standard __newobj__ implementation exists; # you have to provide your own. This is to enforce # compatibility with Python 2.2 (pickles written using # protocol 0 or 1 in Python 2.3 should be unpicklable by # Python 2.2). 
# (_Pickler.save_reduce continues: NEWOBJ branch for a "__newobj__" callable)
                cls = args[0]
                if not hasattr(cls, "__new__"):
                    raise PicklingError(
                        "args[0] from __newobj__ args has no __new__")
                if obj is not None and cls is not obj.__class__:
                    raise PicklingError(
                        "args[0] from __newobj__ args has the wrong class")
                # The class itself travels separately from the remaining
                # constructor arguments.
                args = args[1:]
                save(cls)
                save(args)
                write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            save(state)
            write(BUILD)

    # Methods below this point are dispatched through the dispatch table

    dispatch = {}

    def save_none(self, obj):
        """Write the NONE opcode."""
        self.write(NONE)
    dispatch[type(None)] = save_none

    def save_ellipsis(self, obj):
        """Pickle Ellipsis as a global reference named 'Ellipsis'."""
        self.save_global(Ellipsis, 'Ellipsis')
    dispatch[type(Ellipsis)] = save_ellipsis

    def save_notimplemented(self, obj):
        """Pickle NotImplemented as a global reference of that name."""
        self.save_global(NotImplemented, 'NotImplemented')
    dispatch[type(NotImplemented)] = save_notimplemented

    def save_bool(self, obj):
        """Write a bool: NEWTRUE/NEWFALSE from protocol 2 on, otherwise
        the TRUE/FALSE byte strings."""
        if self.proto >= 2:
            self.write(obj and NEWTRUE or NEWFALSE)
        else:
            self.write(obj and TRUE or FALSE)
    dispatch[bool] = save_bool

    def save_long(self, obj, pack=struct.pack):
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + bytes([obj]))
                    return
                if obj <= 0xffff:
                    # low byte first, then high byte
                    self.write(BININT2 + bytes([obj & 0xff, obj >> 8]))
                    return
            # Next check for 4-byte signed ints:
            high_bits = obj >> 31  # note that Python shift sign-extends
            if high_bits == 0 or high_bits == -1:
                # All high bits are copies of bit 2**31, so the value
                # fits in a 4-byte signed int.
self.write(BININT + pack("= 2: encoded = encode_long(obj) n = len(encoded) if n < 256: self.write(LONG1 + bytes([n]) + encoded) else: self.write(LONG4 + pack("d', obj)) else: self.write(FLOAT + repr(obj).encode("ascii") + b'\n') dispatch[float] = save_float def save_bytes(self, obj, pack=struct.pack): if self.proto < 3: if len(obj) == 0: self.save_reduce(bytes, (), obj=obj) else: self.save_reduce(codecs.encode, (str(obj, 'latin1'), 'latin1'), obj=obj) return n = len(obj) if n < 256: self.write(SHORT_BINBYTES + bytes([n]) + bytes(obj)) else: self.write(BINBYTES + pack("= 2: for element in obj: save(element) # Subtle. Same as in the big comment below. if id(obj) in memo: get = self.get(memo[id(obj)][0]) write(POP * n + get) else: write(_tuplesize2code[n]) self.memoize(obj) return # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple # has more than 3 elements. write(MARK) for element in obj: save(element) if id(obj) in memo: # Subtle. d was not in memo when we entered save_tuple(), so # the process of saving the tuple's elements must have saved # the tuple itself: the tuple is recursive. The proper action # now is to throw away everything we put on the stack, and # simply GET the tuple (it's already constructed). This check # could have been done in the "for element" loop instead, but # recursive tuples are a rare thing. get = self.get(memo[id(obj)][0]) if proto: write(POP_MARK + get) else: # proto 0 -- POP_MARK not available write(POP * (n + 1) + get) return # No recursion. 
# (_Pickler.save_tuple continues: the non-recursive case)
        self.write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple

    def save_list(self, obj):
        """Write an empty list, memoize it, then append the items."""
        write = self.write

        if self.bin:
            write(EMPTY_LIST)
        else:   # proto 0 -- can't use EMPTY_LIST
            write(MARK + LIST)

        self.memoize(obj)
        self._batch_appends(obj)

    dispatch[list] = save_list

    # Upper bound on the number of items written per APPENDS/SETITEMS batch.
    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        # Helper to batch up APPENDS sequences
        save = self.save
        write = self.write

        # Protocol 0: one APPEND per item.
        if not self.bin:
            for x in items:
                save(x)
                write(APPEND)
            return

        items = iter(items)
        r = range(self._BATCHSIZE)
        # "items" is set to None once the iterator is exhausted, which
        # ends the outer loop after the current (possibly short) batch.
        while items is not None:
            tmp = []
            for i in r:
                try:
                    x = next(items)
                    tmp.append(x)
                except StopIteration:
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for x in tmp:
                    save(x)
                write(APPENDS)
            elif n:
                save(tmp[0])
                write(APPEND)
            # else tmp is empty, and we're done

    def save_dict(self, obj):
        """Write an empty dict, memoize it, then set its items."""
        write = self.write

        if self.bin:
            write(EMPTY_DICT)
        else:   # proto 0 -- can't use EMPTY_DICT
            write(MARK + DICT)

        self.memoize(obj)
        self._batch_setitems(obj.items())

    dispatch[dict] = save_dict
    if PyStringMap is not None:
        dispatch[PyStringMap] = save_dict

    def _batch_setitems(self, items):
        # Helper to batch up SETITEMS sequences.  Batching happens only
        # with the binary protocols (proto >= 1); for protocol 0 one
        # SETITEM is written per key/value pair.
        save = self.save
        write = self.write

        if not self.bin:
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        items = iter(items)
        r = range(self._BATCHSIZE)
        # Same exhaustion protocol as _batch_appends(): None ends the loop.
        while items is not None:
            tmp = []
            for i in r:
                try:
                    tmp.append(next(items))
                except StopIteration:
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done

    def save_global(self, obj, name=None, pack=struct.pack):
        write = self.write

        if name is None:
            name = obj.__name__

        module = getattr(obj, "__module__", None)
        if module is None:
            module = whichmodule(obj, name)

        # The object must be reachable again as module.name.
        try:
            __import__(module, level=0)
            mod = sys.modules[module]
            klass = getattr(mod, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj,
module, name)) else: if klass is not obj: raise PicklingError( "Can't pickle %r: it's not the same object as %s.%s" % (obj, module, name)) if self.proto >= 2: code = _extension_registry.get((module, name)) if code: assert code > 0 if code <= 0xff: write(EXT1 + bytes([code])) elif code <= 0xffff: write(EXT2 + bytes([code & 0xff, code >> 8])) else: write(EXT4 + pack("= 3. if self.proto >= 3: write(GLOBAL + bytes(module, "utf-8") + b'\n' + bytes(name, "utf-8") + b'\n') else: if self.fix_imports: if (module, name) in _compat_pickle.REVERSE_NAME_MAPPING: module, name = _compat_pickle.REVERSE_NAME_MAPPING[( module, name)] if module in _compat_pickle.REVERSE_IMPORT_MAPPING: module = _compat_pickle.REVERSE_IMPORT_MAPPING[module] try: write(GLOBAL + bytes(module, "ascii") + b'\n' + bytes(name, "ascii") + b'\n') except UnicodeEncodeError: raise PicklingError( "can't pickle global identifier '%s.%s' using " "pickle protocol %i" % (module, name, self.proto)) self.memoize(obj) dispatch[FunctionType] = save_global dispatch[BuiltinFunctionType] = save_global dispatch[type] = save_global # Pickling helpers def _keep_alive(x, memo): """Keeps a reference to the object x in the memo. Because we remember objects by their id, we have to assure that possibly temporary objects are kept alive by referencing them. We store a reference at the id of the memo, which should normally not be used unless someone tries to deepcopy the memo itself... """ try: memo[id(memo)].append(x) except KeyError: # aha, this is the first one :-) memo[id(memo)] = [x] # A cache for whichmodule(), mapping a function object to the name of # the module in which the function was found. classmap = {} # called classmap for backwards compatibility def whichmodule(func, funcname): """Figure out the module in which a function occurs. Search sys.modules for the module. Cache in classmap. Return a module name. If the function cannot be found, return "__main__". 
"""
    # Python functions should always get an __module__ from their globals.
    mod = getattr(func, "__module__", None)
    if mod is not None:
        return mod
    if func in classmap:
        return classmap[func]

    # Copy the items so the scan is unaffected if sys.modules changes.
    for name, module in list(sys.modules.items()):
        if module is None:
            continue  # skip dummy package entries
        if name != '__main__' and getattr(module, funcname, None) is func:
            break
    else:
        name = '__main__'
    classmap[func] = name
    return name


# Unpickling machinery

class _Unpickler:

    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict"):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so no
        proto argument is needed.

        The file-like object must have two methods, a read() method
        that takes an integer argument, and a readline() method that
        requires no arguments.  Both methods should return bytes.
        Thus the file-like object can be a binary file object opened for
        reading, a BytesIO object, or any other custom object that
        meets this interface.

        Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
        which are used to control compatibility support for pickle streams
        generated by Python 2.x.  If *fix_imports* is True, pickle will try to
        map the old Python 2.x names to the new names used in Python 3.x.  The
        *encoding* and *errors* tell pickle how to decode 8-bit string
        instances pickled by Python 2.x; these default to 'ASCII' and
        'strict', respectively.  *encoding* can be 'bytes' to read 8-bit
        string instances as bytes objects, but this breaks instance pickles
        so should only be used for special purposes.  *errors* can also be
        'bytes', which means any string that can't be decoded will be left
        as a bytes object.
        """
        self.readline = file.readline
        self.read = file.read
        self.memo = {}
        self.encoding = encoding
        self.errors = errors
        # Stays 0 until a PROTO opcode in the stream says otherwise.
        self.proto = 0
        self.fix_imports = fix_imports

    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Unpickler.load().
        if not hasattr(self, "read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        self.mark = object()  # any new unique object
        self.stack = []
        self.append = self.stack.append
        read = self.read
        dispatch = self.dispatch
        # Run one handler per opcode byte; the loop only ends through the
        # _Stop exception (carrying the result) or an error such as EOF.
        try:
            while True:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value

    def noload(self):
        """Read a pickled object representation from the open file.

        If the object was an intrinsic type such as a literal list, dict
        or tuple, return it.  Otherwise (if the object was an instance),
        return nothing useful.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of the _pickle.Unpickler
        # counterpart.
        if not hasattr(self, "read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        self.mark = object()  # any new unique object
        self.stack = []
        self.append = self.stack.append
        read = self.read
        # Same loop as load(), but driven by the nl_dispatch table.
        dispatch = self.nl_dispatch
        try:
            while True:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value

    # Return largest index k such that self.stack[k] is self.mark.
    # If the stack doesn't contain a mark, eventually raises IndexError.
    # This could be sped by maintaining another stack, of indices at which
    # the mark appears.  For that matter, the latter stack would suffice,
    # and we wouldn't need to push mark objects on self.stack at all.
    # Doing so is probably a good thing, though, since if the pickle is
    # corrupt (or hostile) we may get a clue from finding self.mark embedded
    # in unpickled objects.
def marker(self): stack = self.stack mark = self.mark k = len(stack) - 1 while stack[k] is not mark: k = k - 1 return k def persistent_load(self, pid): raise UnpicklingError("unsupported persistent id encountered") dispatch = {} def load_proto(self): proto = ord(self.read(1)) if not 0 <= proto <= HIGHEST_PROTOCOL: raise ValueError("unsupported pickle protocol: %d" % proto) self.proto = proto dispatch[PROTO[0]] = load_proto def load_persid(self): pid = self.readline()[:-1].decode("ascii") self.append(self.persistent_load(pid)) dispatch[PERSID[0]] = load_persid def load_binpersid(self): pid = self.stack.pop() self.append(self.persistent_load(pid)) dispatch[BINPERSID[0]] = load_binpersid def load_none(self): self.append(None) dispatch[NONE[0]] = load_none def load_false(self): self.append(False) dispatch[NEWFALSE[0]] = load_false def load_true(self): self.append(True) dispatch[NEWTRUE[0]] = load_true def load_int(self): data = self.readline() if data == FALSE[1:]: val = False elif data == TRUE[1:]: val = True else: try: val = int(data, 0) except ValueError: val = int(data, 0) self.append(val) dispatch[INT[0]] = load_int def load_binint(self): self.append(mloads(b'i' + self.read(4))) dispatch[BININT[0]] = load_binint def load_binint1(self): self.append(ord(self.read(1))) dispatch[BININT1[0]] = load_binint1 def load_binint2(self): self.append(mloads(b'i' + self.read(2) + b'\000\000')) dispatch[BININT2[0]] = load_binint2 def load_long(self): val = self.readline()[:-1].decode("ascii") if val and val[-1] == 'L': val = val[:-1] self.append(int(val, 0)) dispatch[LONG[0]] = load_long def load_long1(self): n = ord(self.read(1)) data = self.read(n) self.append(decode_long(data)) dispatch[LONG1[0]] = load_long1 def load_long4(self): n = mloads(b'i' + self.read(4)) if n < 0: # Corrupt or hostile pickle -- we never write one like this raise UnpicklingError("LONG pickle has negative byte count") data = self.read(n) self.append(decode_long(data)) dispatch[LONG4[0]] = load_long4 def 
load_float(self): self.append(float(self.readline()[:-1])) dispatch[FLOAT[0]] = load_float def load_binfloat(self, unpack=struct.unpack): self.append(unpack('>d', self.read(8))[0]) dispatch[BINFLOAT[0]] = load_binfloat def decode_string(self, value): if self.encoding == "bytes": return value elif self.errors == "bytes": try: return value.decode(self.encoding) except UnicodeDecodeError: return value else: return value.decode(self.encoding, self.errors) def load_string(self): orig = self.readline() rep = orig[:-1] for q in (b'"', b"'"): # double or single quote if rep.startswith(q): if len(rep) < 2 or not rep.endswith(q): raise ValueError("insecure string pickle") rep = rep[len(q):-len(q)] break else: raise ValueError("insecure string pickle: %r" % orig) self.append(self.decode_string(codecs.escape_decode(rep)[0])) dispatch[STRING[0]] = load_string def load_binstring(self): # Deprecated BINSTRING uses signed 32-bit length len = mloads(b'i' + self.read(4)) if len < 0: raise UnpicklingError("BINSTRING pickle has negative byte count") data = self.read(len) self.append(self.decode_string(data)) dispatch[BINSTRING[0]] = load_binstring def load_binbytes(self, unpack=struct.unpack, maxsize=sys.maxsize): len, = unpack(' maxsize: raise UnpicklingError( "BINBYTES exceeds system's maximum size of %d bytes" % maxsize) self.append(self.read(len)) dispatch[BINBYTES[0]] = load_binbytes def load_unicode(self): self.append(str(self.readline()[:-1], 'raw-unicode-escape')) dispatch[UNICODE[0]] = load_unicode def load_binunicode(self, unpack=struct.unpack, maxsize=sys.maxsize): len, = unpack(' maxsize: raise UnpicklingError( "BINUNICODE exceeds system's maximum size of %d bytes" % maxsize) self.append(str(self.read(len), 'utf-8', 'surrogatepass')) dispatch[BINUNICODE[0]] = load_binunicode def load_short_binstring(self): len = ord(self.read(1)) data = self.read(len) self.append(self.decode_string(data)) dispatch[SHORT_BINSTRING[0]] = load_short_binstring def load_short_binbytes(self): 
len = ord(self.read(1)) self.append(bytes(self.read(len))) dispatch[SHORT_BINBYTES[0]] = load_short_binbytes def load_tuple(self): k = self.marker() self.stack[k:] = [tuple(self.stack[k + 1:])] dispatch[TUPLE[0]] = load_tuple def load_empty_tuple(self): self.append(()) dispatch[EMPTY_TUPLE[0]] = load_empty_tuple def load_tuple1(self): self.stack[-1] = (self.stack[-1],) dispatch[TUPLE1[0]] = load_tuple1 def load_tuple2(self): self.stack[-2:] = [(self.stack[-2], self.stack[-1])] dispatch[TUPLE2[0]] = load_tuple2 def load_tuple3(self): self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] dispatch[TUPLE3[0]] = load_tuple3 def load_empty_list(self): self.append([]) dispatch[EMPTY_LIST[0]] = load_empty_list def load_empty_dictionary(self): self.append({}) dispatch[EMPTY_DICT[0]] = load_empty_dictionary def load_list(self): k = self.marker() self.stack[k:] = [self.stack[k + 1:]] dispatch[LIST[0]] = load_list def load_dict(self): k = self.marker() d = {} items = self.stack[k + 1:] for i in range(0, len(items), 2): key = items[i] value = items[i + 1] d[key] = value self.stack[k:] = [d] dispatch[DICT[0]] = load_dict # INST and OBJ differ only in how they get a class object. It's not # only sensible to do the rest in a common routine, the two routines # previously diverged and grew different bugs. # klass is the class to instantiate, and k points to the topmost mark # object, following which are the arguments for klass.__init__. 
def _instantiate(self, klass, k): args = tuple(self.stack[k + 1:]) del self.stack[k:] if (args or not isinstance(klass, type) or hasattr(klass, "__getinitargs__")): try: value = klass(*args) except TypeError as err: raise TypeError("in constructor for %s: %s" % (klass.__name__, str(err)), sys.exc_info()[2]) else: value = klass.__new__(klass) self.append(value) def load_inst(self): module = self.readline()[:-1].decode("ascii") name = self.readline()[:-1].decode("ascii") klass = self.find_class(module, name) self._instantiate(klass, self.marker()) dispatch[INST[0]] = load_inst def load_obj(self): # Stack is ... markobject classobject arg1 arg2 ... k = self.marker() klass = self.stack.pop(k + 1) self._instantiate(klass, k) dispatch[OBJ[0]] = load_obj def load_newobj(self): args = self.stack.pop() cls = self.stack[-1] obj = cls.__new__(cls, *args) self.stack[-1] = obj dispatch[NEWOBJ[0]] = load_newobj def load_global(self): module = self.readline()[:-1].decode("utf-8") name = self.readline()[:-1].decode("utf-8") klass = self.find_class(module, name) self.append(klass) dispatch[GLOBAL[0]] = load_global def load_ext1(self): code = ord(self.read(1)) self.get_extension(code) dispatch[EXT1[0]] = load_ext1 def load_ext2(self): code = mloads(b'i' + self.read(2) + b'\000\000') self.get_extension(code) dispatch[EXT2[0]] = load_ext2 def load_ext4(self): code = mloads(b'i' + self.read(4)) self.get_extension(code) dispatch[EXT4[0]] = load_ext4 def get_extension(self, code): nil = [] obj = _extension_cache.get(code, nil) if obj is not nil: self.append(obj) return key = _inverted_registry.get(code) if not key: if code <= 0: # note that 0 is forbidden # Corrupt or hostile pickle. raise UnpicklingError("EXT specifies code <= 0") raise ValueError("unregistered extension code %d" % code) obj = self.find_class(*key) _extension_cache[code] = obj self.append(obj) def find_class(self, module, name): # Subclasses may override this. 
if self.proto < 3 and self.fix_imports: if (module, name) in _compat_pickle.NAME_MAPPING: module, name = _compat_pickle.NAME_MAPPING[(module, name)] if module in _compat_pickle.IMPORT_MAPPING: module = _compat_pickle.IMPORT_MAPPING[module] __import__(module, level=0) mod = sys.modules[module] klass = getattr(mod, name) return klass def load_reduce(self): stack = self.stack args = stack.pop() func = stack[-1] try: value = func(*args) except BaseException: print(sys.exc_info()) print(func, args) raise stack[-1] = value dispatch[REDUCE[0]] = load_reduce def load_pop(self): del self.stack[-1] dispatch[POP[0]] = load_pop def load_pop_mark(self): k = self.marker() del self.stack[k:] dispatch[POP_MARK[0]] = load_pop_mark def load_dup(self): self.append(self.stack[-1]) dispatch[DUP[0]] = load_dup def load_get(self): i = int(self.readline()[:-1]) self.append(self.memo[i]) dispatch[GET[0]] = load_get def load_binget(self): i = self.read(1)[0] self.append(self.memo[i]) dispatch[BINGET[0]] = load_binget def load_long_binget(self, unpack=struct.unpack): i, = unpack(' maxsize: raise ValueError("negative LONG_BINPUT argument") self.memo[i] = self.stack[-1] dispatch[LONG_BINPUT[0]] = load_long_binput def load_append(self): stack = self.stack value = stack.pop() list = stack[-1] list.append(value) dispatch[APPEND[0]] = load_append def load_appends(self): stack = self.stack mark = self.marker() list_obj = stack[mark - 1] items = stack[mark + 1:] if isinstance(list_obj, list): list_obj.extend(items) else: append = list_obj.append for item in items: append(item) del stack[mark:] dispatch[APPENDS[0]] = load_appends def load_setitem(self): stack = self.stack value = stack.pop() key = stack.pop() dict = stack[-1] dict[key] = value dispatch[SETITEM[0]] = load_setitem def load_setitems(self): stack = self.stack mark = self.marker() dict = stack[mark - 1] for i in range(mark + 1, len(stack), 2): dict[stack[i]] = stack[i + 1] del stack[mark:] dispatch[SETITEMS[0]] = load_setitems def 
load_build(self): stack = self.stack state = stack.pop() inst = stack[-1] setstate = getattr(inst, "__setstate__", None) if setstate: setstate(state) return slotstate = None if isinstance(state, tuple) and len(state) == 2: state, slotstate = state if state: inst_dict = inst.__dict__ intern = sys.intern for k, v in state.items(): if isinstance(k, str): inst_dict[intern(k)] = v else: inst_dict[k] = v if slotstate: for k, v in slotstate.items(): setattr(inst, k, v) dispatch[BUILD[0]] = load_build def load_mark(self): self.append(self.mark) dispatch[MARK[0]] = load_mark def load_stop(self): value = self.stack.pop() raise _Stop(value) dispatch[STOP[0]] = load_stop nl_dispatch = dispatch.copy() def noload_obj(self): # Stack is ... markobject classobject arg1 arg2 ... k = self.marker() self.stack.pop(k + 1) # klass nl_dispatch[OBJ[0]] = noload_obj def noload_inst(self): self.readline() # skip module self.readline()[:-1] # skip name k = self.marker() self.stack.pop(k + 1) # klass self.append(None) nl_dispatch[INST[0]] = noload_inst def noload_newobj(self): self.stack.pop() # skip args self.stack.pop() # skip cls self.stack.append(None) nl_dispatch[NEWOBJ[0]] = noload_newobj def noload_global(self): self.readline() # skip module self.readline()[:-1] # skip name self.append(None) nl_dispatch[GLOBAL[0]] = noload_global def noload_append(self): if self.stack[-2] is not None: self.load_append() else: self.stack.pop() nl_dispatch[APPEND[0]] = noload_append def noload_appends(self): stack = self.stack mark = self.marker() list = stack[mark - 1] if list is not None: list.extend(stack[mark + 1:]) del self.stack[mark:] nl_dispatch[APPENDS[0]] = noload_appends def noload_setitem(self): if self.stack[-3] is not None: self.load_setitem() else: self.stack.pop() # skip value self.stack.pop() # skip key nl_dispatch[SETITEM[0]] = noload_setitem def noload_setitems(self): stack = self.stack mark = self.marker() dict = stack[mark - 1] if dict is not None: for i in range(mark + 1, len(stack), 
2): dict[stack[i]] = stack[i + 1] del stack[mark:] nl_dispatch[SETITEMS[0]] = noload_setitems def noload_reduce(self): self.stack.pop() # skip args self.stack.pop() # skip func self.stack.append(None) nl_dispatch[REDUCE[0]] = noload_reduce def noload_build(self): self.stack.pop() # state nl_dispatch[BUILD[0]] = noload_build def noload_ext1(self): code = ord(self.read(1)) self.get_extension(code) self.stack.pop() self.stack.append(None) nl_dispatch[EXT1[0]] = noload_ext1 def noload_ext2(self): code = mloads(b'i' + self.read(2) + b'\000\000') self.get_extension(code) self.stack.pop() self.stack.append(None) nl_dispatch[EXT2[0]] = noload_ext2 def noload_ext4(self): code = mloads(b'i' + self.read(4)) self.get_extension(code) self.stack.pop() self.stack.append(None) nl_dispatch[EXT4[0]] = noload_ext4 # Encode/decode longs. def encode_long(x): r"""Encode a long to a two's complement little-endian binary string. Note that 0 is a special case, returning an empty string, to save a byte in the LONG1 pickling context. >>> encode_long(0) b'' >>> encode_long(255) b'\xff\x00' >>> encode_long(32767) b'\xff\x7f' >>> encode_long(-256) b'\x00\xff' >>> encode_long(-32768) b'\x00\x80' >>> encode_long(-128) b'\x80' >>> encode_long(127) b'\x7f' >>> """ if x == 0: return b'' nbytes = (x.bit_length() >> 3) + 1 result = x.to_bytes(nbytes, byteorder='little', signed=True) if x < 0 and nbytes > 1: if result[-1] == 0xff and (result[-2] & 0x80) != 0: result = result[:-1] return result def decode_long(data): r"""Decode a long from a two's complement little-endian binary string. 
>>> decode_long(b'') 0 >>> decode_long(b"\xff\x00") 255 >>> decode_long(b"\xff\x7f") 32767 >>> decode_long(b"\x00\xff") -256 >>> decode_long(b"\x00\x80") -32768 >>> decode_long(b"\x80") -128 >>> decode_long(b"\x7f") 127 """ return int.from_bytes(data, byteorder='little', signed=True) # Shorthands def _dump(obj, file, protocol=None, *, fix_imports=True): Pickler(file, protocol, fix_imports=fix_imports).dump(obj) def _dumps(obj, protocol=None, *, fix_imports=True): f = io.BytesIO() Pickler(f, protocol, fix_imports=fix_imports).dump(obj) res = f.getvalue() assert isinstance(res, bytes_types) return res def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): return Unpickler(file, fix_imports=fix_imports, encoding=encoding, errors=errors).load() def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): if isinstance(s, str): raise TypeError("Can't load pickle from unicode string") file = io.BytesIO(s) return Unpickler(file, fix_imports=fix_imports, encoding=encoding, errors=errors).load() # Use the faster _pickle if possible if is_pure: Pickler, Unpickler = _Pickler, _Unpickler dump, dumps, load, loads = _dump, _dumps, _load, _loads else: try: from zodbpickle._pickle import * except ModuleNotFoundError: Pickler, Unpickler = _Pickler, _Unpickler dump, dumps, load, loads = _dump, _dumps, _load, _loads # Doctest def _test(): import doctest return doctest.testmod() if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description='display contents of the pickle files') parser.add_argument( 'pickle_file', type=argparse.FileType('br'), nargs='*', help='the pickle file') parser.add_argument( '-t', '--test', action='store_true', help='run self-test suite') parser.add_argument( '-v', action='store_true', help='run verbosely; only affects self-test run') args = parser.parse_args() if args.test: _test() else: if not args.pickle_file: parser.print_help() else: import pprint for f in args.pickle_file: obj = load(f) 
pprint.pprint(obj) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/pickletools_3.py0000644000076600000240000023470614753071602021272 0ustar00m.howitzstaff'''"Executable documentation" for the pickle module. Extensive comments about the pickle protocols and pickle-machine opcodes can be found here. Some functions meant for external use: genops(pickle) Generate all the opcodes in a pickle, as (opcode, arg, position) triples. dis(pickle, out=None, memo=None, indentlevel=4) Print a symbolic disassembly of a pickle. ''' import codecs import re import sys from pickle import decode_long from struct import unpack as _unpack from zodbpickle import pickle_3 as pickle __all__ = ['dis', 'genops', 'optimize'] bytes_types = pickle.bytes_types # Other ideas: # # - A pickle verifier: read a pickle and check it exhaustively for # well-formedness. dis() does a lot of this already. # # - A protocol identifier: examine a pickle and return its protocol number # (== the highest .proto attr value among all the opcodes in the pickle). # dis() already prints this info at the end. # # - A pickle optimizer: for example, tuple-building code is sometimes more # elaborate than necessary, catering for the possibility that the tuple # is recursive. Or lots of times a PUT is generated that's never accessed # by a later GET. """ "A pickle" is a program for a virtual pickle machine (PM, but more accurately called an unpickling machine). It's a sequence of opcodes, interpreted by the PM, building an arbitrarily complex Python object. For the most part, the PM is very simple: there are no looping, testing, or conditional instructions, no arithmetic and no function calls. Opcodes are executed once each, from first to last, until a STOP opcode is reached. The PM has two data areas, "the stack" and "the memo". 
Many opcodes push Python objects onto the stack; e.g., INT pushes a Python integer object on the stack, whose value is gotten from a decimal string literal immediately following the INT opcode in the pickle bytestream. Other opcodes take Python objects off the stack. The result of unpickling is whatever object is left on the stack when the final STOP opcode is executed. The memo is simply an array of objects, or it can be implemented as a dict mapping little integers to objects. The memo serves as the PM's "long term memory", and the little integers indexing the memo are akin to variable names. Some opcodes pop a stack object into the memo at a given index, and others push a memo object at a given index onto the stack again. At heart, that's all the PM has. Subtleties arise for these reasons: + Object identity. Objects can be arbitrarily complex, and subobjects may be shared (for example, the list [a, a] refers to the same object a twice). It can be vital that unpickling recreate an isomorphic object graph, faithfully reproducing sharing. + Recursive objects. For example, after "L = []; L.append(L)", L is a list, and L[0] is the same list. This is related to the object identity point, and some sequences of pickle opcodes are subtle in order to get the right result in all cases. + Things pickle doesn't know everything about. Examples of things pickle does know everything about are Python's builtin scalar and container types, like ints and tuples. They generally have opcodes dedicated to them. For things like module references and instances of user-defined classes, pickle's knowledge is limited. Historically, many enhancements have been made to the pickle protocol in order to do a better (faster, and/or more compact) job on those. + Backward compatibility and micro-optimization. As explained below, pickle opcodes never go away, not even when better ways to do a thing get invented. The repertoire of the PM just keeps growing over time. 
For example, protocol 0 had two opcodes for building Python integers (INT and LONG), protocol 1 added three more for more-efficient pickling of short integers, and protocol 2 added two more for more-efficient pickling of long integers (before protocol 2, the only ways to pickle a Python long took time quadratic in the number of digits, for both pickling and unpickling). "Opcode bloat" isn't so much a subtlety as a source of wearying complication. Pickle protocols: For compatibility, the meaning of a pickle opcode never changes. Instead new pickle opcodes get added, and each version's unpickler can handle all the pickle opcodes in all protocol versions to date. So old pickles continue to be readable forever. The pickler can generally be told to restrict itself to the subset of opcodes available under previous protocol versions too, so that users can create pickles under the current version readable by older versions. However, a pickle does not contain its version number embedded within it. If an older unpickler tries to read a pickle using a later protocol, the result is most likely an exception due to seeing an unknown (in the older unpickler) opcode. The original pickle used what's now called "protocol 0", and what was called "text mode" before Python 2.3. The entire pickle bytestream is made up of printable 7-bit ASCII characters, plus the newline character, in protocol 0. That's why it was called text mode. Protocol 0 is small and elegant, but sometimes painfully inefficient. The second major set of additions is now called "protocol 1", and was called "binary mode" before Python 2.3. This added many opcodes with arguments consisting of arbitrary bytes, including NUL bytes and unprintable "high bit" bytes. 
Binary mode pickles can be substantially smaller than equivalent text mode
pickles, and sometimes faster too; e.g., BININT represents a 4-byte int as 4
bytes following the opcode, which is cheaper to unpickle than the (perhaps)
11-character decimal string attached to INT.  Protocol 1 also added a number
of opcodes that operate on many stack elements at once (like APPENDS and
SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).

The third major set of additions came in Python 2.3, and is called "protocol
2".  This added:

- A better way to pickle instances of new-style classes (NEWOBJ).

- A way for a pickle to identify its protocol (PROTO).

- Time- and space- efficient pickling of long ints (LONG{1,4}).

- Shortcuts for small tuples (TUPLE{1,2,3}).

- Dedicated opcodes for bools (NEWTRUE, NEWFALSE).

- The "extension registry", a vector of popular objects that can be pushed
  efficiently by index (EXT{1,2,4}).  This is akin to the memo and GET, but
  the registry contents are predefined (there's nothing akin to the memo's
  PUT).

Another independent change with Python 2.3 is the abandonment of any
pretense that it might be safe to load pickles received from untrusted
parties -- no sufficient security analysis has been done to guarantee
this and there isn't a use case that warrants the expense of such an
analysis.

To this end, all tests for __safe_for_unpickling__ or for
copyreg.safe_constructors are removed from the unpickling code.
References to these variables in the descriptions below are to be seen
as describing unpickling in Python 2.2 and before.
"""

# Meta-rule:  Descriptions are stored in instances of descriptor objects,
# with plain constructors.  No meta-language is defined from which
# descriptors could be constructed.  If you want, e.g., XML, write a little
# program to generate XML from the objects.
##############################################################################
# Some pickle opcodes have an argument, following the opcode in the
# bytestream.  An argument is of a specific type, described by an instance
# of ArgumentDescriptor.  These are not to be confused with arguments taken
# off the stack -- ArgumentDescriptor applies only to arguments embedded in
# the opcode stream, immediately following an opcode.

# Represents the number of bytes consumed by an argument delimited by the
# next newline character.
UP_TO_NEWLINE = -1

# Represents the number of bytes consumed by a two-argument opcode where
# the first argument gives the number of bytes in the second argument.
TAKEN_FROM_ARGUMENT1 = -2   # num bytes is 1-byte unsigned int
TAKEN_FROM_ARGUMENT4 = -3   # num bytes is 4-byte signed little-endian int
TAKEN_FROM_ARGUMENT4U = -4  # num bytes is 4-byte unsigned little-endian int


class ArgumentDescriptor:
    """Describe one kind of argument embedded in the opcode stream.

    Instances are plain records with four attributes: ``name``, ``n``,
    ``reader`` and ``doc`` (see the comments on ``__slots__``).
    """

    __slots__ = (
        # name of descriptor record, also a module global name; a string
        'name',

        # length of argument, in bytes; an int; UP_TO_NEWLINE and
        # TAKEN_FROM_ARGUMENT{1,4,4U} are negative values for variable-length
        # cases
        'n',

        # a function taking a file-like object, reading this kind of argument
        # from the object at the current position, advancing the current
        # position by n bytes, and returning the value of the argument
        'reader',

        # human-readable docs for this arg descriptor; a string
        'doc',
    )

    def __init__(self, name, n, reader, doc):
        """Store the four fields after sanity-checking their types.

        ``n`` must be a non-negative int or one of the negative
        variable-length sentinels defined above; ``name`` and ``doc`` must
        be strings.  Violations raise AssertionError.
        """
        assert isinstance(name, str)
        self.name = name

        assert isinstance(n, int) and (n >= 0 or
                                       n in (UP_TO_NEWLINE,
                                             TAKEN_FROM_ARGUMENT1,
                                             TAKEN_FROM_ARGUMENT4,
                                             TAKEN_FROM_ARGUMENT4U))
        self.n = n

        self.reader = reader

        assert isinstance(doc, str)
        self.doc = doc


def read_uint1(f):
    r"""Read one byte from f and return it as an unsigned int.

    Raises ValueError if the stream is already exhausted.

    >>> import io
    >>> read_uint1(io.BytesIO(b'\xff'))
    255
    """
    data = f.read(1)
    if data:
        # Indexing a bytes object yields the int value of the byte.
        return data[0]
    raise ValueError("not enough data in stream to read uint1")


uint1 = ArgumentDescriptor(
    name='uint1',
    n=1,
    reader=read_uint1,
    doc="One-byte unsigned integer.")
# NOTE(review): the bodies of read_uint2/read_int4/read_uint4 after the
# length check, the uint2/int4/uint4 descriptors, and the read_stringnl
# signature were restored by analogy with read_uint1/uint1 and read_float8
# and from the call sites below -- confirm against upstream.

def read_uint2(f):
    r"""Read two bytes from f as a little-endian unsigned int.

    >>> import io
    >>> read_uint2(io.BytesIO(b'\xff\x00'))
    255
    >>> read_uint2(io.BytesIO(b'\xff\xff'))
    65535
    """
    data = f.read(2)
    if len(data) == 2:
        return _unpack("<H", data)[0]
    raise ValueError("not enough data in stream to read uint2")


uint2 = ArgumentDescriptor(
    name='uint2',
    n=2,
    reader=read_uint2,
    doc="Two-byte unsigned integer, little-endian.")


def read_int4(f):
    r"""Read four bytes from f as a little-endian signed int.

    >>> import io
    >>> read_int4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_int4(io.BytesIO(b'\x00\x00\x00\x80')) == -(2**31)
    True
    """
    data = f.read(4)
    if len(data) == 4:
        return _unpack("<i", data)[0]
    raise ValueError("not enough data in stream to read int4")


int4 = ArgumentDescriptor(
    name='int4',
    n=4,
    reader=read_int4,
    doc="Four-byte signed integer, little-endian, 2's complement.")


def read_uint4(f):
    r"""Read four bytes from f as a little-endian unsigned int.

    >>> import io
    >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
    True
    """
    data = f.read(4)
    if len(data) == 4:
        return _unpack("<I", data)[0]
    raise ValueError("not enough data in stream to read uint4")


uint4 = ArgumentDescriptor(
    name='uint4',
    n=4,
    reader=read_uint4,
    doc="Four-byte unsigned integer, little-endian.")


def read_stringnl(f, decode=True, stripquotes=True):
    r"""Read one newline-terminated string argument from f.

    With stripquotes true, the line must be bracketed by a matching pair
    of single or double quotes, which are removed.  With decode true,
    backslash escapes are undone and the result is returned as an ASCII
    str; otherwise the raw bytes are returned.

    >>> import io
    >>> read_stringnl(io.BytesIO(b"'abcd'\nefg\n"))
    'abcd'

    >>> read_stringnl(io.BytesIO(b"\n"))
    Traceback (most recent call last):
    ...
    ValueError: no string quotes around b''

    >>> read_stringnl(io.BytesIO(b"\n"), stripquotes=False)
    ''

    >>> read_stringnl(io.BytesIO(b"''\n"))
    ''

    >>> read_stringnl(io.BytesIO(b'"abcd"'))
    Traceback (most recent call last):
    ...
    ValueError: no newline found when trying to read stringnl

    A lone quote character is not a quoted string.

    >>> read_stringnl(io.BytesIO(b"'\n"))
    Traceback (most recent call last):
    ...
    ValueError: string quote b"'" not found at both ends of b"'"

    Embedded escapes are undone in the result.

    >>> read_stringnl(io.BytesIO(br"'a\n\\b\x00c\td'" + b"\n'e'"))
    'a\n\\b\x00c\td'
    """
    data = f.readline()
    if not data.endswith(b'\n'):
        raise ValueError("no newline found when trying to read stringnl")
    data = data[:-1]    # lose the newline

    if stripquotes:
        for q in (b'"', b"'"):
            if data.startswith(q):
                # A single quote character both starts and ends with q, so
                # the length must be checked as well before slicing.
                if len(data) < 2 or not data.endswith(q):
                    raise ValueError("string quote %r not found at both "
                                     "ends of %r" % (q, data))
                data = data[1:-1]
                break
        else:
            raise ValueError("no string quotes around %r" % data)

    if decode:
        data = codecs.escape_decode(data)[0].decode("ascii")
    return data


stringnl = ArgumentDescriptor(
    name='stringnl',
    n=UP_TO_NEWLINE,
    reader=read_stringnl,
    doc="""A newline-terminated string.

    This is a repr-style string, with embedded escapes, and
    bracketing quotes.
    """)


def read_stringnl_noescape(f):
    """Read a newline-terminated string without stripping quotes."""
    return read_stringnl(f, stripquotes=False)


stringnl_noescape = ArgumentDescriptor(
    name='stringnl_noescape',
    n=UP_TO_NEWLINE,
    reader=read_stringnl_noescape,
    doc="""A newline-terminated string.

    This is a str-style string, without embedded escapes,
    or bracketing quotes.  It should consist solely of
    printable ASCII characters.
    """)


def read_stringnl_noescape_pair(f):
    r"""Read two newline-terminated strings and join them with a blank.

    >>> import io
    >>> read_stringnl_noescape_pair(io.BytesIO(b"Queue\nEmpty\njunk"))
    'Queue Empty'
    """
    return f"{read_stringnl_noescape(f)} {read_stringnl_noescape(f)}"


stringnl_noescape_pair = ArgumentDescriptor(
    name='stringnl_noescape_pair',
    n=UP_TO_NEWLINE,
    reader=read_stringnl_noescape_pair,
    doc="""A pair of newline-terminated strings.

    These are str-style strings, without embedded
    escapes, or bracketing quotes.  They should
    consist solely of printable ASCII characters.
    The pair is returned as a single string, with
    a single blank separating the two strings.
    """)


def read_string4(f):
    r"""Read a string preceded by a 4-byte signed little-endian length.

    >>> import io
    >>> read_string4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    ''
    >>> read_string4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    'abc'
    >>> read_string4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
    ...
    ValueError: expected 50331648 bytes in a string4, but only 6 remain
    """
    n = read_int4(f)
    if n < 0:
        raise ValueError("string4 byte count < 0: %d" % n)
    data = f.read(n)
    if len(data) == n:
        return data.decode("latin-1")
    raise ValueError("expected %d bytes in a string4, but only %d remain" %
                     (n, len(data)))


string4 = ArgumentDescriptor(
    name="string4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_string4,
    doc="""A counted string.

    The first argument is a 4-byte little-endian signed int giving
    the number of bytes in the string, and the second argument is
    that many bytes.
    """)


def read_string1(f):
    r"""Read a string preceded by a 1-byte unsigned length.

    >>> import io
    >>> read_string1(io.BytesIO(b"\x00"))
    ''
    >>> read_string1(io.BytesIO(b"\x03abcdef"))
    'abc'
    """
    n = read_uint1(f)
    assert n >= 0
    data = f.read(n)
    if len(data) == n:
        return data.decode("latin-1")
    raise ValueError("expected %d bytes in a string1, but only %d remain" %
                     (n, len(data)))


string1 = ArgumentDescriptor(
    name="string1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_string1,
    doc="""A counted string.

    The first argument is a 1-byte unsigned int giving the number
    of bytes in the string, and the second argument is that many
    bytes.
    """)


def read_bytes1(f):
    r"""Read a bytes object preceded by a 1-byte unsigned length.

    >>> import io
    >>> read_bytes1(io.BytesIO(b"\x00"))
    b''
    >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
    b'abc'
    """
    n = read_uint1(f)
    assert n >= 0
    data = f.read(n)
    if len(data) == n:
        return data
    raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
                     (n, len(data)))


bytes1 = ArgumentDescriptor(
    name="bytes1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_bytes1,
    doc="""A counted bytes string.

    The first argument is a 1-byte unsigned int giving the number
    of bytes, and the second argument is that many bytes.
    """)


def read_bytes4(f):
    r"""Read a bytes object preceded by a 4-byte unsigned length.

    >>> import io
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    b''
    >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    b'abc'
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
    ...
    ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
    """
    n = read_uint4(f)
    if n > sys.maxsize:
        raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
    data = f.read(n)
    if len(data) == n:
        return data
    raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
                     (n, len(data)))


bytes4 = ArgumentDescriptor(
    name="bytes4",
    n=TAKEN_FROM_ARGUMENT4U,
    reader=read_bytes4,
    doc="""A counted bytes string.

    The first argument is a 4-byte little-endian unsigned int giving
    the number of bytes, and the second argument is that many bytes.
    """)


def read_unicodestringnl(f):
    r"""Read a newline-terminated raw-unicode-escape encoded string.

    >>> import io
    >>> read_unicodestringnl(io.BytesIO(b"abc\\uabcd\njunk")) == 'abc\uabcd'
    True
    """
    data = f.readline()
    if not data.endswith(b'\n'):
        raise ValueError("no newline found when trying to read "
                         "unicodestringnl")
    data = data[:-1]    # lose the newline
    return str(data, 'raw-unicode-escape')


unicodestringnl = ArgumentDescriptor(
    name='unicodestringnl',
    n=UP_TO_NEWLINE,
    reader=read_unicodestringnl,
    doc="""A newline-terminated Unicode string.

    This is raw-unicode-escape encoded, so consists of
    printable ASCII characters, and may contain embedded
    escape sequences.
    """)


def read_unicodestring4(f):
    r"""Read a UTF-8 string preceded by a 4-byte unsigned length.

    >>> import io
    >>> s = 'abcd\uabcd'
    >>> enc = s.encode('utf-8')
    >>> enc
    b'abcd\xea\xaf\x8d'
    >>> n = bytes([len(enc), 0, 0, 0])  # little-endian 4-byte length
    >>> t = read_unicodestring4(io.BytesIO(n + enc + b'junk'))
    >>> s == t
    True

    >>> read_unicodestring4(io.BytesIO(n + enc[:-1]))
    Traceback (most recent call last):
    ...
    ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
    """
    n = read_uint4(f)
    if n > sys.maxsize:
        raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
    data = f.read(n)
    if len(data) == n:
        return str(data, 'utf-8', 'surrogatepass')
    raise ValueError("expected %d bytes in a unicodestring4, but only %d "
                     "remain" % (n, len(data)))


unicodestring4 = ArgumentDescriptor(
    name="unicodestring4",
    n=TAKEN_FROM_ARGUMENT4U,
    reader=read_unicodestring4,
    doc="""A counted Unicode string.

    The first argument is a 4-byte little-endian unsigned int
    giving the number of bytes in the string, and the second
    argument-- the UTF-8 encoding of the Unicode string --
    contains that many bytes.
    """)


def read_decimalnl_short(f):
    r"""Read a newline-terminated decimal literal with no trailing 'L'.

    >>> import io
    >>> read_decimalnl_short(io.BytesIO(b"1234\n56"))
    1234

    >>> read_decimalnl_short(io.BytesIO(b"1234L\n56"))
    Traceback (most recent call last):
    ...
    ValueError: trailing 'L' not allowed in b'1234L'
    """
    s = read_stringnl(f, decode=False, stripquotes=False)
    if s.endswith(b"L"):
        raise ValueError("trailing 'L' not allowed in %r" % s)

    # It's not necessarily true that the result fits in a Python short int:
    # the pickle may have been written on a 64-bit box.  There's also a hack
    # for True and False here.
    if s == b"00":
        return False
    elif s == b"01":
        return True

    return int(s)


def read_decimalnl_long(f):
    r"""Read a newline-terminated decimal literal, dropping a trailing 'L'.

    >>> import io

    >>> read_decimalnl_long(io.BytesIO(b"1234L\n56"))
    1234

    >>> read_decimalnl_long(io.BytesIO(b"123456789012345678901234L\n6"))
    123456789012345678901234
    """
    s = read_stringnl(f, decode=False, stripquotes=False)
    if s[-1:] == b'L':
        s = s[:-1]
    return int(s)


decimalnl_short = ArgumentDescriptor(
    name='decimalnl_short',
    n=UP_TO_NEWLINE,
    reader=read_decimalnl_short,
    doc="""A newline-terminated decimal integer literal.

    This never has a trailing 'L', and the integer fit
    in a short Python int on the box where the pickle
    was written -- but there's no guarantee it will fit
    in a short Python int on the box where the pickle
    is read.
    """)

decimalnl_long = ArgumentDescriptor(
    name='decimalnl_long',
    n=UP_TO_NEWLINE,
    reader=read_decimalnl_long,
    doc="""A newline-terminated decimal integer literal.

    This has a trailing 'L', and can represent integers
    of any size.
    """)


def read_floatnl(f):
    r"""Read a newline-terminated decimal float literal.

    >>> import io
    >>> read_floatnl(io.BytesIO(b"-1.25\n6"))
    -1.25
    """
    s = read_stringnl(f, decode=False, stripquotes=False)
    return float(s)


floatnl = ArgumentDescriptor(
    name='floatnl',
    n=UP_TO_NEWLINE,
    reader=read_floatnl,
    doc="""A newline-terminated decimal floating literal.

    In general this requires 17 significant digits for roundtrip
    identity, and pickling then unpickling infinities, NaNs, and
    minus zero doesn't work across boxes, or on some boxes even
    on itself (e.g., Windows can't read the strings it produces
    for infinities or NaNs).
    """)


def read_float8(f):
    r"""Read eight bytes from f as a big-endian double.

    >>> import io, struct
    >>> raw = struct.pack(">d", -1.25)
    >>> raw
    b'\xbf\xf4\x00\x00\x00\x00\x00\x00'
    >>> read_float8(io.BytesIO(raw + b"\n"))
    -1.25
    """
    data = f.read(8)
    if len(data) == 8:
        return _unpack(">d", data)[0]
    raise ValueError("not enough data in stream to read float8")


float8 = ArgumentDescriptor(
    name='float8',
    n=8,
    reader=read_float8,
    doc="""An 8-byte binary representation of a float, big-endian.

    The format is unique to Python, and shared with the struct
    module (format string '>d') "in theory" (the struct and pickle
    implementations don't share the code -- they should).  It's
    strongly related to the IEEE-754 double format, and, in normal
    cases, is in fact identical to the big-endian 754 double format.
    On other boxes the dynamic range is limited to that of a 754
    double, and "add a half and chop" rounding is used to reduce
    the precision to 53 bits.  However, even on a 754 box,
    infinities, NaNs, and minus zero may not be handled correctly
    (may not survive roundtrip pickling intact).
    """)

# Protocol 2 formats


def read_long1(f):
    r"""Read a 2's-complement long preceded by a 1-byte unsigned size.

    >>> import io
    >>> read_long1(io.BytesIO(b"\x00"))
    0
    >>> read_long1(io.BytesIO(b"\x02\xff\x00"))
    255
    >>> read_long1(io.BytesIO(b"\x02\xff\x7f"))
    32767
    >>> read_long1(io.BytesIO(b"\x02\x00\xff"))
    -256
    >>> read_long1(io.BytesIO(b"\x02\x00\x80"))
    -32768
    """
    n = read_uint1(f)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("not enough data in stream to read long1")
    return decode_long(data)


long1 = ArgumentDescriptor(
    name="long1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_long1,
    doc="""A binary long, little-endian, using 1-byte size.

    This first reads one byte as an unsigned size, then reads that
    many bytes and interprets them as a little-endian 2's-complement long.
    If the size is 0, that's taken as a shortcut for the long 0L.
    """)


def read_long4(f):
    r"""Read a 2's-complement long preceded by a 4-byte signed size.

    >>> import io
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\xff\x00"))
    255
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\xff\x7f"))
    32767
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\x00\xff"))
    -256
    >>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\x00\x80"))
    -32768
    >>> read_long4(io.BytesIO(b"\x00\x00\x00\x00"))
    0
    """
    n = read_int4(f)
    if n < 0:
        raise ValueError("long4 byte count < 0: %d" % n)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("not enough data in stream to read long4")
    return decode_long(data)


long4 = ArgumentDescriptor(
    name="long4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_long4,
    doc="""A binary representation of a long, little-endian.

    This first reads four bytes as a signed size (but requires the
    size to be >= 0), then reads that many bytes and interprets them
    as a little-endian 2's-complement long.  If the size is 0, that's taken
    as a shortcut for the int 0, although LONG1 should really be used
    then instead (and in any case where # of bytes < 256).
    """)


##############################################################################
# Object descriptors.  The stack used by the pickle machine holds objects,
# and in the stack_before and stack_after attributes of OpcodeInfo
# descriptors we need names to describe the various types of objects that can
# appear on the stack.
class StackObject:
    """Descriptor naming one kind of object that can sit on the pickle stack.

    Instances are used in the stack_before / stack_after lists of
    OpcodeInfo records; repr() of an instance is simply its name.
    """

    __slots__ = (
        # name of descriptor record, for info only
        'name',

        # type of object, or tuple of type objects (meaning the object can
        # be of any type in the tuple)
        'obtype',

        # human-readable docs for this kind of stack object; a string
        'doc',
    )

    def __init__(self, name, obtype, doc):
        assert isinstance(name, str)
        self.name = name

        assert isinstance(obtype, type) or isinstance(obtype, tuple)
        if isinstance(obtype, tuple):
            for contained in obtype:
                assert isinstance(contained, type)
        self.obtype = obtype

        assert isinstance(doc, str)
        self.doc = doc

    def __repr__(self):
        return self.name


pyint = StackObject(
    name='int',
    obtype=int,
    doc="A short (as opposed to long) Python integer object.")

pylong = StackObject(
    name='long',
    obtype=int,
    doc="A long (as opposed to short) Python integer object.")

pyinteger_or_bool = StackObject(
    name='int_or_bool',
    obtype=(int, bool),
    doc="A Python integer object (short or long), or "
        "a Python bool.")

pybool = StackObject(
    name='bool',
    obtype=(bool,),
    doc="A Python bool object.")

pyfloat = StackObject(
    name='float',
    obtype=float,
    doc="A Python float object.")

pystring = StackObject(
    name='string',
    obtype=bytes,
    doc="A Python (8-bit) string object.")

pybytes = StackObject(
    name='bytes',
    obtype=bytes,
    doc="A Python bytes object.")

pyunicode = StackObject(
    name='str',
    obtype=str,
    doc="A Python (Unicode) string object.")

pynone = StackObject(
    name="None",
    obtype=type(None),
    doc="The Python None object.")

pytuple = StackObject(
    name="tuple",
    obtype=tuple,
    doc="A Python tuple object.")

pylist = StackObject(
    name="list",
    obtype=list,
    doc="A Python list object.")

pydict = StackObject(
    name="dict",
    obtype=dict,
    doc="A Python dict object.")

anyobject = StackObject(
    name='any',
    obtype=object,
    doc="Any kind of object whatsoever.")

markobject = StackObject(
    name="mark",
    obtype=StackObject,
    doc="""'The mark' is a unique object.

    Opcodes that operate on a variable number of objects
    generally don't embed the count of objects in the opcode,
    or pull it off the stack. Instead the MARK opcode is used
    to push a special marker object on the stack, and then
    some other opcodes grab all the objects from the top of
    the stack down to (but not including) the topmost marker
    object.
    """)

stackslice = StackObject(
    name="stackslice",
    obtype=StackObject,
    doc="""An object representing a contiguous slice of the stack.

    This is used in conjuction with markobject, to represent all
    of the stack following the topmost markobject. For example,
    the POP_MARK opcode changes the stack from

        [..., markobject, stackslice]
    to
        [...]

    No matter how many object are on the stack after the topmost
    markobject, POP_MARK gets rid of all of them (including the
    topmost markobject too).
    """)

##############################################################################
# Descriptors for pickle opcodes.


class OpcodeInfo:
    """Record describing a single pickle opcode.

    The constructor only validates its arguments (via assert) and stores
    them in the slots documented below.
    """

    __slots__ = (
        # symbolic name of opcode; a string
        'name',

        # the code used in a bytestream to represent the opcode; a
        # one-character string
        'code',

        # If the opcode has an argument embedded in the byte string, an
        # instance of ArgumentDescriptor specifying its type. Note that
        # arg.reader(s) can be used to read and decode the argument from
        # the bytestream s, and arg.doc documents the format of the raw
        # argument bytes. If the opcode doesn't have an argument embedded
        # in the bytestream, arg should be None.
        'arg',

        # what the stack looks like before this opcode runs; a list
        'stack_before',

        # what the stack looks like after this opcode runs; a list
        'stack_after',

        # the protocol number in which this opcode was introduced; an int
        'proto',

        # human-readable docs for this opcode; a string
        'doc',
    )

    def __init__(self, name, code, arg,
                 stack_before, stack_after, proto, doc):
        assert isinstance(name, str)
        self.name = name

        assert isinstance(code, str)
        assert len(code) == 1
        self.code = code

        assert arg is None or isinstance(arg, ArgumentDescriptor)
        self.arg = arg

        assert isinstance(stack_before, list)
        for x in stack_before:
            assert isinstance(x, StackObject)
        self.stack_before = stack_before

        assert isinstance(stack_after, list)
        for x in stack_after:
            assert isinstance(x, StackObject)
        self.stack_after = stack_after

        assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
        self.proto = proto

        assert isinstance(doc, str)
        self.doc = doc


# Short alias used only while building the table below; deleted afterwards.
I = OpcodeInfo
opcodes = [

    # Ways to spell integers.

    I(name='INT',
      code='I',
      arg=decimalnl_short,
      stack_before=[],
      stack_after=[pyinteger_or_bool],
      proto=0,
      doc="""Push an integer or bool.

      The argument is a newline-terminated decimal literal string.

      The intent may have been that this always fit in a short Python int,
      but INT can be generated in pickles written on a 64-bit box that
      require a Python long on a 32-bit box. The difference between this
      and LONG then is that INT skips a trailing 'L', and produces a short
      int whenever possible.

      Another difference is due to that, when bool was introduced as a
      distinct type in 2.3, builtin names True and False were also added to
      2.2.2, mapping to ints 1 and 0. For compatibility in both directions,
      True gets pickled as INT + "I01\\n", and False as INT + "I00\\n".
      Leading zeroes are never produced for a genuine integer. The 2.3
      (and later) unpicklers special-case these and return bool instead;
      earlier unpicklers ignore the leading "0" and return the int.
      """),

    I(name='BININT',
      code='J',
      arg=int4,
      stack_before=[],
      stack_after=[pyint],
      proto=1,
      doc="""Push a four-byte signed integer.

      This handles the full range of Python (short) integers on a 32-bit
      box, directly as binary bytes (1 for the opcode and 4 for the integer).
      If the integer is non-negative and fits in 1 or 2 bytes, pickling via
      BININT1 or BININT2 saves space.
      """),

    I(name='BININT1',
      code='K',
      arg=uint1,
      stack_before=[],
      stack_after=[pyint],
      proto=1,
      doc="""Push a one-byte unsigned integer.

      This is a space optimization for pickling very small non-negative ints,
      in range(256).
      """),

    I(name='BININT2',
      code='M',
      arg=uint2,
      stack_before=[],
      stack_after=[pyint],
      proto=1,
      doc="""Push a two-byte unsigned integer.

      This is a space optimization for pickling small positive ints, in
      range(256, 2**16). Integers in range(256) can also be pickled via
      BININT2, but BININT1 instead saves a byte.
      """),

    I(name='LONG',
      code='L',
      arg=decimalnl_long,
      stack_before=[],
      stack_after=[pylong],
      proto=0,
      doc="""Push a long integer.

      The same as INT, except that the literal ends with 'L', and always
      unpickles to a Python long. There doesn't seem a real purpose to the
      trailing 'L'.

      Note that LONG takes time quadratic in the number of digits when
      unpickling (this is simply due to the nature of decimal->binary
      conversion). Proto 2 added linear-time (in C; still quadratic-time
      in Python) LONG1 and LONG4 opcodes.
      """),

    I(name="LONG1",
      code='\x8a',
      arg=long1,
      stack_before=[],
      stack_after=[pylong],
      proto=2,
      doc="""Long integer using one-byte length.

      A more efficient encoding of a Python long; the long1 encoding
      says it all."""),

    I(name="LONG4",
      code='\x8b',
      arg=long4,
      stack_before=[],
      stack_after=[pylong],
      proto=2,
      doc="""Long integer using four-byte length.

      A more efficient encoding of a Python long; the long4 encoding
      says it all."""),

    # Ways to spell strings (8-bit, not Unicode).

    I(name='STRING',
      code='S',
      arg=stringnl,
      stack_before=[],
      stack_after=[pystring],
      proto=0,
      doc="""Push a Python string object.

      The argument is a repr-style string, with bracketing quote characters,
      and perhaps embedded escapes. The argument extends until the next
      newline character. (Actually, they are decoded into a str instance
      using the encoding given to the Unpickler constructor. or the default,
      'ASCII'.)
      """),

    I(name='BINSTRING',
      code='T',
      arg=string4,
      stack_before=[],
      stack_after=[pystring],
      proto=1,
      doc="""Push a Python string object.

      There are two arguments: the first is a 4-byte little-endian signed int
      giving the number of bytes in the string, and the second is that many
      bytes, which are taken literally as the string content. (Actually,
      they are decoded into a str instance using the encoding given to the
      Unpickler constructor. or the default, 'ASCII'.)
      """),

    I(name='SHORT_BINSTRING',
      code='U',
      arg=string1,
      stack_before=[],
      stack_after=[pystring],
      proto=1,
      doc="""Push a Python string object.

      There are two arguments: the first is a 1-byte unsigned int giving
      the number of bytes in the string, and the second is that many bytes,
      which are taken literally as the string content. (Actually, they
      are decoded into a str instance using the encoding given to the
      Unpickler constructor. or the default, 'ASCII'.)
      """),

    # Bytes (protocol 3 only; older protocols don't support bytes at all)

    I(name='BINBYTES',
      code='B',
      arg=bytes4,
      stack_before=[],
      stack_after=[pybytes],
      proto=3,
      doc="""Push a Python bytes object.

      There are two arguments: the first is a 4-byte little-endian unsigned
      int giving the number of bytes, and the second is that many bytes,
      which are taken literally as the bytes content.
      """),

    I(name='SHORT_BINBYTES',
      code='C',
      arg=bytes1,
      stack_before=[],
      stack_after=[pybytes],
      proto=3,
      doc="""Push a Python bytes object.

      There are two arguments: the first is a 1-byte unsigned int giving
      the number of bytes, and the second is that many bytes, which are
      taken literally as the string content.
      """),

    # Ways to spell None.

    I(name='NONE',
      code='N',
      arg=None,
      stack_before=[],
      stack_after=[pynone],
      proto=0,
      doc="Push None on the stack."),

    # Ways to spell bools, starting with proto 2. See INT for how this was
    # done before proto 2.

    I(name='NEWTRUE',
      code='\x88',
      arg=None,
      stack_before=[],
      stack_after=[pybool],
      proto=2,
      doc="""True.

      Push True onto the stack."""),

    I(name='NEWFALSE',
      code='\x89',
      arg=None,
      stack_before=[],
      stack_after=[pybool],
      proto=2,
      doc="""False.

      Push False onto the stack."""),

    # Ways to spell Unicode strings.

    I(name='UNICODE',
      code='V',
      arg=unicodestringnl,
      stack_before=[],
      stack_after=[pyunicode],
      proto=0,  # this may be pure-text, but it's a later addition
      doc="""Push a Python Unicode string object.

      The argument is a raw-unicode-escape encoding of a Unicode string,
      and so may contain embedded escape sequences. The argument extends
      until the next newline character.
      """),

    I(name='BINUNICODE',
      code='X',
      arg=unicodestring4,
      stack_before=[],
      stack_after=[pyunicode],
      proto=1,
      doc="""Push a Python Unicode string object.

      There are two arguments: the first is a 4-byte little-endian unsigned
      int giving the number of bytes in the string. The second is that many
      bytes, and is the UTF-8 encoding of the Unicode string.
      """),

    # Ways to spell floats.

    I(name='FLOAT',
      code='F',
      arg=floatnl,
      stack_before=[],
      stack_after=[pyfloat],
      proto=0,
      doc="""Newline-terminated decimal float literal.

      The argument is repr(a_float), and in general requires 17 significant
      digits for roundtrip conversion to be an identity (this is so for
      IEEE-754 double precision values, which is what Python float maps to
      on most boxes).

      In general, FLOAT cannot be used to transport infinities, NaNs, or
      minus zero across boxes (or even on a single box, if the platform C
      library can't read the strings it produces for such things -- Windows
      is like that), but may do less damage than BINFLOAT on boxes with
      greater precision or dynamic range than IEEE-754 double.
      """),

    I(name='BINFLOAT',
      code='G',
      arg=float8,
      stack_before=[],
      stack_after=[pyfloat],
      proto=1,
      doc="""Float stored in binary form, with 8 bytes of data.

      This generally requires less than half the space of FLOAT encoding.
      In general, BINFLOAT cannot be used to transport infinities, NaNs, or
      minus zero, raises an exception if the exponent exceeds the range of
      an IEEE-754 double, and retains no more than 53 bits of precision (if
      there are more than that, "add a half and chop" rounding is used to
      cut it back to 53 significant bits).
      """),

    # Ways to build lists.

    I(name='EMPTY_LIST',
      code=']',
      arg=None,
      stack_before=[],
      stack_after=[pylist],
      proto=1,
      doc="Push an empty list."),

    I(name='APPEND',
      code='a',
      arg=None,
      stack_before=[pylist, anyobject],
      stack_after=[pylist],
      proto=0,
      doc="""Append an object to a list.

      Stack before: ... pylist anyobject
      Stack after: ... pylist+[anyobject]

      although pylist is really extended in-place.
      """),

    I(name='APPENDS',
      code='e',
      arg=None,
      stack_before=[pylist, markobject, stackslice],
      stack_after=[pylist],
      proto=1,
      doc="""Extend a list by a slice of stack objects.

      Stack before: ... pylist markobject stackslice
      Stack after: ... pylist+stackslice

      although pylist is really extended in-place.
      """),

    I(name='LIST',
      code='l',
      arg=None,
      stack_before=[markobject, stackslice],
      stack_after=[pylist],
      proto=0,
      doc="""Build a list out of the topmost stack slice, after markobject.

      All the stack entries following the topmost markobject are placed into
      a single Python list, which single list object replaces all of the
      stack from the topmost markobject onward. For example,

      Stack before: ... markobject 1 2 3 'abc'
      Stack after: ... [1, 2, 3, 'abc']
      """),

    # Ways to build tuples.

    I(name='EMPTY_TUPLE',
      code=')',
      arg=None,
      stack_before=[],
      stack_after=[pytuple],
      proto=1,
      doc="Push an empty tuple."),

    I(name='TUPLE',
      code='t',
      arg=None,
      stack_before=[markobject, stackslice],
      stack_after=[pytuple],
      proto=0,
      doc="""Build a tuple out of the topmost stack slice, after markobject.

      All the stack entries following the topmost markobject are placed into
      a single Python tuple, which single tuple object replaces all of the
      stack from the topmost markobject onward. For example,

      Stack before: ... markobject 1 2 3 'abc'
      Stack after: ... (1, 2, 3, 'abc')
      """),

    I(name='TUPLE1',
      code='\x85',
      arg=None,
      stack_before=[anyobject],
      stack_after=[pytuple],
      proto=2,
      doc="""Build a one-tuple out of the topmost item on the stack.

      This code pops one value off the stack and pushes a tuple of
      length 1 whose one item is that value back onto it. In other
      words:

          stack[-1] = tuple(stack[-1:])
      """),

    I(name='TUPLE2',
      code='\x86',
      arg=None,
      stack_before=[anyobject, anyobject],
      stack_after=[pytuple],
      proto=2,
      doc="""Build a two-tuple out of the top two items on the stack.

      This code pops two values off the stack and pushes a tuple of
      length 2 whose items are those values back onto it. In other
      words:

          stack[-2:] = [tuple(stack[-2:])]
      """),

    I(name='TUPLE3',
      code='\x87',
      arg=None,
      stack_before=[anyobject, anyobject, anyobject],
      stack_after=[pytuple],
      proto=2,
      doc="""Build a three-tuple out of the top three items on the stack.

      This code pops three values off the stack and pushes a tuple of
      length 3 whose items are those values back onto it. In other
      words:

          stack[-3:] = [tuple(stack[-3:])]
      """),

    # Ways to build dicts.

    I(name='EMPTY_DICT',
      code='}',
      arg=None,
      stack_before=[],
      stack_after=[pydict],
      proto=1,
      doc="Push an empty dict."),

    I(name='DICT',
      code='d',
      arg=None,
      stack_before=[markobject, stackslice],
      stack_after=[pydict],
      proto=0,
      doc="""Build a dict out of the topmost stack slice, after markobject.

      All the stack entries following the topmost markobject are placed into
      a single Python dict, which single dict object replaces all of the
      stack from the topmost markobject onward. The stack slice alternates
      key, value, key, value, .... For example,

      Stack before: ... markobject 1 2 3 'abc'
      Stack after: ... {1: 2, 3: 'abc'}
      """),

    I(name='SETITEM',
      code='s',
      arg=None,
      stack_before=[pydict, anyobject, anyobject],
      stack_after=[pydict],
      proto=0,
      doc="""Add a key+value pair to an existing dict.

      Stack before: ... pydict key value
      Stack after: ... pydict

      where pydict has been modified via pydict[key] = value.
      """),

    I(name='SETITEMS',
      code='u',
      arg=None,
      stack_before=[pydict, markobject, stackslice],
      stack_after=[pydict],
      proto=1,
      doc="""Add an arbitrary number of key+value pairs to an existing dict.

      The slice of the stack following the topmost markobject is taken as
      an alternating sequence of keys and values, added to the dict
      immediately under the topmost markobject. Everything at and after the
      topmost markobject is popped, leaving the mutated dict at the top
      of the stack.

      Stack before: ... pydict markobject key_1 value_1 ... key_n value_n
      Stack after: ... pydict

      where pydict has been modified via pydict[key_i] = value_i for i in
      1, 2, ..., n, and in that order.
      """),

    # Stack manipulation.

    I(name='POP',
      code='0',
      arg=None,
      stack_before=[anyobject],
      stack_after=[],
      proto=0,
      doc="Discard the top stack item, shrinking the stack by one item."),

    I(name='DUP',
      code='2',
      arg=None,
      stack_before=[anyobject],
      stack_after=[anyobject, anyobject],
      proto=0,
      doc="Push the top stack item onto the stack again, duplicating it."),

    I(name='MARK',
      code='(',
      arg=None,
      stack_before=[],
      stack_after=[markobject],
      proto=0,
      doc="""Push markobject onto the stack.

      markobject is a unique object, used by other opcodes to identify a
      region of the stack containing a variable number of objects for them
      to work on. See markobject.doc for more detail.
      """),

    I(name='POP_MARK',
      code='1',
      arg=None,
      stack_before=[markobject, stackslice],
      stack_after=[],
      proto=1,
      doc="""Pop all the stack objects at and above the topmost markobject.

      When an opcode using a variable number of stack objects is done,
      POP_MARK is used to remove those objects, and to remove the markobject
      that delimited their starting position on the stack.
      """),

    # Memo manipulation. There are really only two operations (get and put),
    # each in all-text, "short binary", and "long binary" flavors.

    I(name='GET',
      code='g',
      arg=decimalnl_short,
      stack_before=[],
      stack_after=[anyobject],
      proto=0,
      doc="""Read an object from the memo and push it on the stack.

      The index of the memo object to push is given by the newline-terminated
      decimal string following. BINGET and LONG_BINGET are space-optimized
      versions.
      """),

    I(name='BINGET',
      code='h',
      arg=uint1,
      stack_before=[],
      stack_after=[anyobject],
      proto=1,
      doc="""Read an object from the memo and push it on the stack.

      The index of the memo object to push is given by the 1-byte unsigned
      integer following.
      """),

    I(name='LONG_BINGET',
      code='j',
      arg=uint4,
      stack_before=[],
      stack_after=[anyobject],
      proto=1,
      doc="""Read an object from the memo and push it on the stack.

      The index of the memo object to push is given by the 4-byte unsigned
      little-endian integer following.
      """),

    I(name='PUT',
      code='p',
      arg=decimalnl_short,
      stack_before=[],
      stack_after=[],
      proto=0,
      doc="""Store the stack top into the memo. The stack is not popped.

      The index of the memo location to write into is given by the newline-
      terminated decimal string following. BINPUT and LONG_BINPUT are
      space-optimized versions.
      """),

    I(name='BINPUT',
      code='q',
      arg=uint1,
      stack_before=[],
      stack_after=[],
      proto=1,
      doc="""Store the stack top into the memo. The stack is not popped.

      The index of the memo location to write into is given by the 1-byte
      unsigned integer following.
      """),

    I(name='LONG_BINPUT',
      code='r',
      arg=uint4,
      stack_before=[],
      stack_after=[],
      proto=1,
      doc="""Store the stack top into the memo. The stack is not popped.

      The index of the memo location to write into is given by the 4-byte
      unsigned little-endian integer following.
      """),

    # Access the extension registry (predefined objects). Akin to the GET
    # family.

    I(name='EXT1',
      code='\x82',
      arg=uint1,
      stack_before=[],
      stack_after=[anyobject],
      proto=2,
      doc="""Extension code.

      This code and the similar EXT2 and EXT4 allow using a registry
      of popular objects that are pickled by name, typically classes.
      It is envisioned that through a global negotiation and
      registration process, third parties can set up a mapping between
      ints and object names.

      In order to guarantee pickle interchangeability, the extension
      code registry ought to be global, although a range of codes may
      be reserved for private use.

      EXT1 has a 1-byte integer argument. This is used to index into the
      extension registry, and the object at that index is pushed on the stack.
      """),

    I(name='EXT2',
      code='\x83',
      arg=uint2,
      stack_before=[],
      stack_after=[anyobject],
      proto=2,
      doc="""Extension code.

      See EXT1. EXT2 has a two-byte integer argument.
      """),

    I(name='EXT4',
      code='\x84',
      arg=int4,
      stack_before=[],
      stack_after=[anyobject],
      proto=2,
      doc="""Extension code.

      See EXT1. EXT4 has a four-byte integer argument.
      """),

    # Push a class object, or module function, on the stack, via its module
    # and name.

    I(name='GLOBAL',
      code='c',
      arg=stringnl_noescape_pair,
      stack_before=[],
      stack_after=[anyobject],
      proto=0,
      doc="""Push a global object (module.attr) on the stack.

      Two newline-terminated strings follow the GLOBAL opcode. The first is
      taken as a module name, and the second as a class name. The class
      object module.class is pushed on the stack. More accurately, the
      object returned by self.find_class(module, class) is pushed on the
      stack, so unpickling subclasses can override this form of lookup.
      """),

    # Ways to build objects of classes pickle doesn't know about directly
    # (user-defined classes). I despair of documenting this accurately
    # and comprehensibly -- you really have to read the pickle code to
    # find all the special cases.

    I(name='REDUCE',
      code='R',
      arg=None,
      stack_before=[anyobject, anyobject],
      stack_after=[anyobject],
      proto=0,
      doc="""Push an object built from a callable and an argument tuple.

      The opcode is named to remind of the __reduce__() method.

      Stack before: ... callable pytuple
      Stack after: ... callable(*pytuple)

      The callable and the argument tuple are the first two items returned
      by a __reduce__ method. Applying the callable to the argtuple is
      supposed to reproduce the original object, or at least get it started.
      If the __reduce__ method returns a 3-tuple, the last component is an
      argument to be passed to the object's __setstate__, and then the REDUCE
      opcode is followed by code to create setstate's argument, and then a
      BUILD opcode to apply __setstate__ to that argument.

      If not isinstance(callable, type), REDUCE complains unless the
      callable has been registered with the copyreg module's
      safe_constructors dict, or the callable has a magic
      '__safe_for_unpickling__' attribute with a true value. I'm not sure
      why it does this, but I've sure seen this complaint often enough when
      I didn't want to .
      """),

    I(name='BUILD',
      code='b',
      arg=None,
      stack_before=[anyobject, anyobject],
      stack_after=[anyobject],
      proto=0,
      doc="""Finish building an object, via __setstate__ or dict update.

      Stack before: ... anyobject argument
      Stack after: ... anyobject

      where anyobject may have been mutated, as follows:

      If the object has a __setstate__ method,

          anyobject.__setstate__(argument)

      is called.

      Else the argument must be a dict, the object must have a __dict__, and
      the object is updated via

          anyobject.__dict__.update(argument)
      """),

    I(name='INST',
      code='i',
      arg=stringnl_noescape_pair,
      stack_before=[markobject, stackslice],
      stack_after=[anyobject],
      proto=0,
      doc="""Build a class instance.

      This is the protocol 0 version of protocol 1's OBJ opcode.
      INST is followed by two newline-terminated strings, giving a
      module and class name, just as for the GLOBAL opcode (and see
      GLOBAL for more details about that). self.find_class(module, name)
      is used to get a class object.

      In addition, all the objects on the stack following the topmost
      markobject are gathered into a tuple and popped (along with the
      topmost markobject), just as for the TUPLE opcode.

      Now it gets complicated. If all of these are true:

        + The argtuple is empty (markobject was at the top of the stack
          at the start).

        + The class object does not have a __getinitargs__ attribute.

      then we want to create an old-style class instance without invoking
      its __init__() method (pickle has waffled on this over the years; not
      calling __init__() is current wisdom). In this case, an instance of
      an old-style dummy class is created, and then we try to rebind its
      __class__ attribute to the desired class object. If this succeeds,
      the new instance object is pushed on the stack, and we're done.

      Else (the argtuple is not empty, it's not an old-style class object,
      or the class object does have a __getinitargs__ attribute), the code
      first insists that the class object have a __safe_for_unpickling__
      attribute. Unlike as for the __safe_for_unpickling__ check in REDUCE,
      it doesn't matter whether this attribute has a true or false value, it
      only matters whether it exists (XXX this is a bug). If
      __safe_for_unpickling__ doesn't exist, UnpicklingError is raised.

      Else (the class object does have a __safe_for_unpickling__ attr),
      the class object obtained from INST's arguments is applied to the
      argtuple obtained from the stack, and the resulting instance object
      is pushed on the stack.

      NOTE: checks for __safe_for_unpickling__ went away in Python 2.3.
      NOTE: the distinction between old-style and new-style classes does
            not make sense in Python 3.
      """),

    I(name='OBJ',
      code='o',
      arg=None,
      stack_before=[markobject, anyobject, stackslice],
      stack_after=[anyobject],
      proto=1,
      doc="""Build a class instance.

      This is the protocol 1 version of protocol 0's INST opcode, and is
      very much like it. The major difference is that the class object
      is taken off the stack, allowing it to be retrieved from the memo
      repeatedly if several instances of the same class are created. This
      can be much more efficient (in both time and space) than repeatedly
      embedding the module and class names in INST opcodes.

      Unlike INST, OBJ takes no arguments from the opcode stream. Instead
      the class object is taken off the stack, immediately above the
      topmost markobject:

      Stack before: ... markobject classobject stackslice
      Stack after: ... new_instance_object

      As for INST, the remainder of the stack above the markobject is
      gathered into an argument tuple, and then the logic seems identical,
      except that no __safe_for_unpickling__ check is done (XXX this is
      a bug). See INST for the gory details.

      NOTE: In Python 2.3, INST and OBJ are identical except for how they
      get the class object. That was always the intent; the implementations
      had diverged for accidental reasons.
      """),

    I(name='NEWOBJ',
      code='\x81',
      arg=None,
      stack_before=[anyobject, anyobject],
      stack_after=[anyobject],
      proto=2,
      doc="""Build an object instance.

      The stack before should be thought of as containing a class
      object followed by an argument tuple (the tuple being the stack
      top). Call these cls and args. They are popped off the stack,
      and the value returned by cls.__new__(cls, *args) is pushed back
      onto the stack.
      """),

    # Machine control.

    I(name='PROTO',
      code='\x80',
      arg=uint1,
      stack_before=[],
      stack_after=[],
      proto=2,
      doc="""Protocol version indicator.

      For protocol 2 and above, a pickle must start with this opcode.
      The argument is the protocol version, an int in range(2, 256).
      """),

    I(name='STOP',
      code='.',
      arg=None,
      stack_before=[anyobject],
      stack_after=[],
      proto=0,
      doc="""Stop the unpickling machine.

      Every pickle ends with this opcode. The object at the top of the stack
      is popped, and that's the result of unpickling. The stack should be
      empty then.
      """),

    # Ways to deal with persistent IDs.

    I(name='PERSID',
      code='P',
      arg=stringnl_noescape,
      stack_before=[],
      stack_after=[anyobject],
      proto=0,
      doc="""Push an object identified by a persistent ID.

      The pickle module doesn't define what a persistent ID means. PERSID's
      argument is a newline-terminated str-style (no embedded escapes, no
      bracketing quote characters) string, which *is* "the persistent ID".
      The unpickler passes this string to self.persistent_load(). Whatever
      object that returns is pushed on the stack. There is no implementation
      of persistent_load() in Python's unpickler: it must be supplied by an
      unpickler subclass.
      """),

    I(name='BINPERSID',
      code='Q',
      arg=None,
      stack_before=[anyobject],
      stack_after=[anyobject],
      proto=1,
      doc="""Push an object identified by a persistent ID.

      Like PERSID, except the persistent ID is popped off the stack (instead
      of being a string embedded in the opcode bytestream). The persistent
      ID is passed to self.persistent_load(), and whatever object that
      returns is pushed on the stack. See PERSID for more detail.
      """),
]
del I

# Verify uniqueness of .name and .code members.
# Remember the first index at which each name / code was seen; a second
# sighting at a different index means the table has a duplicate.
name2i = {}
code2i = {}

for i, d in enumerate(opcodes):
    if name2i.setdefault(d.name, i) != i:
        raise ValueError("repeated name %r at indices %d and %d" %
                         (d.name, name2i[d.name], i))
    if code2i.setdefault(d.code, i) != i:
        raise ValueError("repeated code %r at indices %d and %d" %
                         (d.code, code2i[d.code], i))

del name2i, code2i, i, d

##############################################################################
# Build a code2op dict, mapping opcode characters to OpcodeInfo records.
# Also ensure we've got the same stuff as pickle.py, although the
# introspection here is dicey.

code2op = {info.code: info for info in opcodes}


def assure_pickle_consistency(verbose=False):
    """Cross-check our opcode table against the constants in pickle.

    Every name in pickle.__all__ that looks like an opcode name and is
    bound to a one-byte bytes value must appear in code2op under the same
    name, and every entry of code2op must be matched by such a constant.
    Raises ValueError on the first mismatch; with verbose true, prints
    what is skipped and what is checked.
    """
    unmatched = dict(code2op)

    for name in pickle.__all__:
        if re.match("[A-Z][A-Z0-9_]+$", name) is None:
            if verbose:
                print(
                    "skipping %r: it doesn't look like an opcode name" % name)
            continue

        picklecode = getattr(pickle, name)
        if not (isinstance(picklecode, bytes) and len(picklecode) == 1):
            if verbose:
                print("skipping %r: value %r doesn't look like a pickle "
                      "code" % (name, picklecode))
            continue

        picklecode = picklecode.decode("latin-1")
        if picklecode not in unmatched:
            raise ValueError("pickle.py appears to have a pickle opcode with "
                             "name %r and code %r, but we don't" %
                             (name, picklecode))

        if verbose:
            print("checking name {!r} w/ code {!r} for consistency".format(
                name, picklecode))
        # Take the entry out as we go: whatever is still in 'unmatched'
        # after the loop is an opcode of ours that pickle.py lacks.
        info = unmatched.pop(picklecode)
        if info.name != name:
            raise ValueError("for pickle code %r, pickle.py uses name %r "
                             "but we're using name %r" % (picklecode,
                                                          name,
                                                          info.name))

    if unmatched:
        lines = ["we appear to have pickle opcodes that pickle.py doesn't have:"]
        lines.extend(f" name {info.name!r} with code {code!r}"
                     for code, info in unmatched.items())
        raise ValueError("\n".join(lines))


assure_pickle_consistency()
del assure_pickle_consistency

##############################################################################
# A pickle opcode generator.
def genops(pickle):
    """Generate all the opcodes in a pickle.

    'pickle' is a file-like object, or bytes, containing the pickle.

    Each opcode in the pickle is generated, from the current pickle position,
    stopping after a STOP opcode is delivered. A triple is generated for
    each opcode:

        opcode, arg, pos

    opcode is an OpcodeInfo record, describing the current opcode.

    If the opcode has an argument embedded in the pickle, arg is its decoded
    value, as a Python object. If the opcode doesn't have an argument, arg
    is None.

    If the pickle has a tell() method, pos was the value of pickle.tell()
    before reading the current opcode. If the pickle is a bytes object,
    it's wrapped in a BytesIO object, and the latter's tell() result is
    used. Else (the pickle doesn't have a tell(), and it's not obvious how
    to query its current position) pos is None.

    Raises ValueError if the stream runs out before a STOP opcode is seen,
    or if a byte that is not a known opcode is encountered.
    """

    if isinstance(pickle, bytes_types):
        import io
        pickle = io.BytesIO(pickle)

    if hasattr(pickle, "tell"):
        getpos = pickle.tell
    else:
        def getpos():
            return None

    while True:
        pos = getpos()
        code = pickle.read(1)
        opcode = code2op.get(code.decode("latin-1"))
        if opcode is None:
            if code == b"":
                raise ValueError("pickle exhausted before seeing STOP")
            # A conditional expression is required here: the old
            # "cond and X or Y" idiom silently falls through to Y whenever
            # X is falsy, and position 0 must still be reported as 0.
            raise ValueError("at position {}, opcode {!r} unknown".format(
                "<unknown>" if pos is None else pos, code))
        if opcode.arg is None:
            arg = None
        else:
            arg = opcode.arg.reader(pickle)
        yield opcode, arg, pos
        if code == b'.':
            assert opcode.name == 'STOP'
            break

##############################################################################
# A pickle optimizer.
def optimize(p):
    """Optimize a pickle string by removing unused PUT opcodes.

    'p' is walked with genops().  Every opcode whose name contains 'PUT' is
    recorded together with the byte range it occupies, and the argument of
    every opcode whose name contains 'GET' is collected.  The result is the
    bytes of 'p' with each recorded PUT range dropped when its argument was
    never seen on a GET.  'p' itself is sliced, so it must be a bytes-like
    sequence rather than a file object.
    """
    gets = set()            # set of args used by a GET opcode
    puts = []               # (arg, startpos, stoppos) for the PUT opcodes
    prevpos = None          # set to pos if previous opcode was a PUT
    prevarg = None
    for opcode, arg, pos in genops(p):
        # The end of a PUT is only known once the next opcode's start
        # position is seen, so the PUT is recorded one iteration late.
        if prevpos is not None:
            puts.append((prevarg, prevpos, pos))
            prevpos = None
        if 'PUT' in opcode.name:
            prevarg, prevpos = arg, pos
        elif 'GET' in opcode.name:
            gets.add(arg)

    # Copy the pickle string except for PUTS without a corresponding GET
    s = []
    i = 0
    for arg, start, stop in puts:
        # Keep the PUT (copy through 'stop') only if its key is read later;
        # otherwise copy up to 'start' and skip over the PUT itself.
        j = stop if (arg in gets) else start
        s.append(p[i:j])
        i = stop
    s.append(p[i:])
    return b''.join(s)

##############################################################################
# A symbolic pickle disassembler.


def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0):
    """Produce a symbolic disassembly of a pickle.

    'pickle' is a file-like object, or bytes object, containing a (at least
    one) pickle.  The pickle is disassembled from the current position,
    through the first STOP opcode encountered.

    Optional arg 'out' is a file-like object to which the disassembly is
    printed.  It defaults to sys.stdout.

    Optional arg 'memo' is a Python dict, used as the pickle's memo.  It
    may be mutated by dis(), if the pickle contains PUT or BINPUT opcodes.
    Passing the same memo object to another dis() call then allows
    disassembly to proceed across multiple pickles that were all created by
    the same pickler with the same memo.  Ordinarily you don't need to worry
    about this.

    Optional arg 'indentlevel' is the number of blanks by which to indent
    a new MARK level.  It defaults to 4.

    Optional arg 'annotate' if nonzero instructs dis() to add a short
    description of the opcode on each line of disassembled output.
    The value given to 'annotate' must be an integer and is used as a
    hint for the column where annotation should start.  The default
    value is 0, meaning no annotations.

    In addition to printing the disassembly, some sanity checks are made:

    + All embedded opcode arguments "make sense".

    + Explicit and implicit pop operations have enough items on the stack.

    + When an opcode implicitly refers to a markobject, a markobject is
      actually on the stack.

    + A memo entry isn't referenced before it's defined.

    + The markobject isn't stored in the memo.

    + A memo entry isn't redefined.

    A failed check raises ValueError, after the offending opcode's line has
    been printed.
    """

    # Most of the hair here is for sanity checks, but most of it is needed
    # anyway to detect when a protocol 0 POP takes a MARK off the stack
    # (which in turn is needed to indent MARK blocks correctly).

    stack = []          # crude emulation of unpickler stack
    if memo is None:
        memo = {}       # crude emulation of unpickler memo
    maxproto = -1       # max protocol number seen
    markstack = []      # bytecode positions of MARK opcodes
    indentchunk = ' ' * indentlevel
    errormsg = None
    annocol = annotate  # column hint for annotations
    for opcode, arg, pos in genops(pickle):
        if pos is not None:
            print("%5d:" % pos, end=' ', file=out)

        # repr() of the one-character code with its quotes stripped, so
        # non-printable codes show up as escapes.
        line = "%-4s %s%s" % (repr(opcode.code)[1:-1],
                              indentchunk * len(markstack),
                              opcode.name)

        maxproto = max(maxproto, opcode.proto)
        before = opcode.stack_before    # don't mutate
        after = opcode.stack_after      # don't mutate
        numtopop = len(before)

        # See whether a MARK should be popped.
        markmsg = None
        if markobject in before or (opcode.name == "POP" and
                                    stack and
                                    stack[-1] is markobject):
            assert markobject not in after
            if __debug__:
                if markobject in before:
                    assert before[-1] is stackslice
            if markstack:
                markpos = markstack.pop()
                if markpos is None:
                    markmsg = "(MARK at unknown opcode offset)"
                else:
                    markmsg = "(MARK at %d)" % markpos
                # Pop everything at and after the topmost markobject.
                while stack[-1] is not markobject:
                    stack.pop()
                stack.pop()
                # Stop later code from popping too much.
                try:
                    numtopop = before.index(markobject)
                except ValueError:
                    assert opcode.name == "POP"
                    numtopop = 0
            else:
                errormsg = markmsg = "no MARK exists on stack"

        # Check for correct memo usage.
        if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT"):
            assert arg is not None
            if arg in memo:
                errormsg = "memo key %r already defined" % arg
            elif not stack:
                errormsg = "stack is empty -- can't store into memo"
            elif stack[-1] is markobject:
                errormsg = "can't store markobject in the memo"
            else:
                memo[arg] = stack[-1]
        elif opcode.name in ("GET", "BINGET", "LONG_BINGET"):
            if arg in memo:
                assert len(after) == 1
                after = [memo[arg]]     # for better stack emulation
            else:
                errormsg = "memo key %r has never been stored into" % arg

        if arg is not None or markmsg:
            # make a mild effort to align arguments
            line += ' ' * (10 - len(opcode.name))
            if arg is not None:
                line += ' ' + repr(arg)
            if markmsg:
                line += ' ' + markmsg
        if annotate:
            line += ' ' * (annocol - len(line))
            # make a mild effort to align annotations
            annocol = len(line)
            if annocol > 50:
                annocol = annotate
            # Only the first line of the opcode's doc text is shown.
            line += ' ' + opcode.doc.split('\n', 1)[0]
        print(line, file=out)

        if errormsg:
            # Note that we delayed complaining until the offending opcode
            # was printed.
            raise ValueError(errormsg)

        # Emulate the stack effects.
        if len(stack) < numtopop:
            raise ValueError("tries to pop %d items from stack with "
                             "only %d items" % (numtopop, len(stack)))
        if numtopop:
            del stack[-numtopop:]
        if markobject in after:
            assert markobject not in before
            markstack.append(pos)

        stack.extend(after)

    print("highest protocol among opcodes =", maxproto, file=out)
    if stack:
        raise ValueError("stack not empty after STOP: %r" % stack)

# For use in the doctest, simply as an example of a class to pickle.

class _Example: def __init__(self, value): self.value = value _dis_test = r""" >>> import pickle >>> x = [1, 2, (3, 4), {b'abc': "def"}] >>> pkl0 = pickle.dumps(x, 0) >>> dis(pkl0) 0: ( MARK 1: l LIST (MARK at 0) 2: p PUT 0 5: I INT 1 8: a APPEND 9: I INT 2 12: a APPEND 13: ( MARK 14: I INT 3 17: I INT 4 20: t TUPLE (MARK at 13) 21: p PUT 1 24: a APPEND 25: ( MARK 26: d DICT (MARK at 25) 27: p PUT 2 30: c GLOBAL '_codecs encode' 46: p PUT 3 49: ( MARK 50: V UNICODE 'abc' 55: p PUT 4 58: V UNICODE 'latin1' 66: p PUT 5 69: t TUPLE (MARK at 49) 70: p PUT 6 73: R REDUCE 74: p PUT 7 77: V UNICODE 'def' 82: p PUT 8 85: s SETITEM 86: a APPEND 87: . STOP highest protocol among opcodes = 0 Try again with a "binary" pickle. >>> pkl1 = pickle.dumps(x, 1) >>> dis(pkl1) 0: ] EMPTY_LIST 1: q BINPUT 0 3: ( MARK 4: K BININT1 1 6: K BININT1 2 8: ( MARK 9: K BININT1 3 11: K BININT1 4 13: t TUPLE (MARK at 8) 14: q BINPUT 1 16: } EMPTY_DICT 17: q BINPUT 2 19: c GLOBAL '_codecs encode' 35: q BINPUT 3 37: ( MARK 38: X BINUNICODE 'abc' 46: q BINPUT 4 48: X BINUNICODE 'latin1' 59: q BINPUT 5 61: t TUPLE (MARK at 37) 62: q BINPUT 6 64: R REDUCE 65: q BINPUT 7 67: X BINUNICODE 'def' 75: q BINPUT 8 77: s SETITEM 78: e APPENDS (MARK at 3) 79: . STOP highest protocol among opcodes = 1 Exercise the INST/OBJ/BUILD family. >>> import pickletools >>> dis(pickle.dumps(pickletools.dis, 0)) 0: c GLOBAL 'pickletools dis' 17: p PUT 0 20: . 
STOP highest protocol among opcodes = 0 >>> from pickletools import _Example >>> x = [_Example(42)] * 2 >>> dis(pickle.dumps(x, 0)) 0: ( MARK 1: l LIST (MARK at 0) 2: p PUT 0 5: c GLOBAL 'copy_reg _reconstructor' 30: p PUT 1 33: ( MARK 34: c GLOBAL 'pickletools _Example' 56: p PUT 2 59: c GLOBAL '__builtin__ object' 79: p PUT 3 82: N NONE 83: t TUPLE (MARK at 33) 84: p PUT 4 87: R REDUCE 88: p PUT 5 91: ( MARK 92: d DICT (MARK at 91) 93: p PUT 6 96: V UNICODE 'value' 103: p PUT 7 106: I INT 42 110: s SETITEM 111: b BUILD 112: a APPEND 113: g GET 5 116: a APPEND 117: . STOP highest protocol among opcodes = 0 >>> dis(pickle.dumps(x, 1)) 0: ] EMPTY_LIST 1: q BINPUT 0 3: ( MARK 4: c GLOBAL 'copy_reg _reconstructor' 29: q BINPUT 1 31: ( MARK 32: c GLOBAL 'pickletools _Example' 54: q BINPUT 2 56: c GLOBAL '__builtin__ object' 76: q BINPUT 3 78: N NONE 79: t TUPLE (MARK at 31) 80: q BINPUT 4 82: R REDUCE 83: q BINPUT 5 85: } EMPTY_DICT 86: q BINPUT 6 88: X BINUNICODE 'value' 98: q BINPUT 7 100: K BININT1 42 102: s SETITEM 103: b BUILD 104: h BINGET 5 106: e APPENDS (MARK at 3) 107: . STOP highest protocol among opcodes = 1 Try "the canonical" recursive-object test. >>> L = [] >>> T = L, >>> L.append(T) >>> L[0] is T True >>> T[0] is L True >>> L[0][0] is L True >>> T[0][0] is T True >>> dis(pickle.dumps(L, 0)) 0: ( MARK 1: l LIST (MARK at 0) 2: p PUT 0 5: ( MARK 6: g GET 0 9: t TUPLE (MARK at 5) 10: p PUT 1 13: a APPEND 14: . STOP highest protocol among opcodes = 0 >>> dis(pickle.dumps(L, 1)) 0: ] EMPTY_LIST 1: q BINPUT 0 3: ( MARK 4: h BINGET 0 6: t TUPLE (MARK at 3) 7: q BINPUT 1 9: a APPEND 10: . STOP highest protocol among opcodes = 1 Note that, in the protocol 0 pickle of the recursive tuple, the disassembler has to emulate the stack in order to realize that the POP opcode at 16 gets rid of the MARK at 0. 
>>> dis(pickle.dumps(T, 0)) 0: ( MARK 1: ( MARK 2: l LIST (MARK at 1) 3: p PUT 0 6: ( MARK 7: g GET 0 10: t TUPLE (MARK at 6) 11: p PUT 1 14: a APPEND 15: 0 POP 16: 0 POP (MARK at 0) 17: g GET 1 20: . STOP highest protocol among opcodes = 0 >>> dis(pickle.dumps(T, 1)) 0: ( MARK 1: ] EMPTY_LIST 2: q BINPUT 0 4: ( MARK 5: h BINGET 0 7: t TUPLE (MARK at 4) 8: q BINPUT 1 10: a APPEND 11: 1 POP_MARK (MARK at 0) 12: h BINGET 1 14: . STOP highest protocol among opcodes = 1 Try protocol 2. >>> dis(pickle.dumps(L, 2)) 0: \x80 PROTO 2 2: ] EMPTY_LIST 3: q BINPUT 0 5: h BINGET 0 7: \x85 TUPLE1 8: q BINPUT 1 10: a APPEND 11: . STOP highest protocol among opcodes = 2 >>> dis(pickle.dumps(T, 2)) 0: \x80 PROTO 2 2: ] EMPTY_LIST 3: q BINPUT 0 5: h BINGET 0 7: \x85 TUPLE1 8: q BINPUT 1 10: a APPEND 11: 0 POP 12: h BINGET 1 14: . STOP highest protocol among opcodes = 2 Try protocol 3 with annotations: >>> dis(pickle.dumps(T, 3), annotate=1) 0: \x80 PROTO 3 Protocol version indicator. 2: ] EMPTY_LIST Push an empty list. 3: q BINPUT 0 Store the stack top into the memo. The stack is not popped. 5: h BINGET 0 Read an object from the memo and push it on the stack. 7: \x85 TUPLE1 Build a one-tuple out of the topmost item on the stack. 8: q BINPUT 1 Store the stack top into the memo. The stack is not popped. 10: a APPEND Append an object to a list. 11: 0 POP Discard the top stack item, shrinking the stack by one item. 12: h BINGET 1 Read an object from the memo and push it on the stack. 14: . STOP Stop the unpickling machine. highest protocol among opcodes = 2 """ # noqa: E501 line too long _memo_test = r""" >>> import pickle >>> import io >>> f = io.BytesIO() >>> p = pickle.Pickler(f, 2) >>> x = [1, 2, 3] >>> p.dump(x) >>> p.dump(x) >>> f.seek(0) 0 >>> memo = {} >>> dis(f, memo=memo) 0: \x80 PROTO 2 2: ] EMPTY_LIST 3: q BINPUT 0 5: ( MARK 6: K BININT1 1 8: K BININT1 2 10: K BININT1 3 12: e APPENDS (MARK at 5) 13: . 
STOP highest protocol among opcodes = 2 >>> dis(f, memo=memo) 14: \x80 PROTO 2 16: h BINGET 0 18: . STOP highest protocol among opcodes = 2 """ __test__ = {'disassembler_test': _dis_test, 'disassembler_memo_test': _memo_test, } def _test(): import doctest return doctest.testmod() if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description='disassemble one or more pickle files') parser.add_argument( 'pickle_file', type=argparse.FileType('br'), nargs='*', help='the pickle file') parser.add_argument( '-o', '--output', default=sys.stdout, type=argparse.FileType('w'), help='the file where the output should be written') parser.add_argument( '-m', '--memo', action='store_true', help='preserve memo between disassemblies') parser.add_argument( '-l', '--indentlevel', default=4, type=int, help='the number of blanks by which to indent a new MARK level') parser.add_argument( '-a', '--annotate', action='store_true', help='annotate each line with a short opcode description') parser.add_argument( '-p', '--preamble', default="==> {name} <==", help='if more than one pickle file is specified, print this before' ' each disassembly') parser.add_argument( '-t', '--test', action='store_true', help='run self-test suite') parser.add_argument( '-v', action='store_true', help='run verbosely; only affects self-test run') args = parser.parse_args() if args.test: _test() else: annotate = 30 if args.annotate else 0 if not args.pickle_file: parser.print_help() elif len(args.pickle_file) == 1: dis(args.pickle_file[0], args.output, None, args.indentlevel, annotate) else: memo = {} if args.memo else None for f in args.pickle_file: preamble = args.preamble.format(name=f.name) args.output.write(preamble + '\n') dis(f, args.output, memo, args.indentlevel, annotate) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/slowpickle.py0000644000076600000240000000204514753071602020661 0ustar00m.howitzstaff''' 
The zodbpickle.pickle module exposes the standard behavior of the pickle module. This is backward compatible, but has the effect that by default, on Python3 you get the fast implementation, while on Python2 you get the slow implementation. This module is a version that always exposes the slow implementation of pickling and avoids the need to explicitly touch internals. Note: We are intentionally using "import *" in this context. The imported modules define an __all__ variable, which contains all the names that it wants to export. So this is a rare case where 'import *' is exactly the right thing to do. ''' import sys import zodbpickle.pickle_3 as p # undo the replacement with fast versions p.Pickler, p.Unpickler = p._Pickler, p._Unpickler p.dump, p.dumps, p.load, p.loads = p._dump, p._dumps, p._load, p._loads del p # isort: off # pick up all names that the module defines from .pickle_3 import * # noqa: E402 module level import not at top of file # do not share the globals with a fast version del sys.modules['zodbpickle.pickle_3'] ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1739355010.8954701 zodbpickle-4.2/src/zodbpickle/tests/0000755000076600000240000000000014753071603017275 5ustar00m.howitzstaff././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/tests/__init__.py0000644000076600000240000000032414753071602021404 0ustar00m.howitzstaffimport os import platform py_impl = getattr(platform, 'python_implementation', lambda: None) _is_pypy = py_impl() == 'PyPy' _is_jython = py_impl() == 'Jython' _is_pure = int(os.environ.get('PURE_PYTHON', '0')) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/tests/pickle_3_tests.py0000644000076600000240000001235514753071602022567 0ustar00m.howitzstaffimport collections import doctest import io import unittest from zodbpickle import pickle_3 as pickle from 
zodbpickle import pickletools_3 as pickletools from . import _is_pure from . import _is_pypy from .pickletester_3 import AbstractBytesFallbackTests from .pickletester_3 import AbstractBytestrTests from .pickletester_3 import AbstractDispatchTableTests from .pickletester_3 import AbstractPersistentPicklerTests from .pickletester_3 import AbstractPickleModuleTests from .pickletester_3 import AbstractPicklerUnpicklerObjectTests from .pickletester_3 import AbstractPickleTests from .pickletester_3 import BigmemPickleTests if not _is_pypy and not _is_pure: try: from zodbpickle import _pickle except ModuleNotFoundError: has_c_implementation = False else: has_c_implementation = True else: has_c_implementation = False class PickleTests(AbstractPickleModuleTests): pass class PyPicklerBase: pickler = pickle._Pickler unpickler = pickle._Unpickler def dumps(self, arg, proto=None, **kwds): f = io.BytesIO() p = self.pickler(f, proto, **kwds) p.dump(arg) f.seek(0) return bytes(f.read()) def loads(self, buf, **kwds): f = io.BytesIO(buf) u = self.unpickler(f, **kwds) return u.load() class PyPicklerTests(PyPicklerBase, AbstractPickleTests): pass class PyPicklerBytestrTests(PyPicklerBase, AbstractBytestrTests): pass class PyPicklerBytesFallbackTests(PyPicklerBase, AbstractBytesFallbackTests): pass class InMemoryPickleTests(AbstractPickleTests, BigmemPickleTests): pickler = pickle._Pickler unpickler = pickle._Unpickler def dumps(self, arg, protocol=None): return pickle.dumps(arg, protocol) def loads(self, buf, **kwds): return pickle.loads(buf, **kwds) class PyPersPicklerTests(AbstractPersistentPicklerTests): pickler = pickle._Pickler unpickler = pickle._Unpickler def dumps(self, arg, proto=None): class PersPickler(self.pickler): def persistent_id(subself, obj): return self.persistent_id(obj) f = io.BytesIO() p = PersPickler(f, proto) p.dump(arg) f.seek(0) return f.read() def loads(self, buf, **kwds): class PersUnpickler(self.unpickler): def persistent_load(subself, obj): return 
self.persistent_load(obj) f = io.BytesIO(buf) u = PersUnpickler(f, **kwds) return u.load() class PyPicklerUnpicklerObjectTests(AbstractPicklerUnpicklerObjectTests): pickler_class = pickle._Pickler unpickler_class = pickle._Unpickler class PyDispatchTableTests(AbstractDispatchTableTests): pickler_class = pickle._Pickler def get_dispatch_table(self): return pickle.dispatch_table.copy() class PyChainDispatchTableTests(AbstractDispatchTableTests): pickler_class = pickle._Pickler def get_dispatch_table(self): return collections.ChainMap({}, pickle.dispatch_table) if has_c_implementation: class CPicklerTests(PyPicklerTests): pickler = _pickle.Pickler unpickler = _pickle.Unpickler class CPicklerBytestrTests(PyPicklerBytestrTests): pickler = _pickle.Pickler unpickler = _pickle.Unpickler class CPicklerBytesFallbackTests(PyPicklerBytesFallbackTests): pickler = _pickle.Pickler unpickler = _pickle.Unpickler class CPersPicklerTests(PyPersPicklerTests): pickler = _pickle.Pickler unpickler = _pickle.Unpickler class CDumpPickle_LoadPickle(PyPicklerTests): pickler = _pickle.Pickler unpickler = pickle._Unpickler class DumpPickle_CLoadPickle(PyPicklerTests): pickler = pickle._Pickler unpickler = _pickle.Unpickler class CPicklerUnpicklerObjectTests(AbstractPicklerUnpicklerObjectTests): pickler_class = _pickle.Pickler unpickler_class = _pickle.Unpickler class CDispatchTableTests(AbstractDispatchTableTests): pickler_class = pickle.Pickler def get_dispatch_table(self): return pickle.dispatch_table.copy() class CChainDispatchTableTests(AbstractDispatchTableTests): pickler_class = pickle.Pickler def get_dispatch_table(self): return collections.ChainMap({}, pickle.dispatch_table) def choose_tests(): tests = [ PickleTests, PyPicklerTests, PyPersPicklerTests, PyPicklerBytestrTests, PyPicklerBytesFallbackTests, PyDispatchTableTests, PyChainDispatchTableTests, ] if has_c_implementation: tests.extend([ CPicklerTests, CPersPicklerTests, CPicklerBytestrTests, CPicklerBytesFallbackTests, 
CDumpPickle_LoadPickle, DumpPickle_CLoadPickle, PyPicklerUnpicklerObjectTests, CPicklerUnpicklerObjectTests, InMemoryPickleTests, CDispatchTableTests, CChainDispatchTableTests, ]) return tests def test_suite(): return unittest.TestSuite([ unittest.defaultTestLoader.loadTestsFromTestCase(t) for t in choose_tests() ] + [ doctest.DocTestSuite(pickle), doctest.DocTestSuite(pickletools), ]) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/tests/pickletester_3.py0000644000076600000240000020647614753071602022605 0ustar00m.howitzstaffimport copyreg import io import os import sys import tempfile import unittest import weakref from http.cookies import SimpleCookie from test.support import _2G from test.support import _4G from test.support import TestFailed from test.support import bigmemtest from test.support import run_with_locale import __main__ from zodbpickle import pickle_3 as pickle from zodbpickle import pickletools_3 as pickletools from zodbpickle.pickle_3 import bytes_types from . import _is_pypy try: from test.support import no_tracing except ModuleNotFoundError: from functools import wraps def no_tracing(func): if not hasattr(sys, 'gettrace'): return func @wraps(func) def wrapper(*args, **kwargs): original_trace = sys.gettrace() try: sys.settrace(None) return func(*args, **kwargs) finally: sys.settrace(original_trace) return wrapper _PY311b1 = sys.hexversion >= 0x30b00b1 # 3.11.0b1 # Tests that try a number of pickle protocols should have a # for proto in protocols: # kind of outer loop. protocols = range(pickle.HIGHEST_PROTOCOL + 1) ascii_char_size = 1 fd, TESTFN = tempfile.mkstemp('.pickletester_3') os.close(fd) # Return True if opcode code appears in the pickle, else False. def opcode_in_pickle(code, pickle): for op, dummy, dummy in pickletools.genops(pickle): if op.code == code.decode("latin-1"): return True return False # Return the number of times opcode code appears in pickle. 
def count_opcode(code, pickle): n = 0 for op, dummy, dummy in pickletools.genops(pickle): if op.code == code.decode("latin-1"): n += 1 return n class UnseekableIO(io.BytesIO): def peek(self, *args): raise NotImplementedError def seekable(self): return False def seek(self, *args): raise io.UnsupportedOperation def tell(self): raise io.UnsupportedOperation # We can't very well test the extension registry without putting known stuff # in it, but we have to be careful to restore its original state. Code # should do this: # # e = ExtensionSaver(extension_code) # try: # fiddle w/ the extension registry's stuff for extension_code # finally: # e.restore() class ExtensionSaver: # Remember current registration for code (if any), and remove it (if # there is one). def __init__(self, code): self.code = code if code in copyreg._inverted_registry: self.pair = copyreg._inverted_registry[code] copyreg.remove_extension(self.pair[0], self.pair[1], code) else: self.pair = None # Restore previous registration for code. 
def restore(self): code = self.code curpair = copyreg._inverted_registry.get(code) if curpair is not None: copyreg.remove_extension(curpair[0], curpair[1], code) pair = self.pair if pair is not None: copyreg.add_extension(pair[0], pair[1], code) class C: def __eq__(self, other): return self.__dict__ == other.__dict__ class D(C): def __init__(self, arg): pass class E(C): def __getinitargs__(self): return () __main__.C = C C.__module__ = "__main__" __main__.D = D D.__module__ = "__main__" __main__.E = E E.__module__ = "__main__" class myint(int): def __init__(self, x): self.str = str(x) class initarg(C): def __init__(self, a, b): self.a = a self.b = b def __getinitargs__(self): return self.a, self.b class metaclass(type): pass class use_metaclass(metaclass=metaclass): pass class pickling_metaclass(type): def __eq__(self, other): return (isinstance(self, type(other)) and self.reduce_args == other.reduce_args) def __reduce__(self): return (create_dynamic_class, self.reduce_args) def create_dynamic_class(name, bases): result = pickling_metaclass(name, bases, dict()) result.reduce_args = (name, bases) return result # DATA0 .. DATA2 are the pickles we expect under the various protocols, for # the object returned by create_data(). DATA0 = ( b'(lp0\nL0L\naL1L\naF2.0\nac' b'builtins\ncomplex\n' b'p1\n(F3.0\nF0.0\ntp2\nRp' b'3\naL1L\naL-1L\naL255L\naL-' b'255L\naL-256L\naL65535L\na' b'L-65535L\naL-65536L\naL2' b'147483647L\naL-2147483' b'647L\naL-2147483648L\na(' b'Vabc\np4\ng4\nccopyreg' b'\n_reconstructor\np5\n(' b'c__main__\nC\np6\ncbu' b'iltins\nobject\np7\nNt' b'p8\nRp9\n(dp10\nVfoo\np1' b'1\nL1L\nsVbar\np12\nL2L\nsb' b'g9\ntp13\nag13\naL5L\na.' 
) # Disassembly of DATA0 DATA0_DIS = """\ 0: ( MARK 1: l LIST (MARK at 0) 2: p PUT 0 5: L LONG 0 9: a APPEND 10: L LONG 1 14: a APPEND 15: F FLOAT 2.0 20: a APPEND 21: c GLOBAL 'builtins complex' 39: p PUT 1 42: ( MARK 43: F FLOAT 3.0 48: F FLOAT 0.0 53: t TUPLE (MARK at 42) 54: p PUT 2 57: R REDUCE 58: p PUT 3 61: a APPEND 62: L LONG 1 66: a APPEND 67: L LONG -1 72: a APPEND 73: L LONG 255 79: a APPEND 80: L LONG -255 87: a APPEND 88: L LONG -256 95: a APPEND 96: L LONG 65535 104: a APPEND 105: L LONG -65535 114: a APPEND 115: L LONG -65536 124: a APPEND 125: L LONG 2147483647 138: a APPEND 139: L LONG -2147483647 153: a APPEND 154: L LONG -2147483648 168: a APPEND 169: ( MARK 170: V UNICODE 'abc' 175: p PUT 4 178: g GET 4 181: c GLOBAL 'copyreg _reconstructor' 205: p PUT 5 208: ( MARK 209: c GLOBAL '__main__ C' 221: p PUT 6 224: c GLOBAL 'builtins object' 241: p PUT 7 244: N NONE 245: t TUPLE (MARK at 208) 246: p PUT 8 249: R REDUCE 250: p PUT 9 253: ( MARK 254: d DICT (MARK at 253) 255: p PUT 10 259: V UNICODE 'foo' 264: p PUT 11 268: L LONG 1 272: s SETITEM 273: V UNICODE 'bar' 278: p PUT 12 282: L LONG 2 286: s SETITEM 287: b BUILD 288: g GET 9 291: t TUPLE (MARK at 169) 292: p PUT 13 296: a APPEND 297: g GET 13 301: a APPEND 302: L LONG 5 306: a APPEND 307: . STOP highest protocol among opcodes = 0 """ DATA1 = ( b']q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c' b'builtins\ncomplex\nq\x01' b'(G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00t' b'q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xffJ' b'\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff\xff' b'\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00ab' b'cq\x04h\x04ccopyreg\n_reco' b'nstructor\nq\x05(c__main' b'__\nC\nq\x06cbuiltins\n' b'object\nq\x07Ntq\x08Rq\t}q\n(' b'X\x03\x00\x00\x00fooq\x0bK\x01X\x03\x00\x00\x00bar' b'q\x0cK\x02ubh\ttq\rh\rK\x05e.' 
) # Disassembly of DATA1 DATA1_DIS = """\ 0: ] EMPTY_LIST 1: q BINPUT 0 3: ( MARK 4: K BININT1 0 6: K BININT1 1 8: G BINFLOAT 2.0 17: c GLOBAL 'builtins complex' 35: q BINPUT 1 37: ( MARK 38: G BINFLOAT 3.0 47: G BINFLOAT 0.0 56: t TUPLE (MARK at 37) 57: q BINPUT 2 59: R REDUCE 60: q BINPUT 3 62: K BININT1 1 64: J BININT -1 69: K BININT1 255 71: J BININT -255 76: J BININT -256 81: M BININT2 65535 84: J BININT -65535 89: J BININT -65536 94: J BININT 2147483647 99: J BININT -2147483647 104: J BININT -2147483648 109: ( MARK 110: X BINUNICODE 'abc' 118: q BINPUT 4 120: h BINGET 4 122: c GLOBAL 'copyreg _reconstructor' 146: q BINPUT 5 148: ( MARK 149: c GLOBAL '__main__ C' 161: q BINPUT 6 163: c GLOBAL 'builtins object' 180: q BINPUT 7 182: N NONE 183: t TUPLE (MARK at 148) 184: q BINPUT 8 186: R REDUCE 187: q BINPUT 9 189: } EMPTY_DICT 190: q BINPUT 10 192: ( MARK 193: X BINUNICODE 'foo' 201: q BINPUT 11 203: K BININT1 1 205: X BINUNICODE 'bar' 213: q BINPUT 12 215: K BININT1 2 217: u SETITEMS (MARK at 192) 218: b BUILD 219: h BINGET 9 221: t TUPLE (MARK at 109) 222: q BINPUT 13 224: h BINGET 13 226: K BININT1 5 228: e APPENDS (MARK at 3) 229: . STOP highest protocol among opcodes = 1 """ DATA2 = ( b'\x80\x02]q\x00(K\x00K\x01G@\x00\x00\x00\x00\x00\x00\x00c' b'builtins\ncomplex\n' b'q\x01G@\x08\x00\x00\x00\x00\x00\x00G\x00\x00\x00\x00\x00\x00\x00\x00' b'\x86q\x02Rq\x03K\x01J\xff\xff\xff\xffK\xffJ\x01\xff\xff\xff' b'J\x00\xff\xff\xffM\xff\xffJ\x01\x00\xff\xffJ\x00\x00\xff\xffJ\xff' b'\xff\xff\x7fJ\x01\x00\x00\x80J\x00\x00\x00\x80(X\x03\x00\x00\x00a' b'bcq\x04h\x04c__main__\nC\nq\x05' b')\x81q\x06}q\x07(X\x03\x00\x00\x00fooq\x08K\x01' b'X\x03\x00\x00\x00barq\tK\x02ubh\x06tq\nh' b'\nK\x05e.' 
) # Disassembly of DATA2 DATA2_DIS = """\ 0: \x80 PROTO 2 2: ] EMPTY_LIST 3: q BINPUT 0 5: ( MARK 6: K BININT1 0 8: K BININT1 1 10: G BINFLOAT 2.0 19: c GLOBAL 'builtins complex' 37: q BINPUT 1 39: G BINFLOAT 3.0 48: G BINFLOAT 0.0 57: \x86 TUPLE2 58: q BINPUT 2 60: R REDUCE 61: q BINPUT 3 63: K BININT1 1 65: J BININT -1 70: K BININT1 255 72: J BININT -255 77: J BININT -256 82: M BININT2 65535 85: J BININT -65535 90: J BININT -65536 95: J BININT 2147483647 100: J BININT -2147483647 105: J BININT -2147483648 110: ( MARK 111: X BINUNICODE 'abc' 119: q BINPUT 4 121: h BINGET 4 123: c GLOBAL '__main__ C' 135: q BINPUT 5 137: ) EMPTY_TUPLE 138: \x81 NEWOBJ 139: q BINPUT 6 141: } EMPTY_DICT 142: q BINPUT 7 144: ( MARK 145: X BINUNICODE 'foo' 153: q BINPUT 8 155: K BININT1 1 157: X BINUNICODE 'bar' 165: q BINPUT 9 167: K BININT1 2 169: u SETITEMS (MARK at 144) 170: b BUILD 171: h BINGET 6 173: t TUPLE (MARK at 110) 174: q BINPUT 10 176: h BINGET 10 178: K BININT1 5 180: e APPENDS (MARK at 5) 181: . STOP highest protocol among opcodes = 2 """ # set([1,2]) pickled from 2.x with protocol 2 DATA3 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.' # xrange(5) pickled from 2.x with protocol 2 DATA4 = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.' # a SimpleCookie() object pickled from 2.x with protocol 2 DATA5 = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key' b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U' b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07' b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U' b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b' b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.') # set([3]) pickled from 2.x with protocol 2 DATA6 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.' DATA6_PYPY = b'\x80\x02c__builtin__\nset\nq\x00K\x03\x85q\x01\x85q\x02Rq\x03.' 
def create_data(): c = C() c.foo = 1 c.bar = 2 x = [0, 1, 2.0, 3.0 + 0j] # Append some integer test cases at cPickle.c's internal size # cutoffs. uint1max = 0xff uint2max = 0xffff int4max = 0x7fffffff x.extend([1, -1, uint1max, -uint1max, -uint1max - 1, uint2max, -uint2max, -uint2max - 1, int4max, -int4max, -int4max - 1]) y = ('abc', 'abc', c, c) x.append(y) x.append(y) x.append(5) return x class AbstractPickleTests(unittest.TestCase): # Subclass must define self.dumps, self.loads. _testdata = create_data() def setUp(self): pass def test_misc(self): # test various datatypes not tested by testdata for proto in protocols: x = myint(4) s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(x, y) x = (1, ()) s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(x, y) x = initarg(1, x) s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(x, y) # XXX test __reduce__ protocol? def test_roundtrip_equality(self): expected = self._testdata for proto in protocols: s = self.dumps(expected, proto) got = self.loads(s) self.assertEqual(expected, got) def test_load_from_data0(self): self.assertEqual(self._testdata, self.loads(DATA0)) def test_load_from_data1(self): self.assertEqual(self._testdata, self.loads(DATA1)) def test_load_from_data2(self): self.assertEqual(self._testdata, self.loads(DATA2)) def test_load_classic_instance(self): # See issue5180. Test loading 2.x pickles that # contain an instance of old style class. for X, args in [(C, ()), (D, ('x',)), (E, ())]: xname = X.__name__.encode('ascii') # Protocol 0 (text mode pickle): """ 0: ( MARK 1: i INST '__main__ X' (MARK at 0) 15: p PUT 0 18: ( MARK 19: d DICT (MARK at 18) 20: p PUT 1 23: b BUILD 24: . STOP """ pickle0 = (b"(i__main__\n" b"X\n" b"p0\n" b"(dp1\nb.").replace(b'X', xname) self.assertEqual(X(*args), self.loads(pickle0)) # Protocol 1 (binary mode pickle) """ 0: ( MARK 1: c GLOBAL '__main__ X' 15: q BINPUT 0 17: o OBJ (MARK at 0) 18: q BINPUT 1 20: } EMPTY_DICT 21: q BINPUT 2 23: b BUILD 24: . 
STOP """ pickle1 = (b'(c__main__\n' b'X\n' b'q\x00oq\x01}q\x02b.').replace(b'X', xname) self.assertEqual(X(*args), self.loads(pickle1)) # Protocol 2 (pickle2 = b'\x80\x02' + pickle1) """ 0: \x80 PROTO 2 2: ( MARK 3: c GLOBAL '__main__ X' 17: q BINPUT 0 19: o OBJ (MARK at 2) 20: q BINPUT 1 22: } EMPTY_DICT 23: q BINPUT 2 25: b BUILD 26: . STOP """ pickle2 = (b'\x80\x02(c__main__\n' b'X\n' b'q\x00oq\x01}q\x02b.').replace(b'X', xname) self.assertEqual(X(*args), self.loads(pickle2)) # There are gratuitous differences between pickles produced by # pickle and cPickle, largely because cPickle starts PUT indices at # 1 and pickle starts them at 0. See XXX comment in cPickle's put2() -- # there's a comment with an exclamation point there whose meaning # is a mystery. cPickle also suppresses PUT for objects with a refcount # of 1. def dont_test_disassembly(self): from io import StringIO from pickletools import dis for proto, expected in (0, DATA0_DIS), (1, DATA1_DIS): s = self.dumps(self._testdata, proto) filelike = StringIO() dis(s, out=filelike) got = filelike.getvalue() self.assertEqual(expected, got) def test_recursive_list(self): l_ = [] l_.append(l_) for proto in protocols: s = self.dumps(l_, proto) x = self.loads(s) self.assertEqual(len(x), 1) self.assertIs(x, x[0]) def test_recursive_tuple(self): t = ([],) t[0].append(t) for proto in protocols: s = self.dumps(t, proto) x = self.loads(s) self.assertEqual(len(x), 1) self.assertEqual(len(x[0]), 1) self.assertIs(x, x[0][0]) def test_recursive_dict(self): d = {} d[1] = d for proto in protocols: s = self.dumps(d, proto) x = self.loads(s) self.assertEqual(list(x.keys()), [1]) self.assertIs(x[1], x) def test_recursive_inst(self): i = C() i.attr = i for proto in protocols: s = self.dumps(i, proto) x = self.loads(s) self.assertEqual(dir(x), dir(i)) self.assertIs(x.attr, x) def test_recursive_multi(self): l_ = [] d = {1: l_} i = C() i.attr = d l_.append(i) for proto in protocols: s = self.dumps(l_, proto) x = self.loads(s) 
self.assertEqual(len(x), 1) self.assertEqual(dir(x[0]), dir(i)) self.assertEqual(list(x[0].attr.keys()), [1]) self.assertIs(x[0].attr[1], x) def test_get(self): self.assertRaises(KeyError, self.loads, b'g0\np0') self.assertEqual(self.loads(b'((Kdtp0\nh\x00l.))'), [(100,), (100,)]) def test_insecure_strings(self): # XXX Some of these tests are temporarily disabled insecure = [b"abc", b"2 + 2", # not quoted # b"'abc' + 'def'", # not a single quoted string b"'abc", # quote is not closed b"'abc\"", # open quote and close quote don't match b"'abc' ?", # junk after close quote b"'\\'", # trailing backslash # Variations on issue #17710 b"'", b'"', b"' ", b"' ", b"' ", b"' ", b'" ', # some tests of the quoting rules # b"'abc\"\''", # b"'\\\\a\'\'\'\\\'\\\\\''", ] for b in insecure: buf = b"S" + b + b"\012p0\012." self.assertRaises(ValueError, self.loads, buf) def test_unicode(self): endcases = ['', '<\\u>', '<\\\u1234>', '<\n>', '<\\>', '<\\\U00012345>', # surrogates '<\udc80>'] for proto in protocols: for u in endcases: p = self.dumps(u, proto) u2 = self.loads(p) self.assertEqual(u2, u) def test_unicode_high_plane(self): t = '\U00012345' for proto in protocols: p = self.dumps(t, proto) t2 = self.loads(p) self.assertEqual(t2, t) def test_bytes(self): for proto in protocols: for s in b'', b'xyz', b'xyz' * 100: p = self.dumps(s, proto) self.assertEqual(self.loads(p), s) for s in [bytes([i]) for i in range(256)]: p = self.dumps(s, proto) self.assertEqual(self.loads(p), s) for s in [bytes([i, i]) for i in range(256)]: p = self.dumps(s, proto) self.assertEqual(self.loads(p), s) def test_ints(self): import sys for proto in protocols: n = sys.maxsize while n: for expected in (-n, n): s = self.dumps(expected, proto) n2 = self.loads(s) self.assertEqual(expected, n2) n = n >> 1 def test_maxint64(self): maxint64 = (1 << 63) - 1 data = b'I' + str(maxint64).encode("ascii") + b'\n.' got = self.loads(data) self.assertEqual(got, maxint64) # Try too with a bogus literal. 
data = b'I' + str(maxint64).encode("ascii") + b'JUNK\n.' self.assertRaises(ValueError, self.loads, data) def test_long(self): for proto in protocols: # 256 bytes is where LONG4 begins. for nbits in 1, 8, 8 * 254, 8 * 255, 8 * 256, 8 * 257: nbase = 1 << nbits for npos in nbase - 1, nbase, nbase + 1: for n in npos, -npos: pickle = self.dumps(n, proto) got = self.loads(pickle) self.assertEqual(n, got) # Try a monster. This is quadratic-time in protos 0 & 1, so don't # bother with those. nbase = int("deadbeeffeedface", 16) nbase += nbase << 1000000 for n in nbase, -nbase: p = self.dumps(n, 2) got = self.loads(p) self.assertEqual(n, got) def test_float(self): test_values = [0.0, 4.94e-324, 1e-310, 7e-308, 6.626e-34, 0.1, 0.5, 3.14, 263.44582062374053, 6.022e23, 1e30] test_values = test_values + [-x for x in test_values] for proto in protocols: for value in test_values: pickle = self.dumps(value, proto) got = self.loads(pickle) self.assertEqual(value, got) @run_with_locale('LC_ALL', 'de_DE', 'fr_FR') def test_float_format(self): # make sure that floats are formatted locale independent with proto 0 self.assertEqual(self.dumps(1.2, 0)[0:3], b'F1.') def test_reduce(self): pass def test_getinitargs(self): pass def test_pop_empty_stack(self): # Test issue7455 s = b'0' self.assertRaises((pickle.UnpicklingError, IndexError), self.loads, s) def test_metaclass(self): a = use_metaclass() for proto in protocols: s = self.dumps(a, proto) b = self.loads(s) self.assertEqual(a.__class__, b.__class__) def test_dynamic_class(self): a = create_dynamic_class("my_dynamic_class", (object,)) copyreg.pickle(pickling_metaclass, pickling_metaclass.__reduce__) for proto in protocols: s = self.dumps(a, proto) b = self.loads(s) self.assertEqual(a, b) def test_structseq(self): import os import time t = time.localtime() for proto in protocols: s = self.dumps(t, proto) u = self.loads(s) self.assertEqual(t, u) if hasattr(os, "stat"): t = os.stat(os.curdir) s = self.dumps(t, proto) u = self.loads(s) 
self.assertEqual(t, u) if hasattr(os, "statvfs"): t = os.statvfs(os.curdir) s = self.dumps(t, proto) u = self.loads(s) self.assertEqual(t, u) def test_ellipsis(self): for proto in protocols: s = self.dumps(..., proto) u = self.loads(s) self.assertEqual(..., u) def test_notimplemented(self): for proto in protocols: s = self.dumps(NotImplemented, proto) u = self.loads(s) self.assertEqual(NotImplemented, u) # Tests for protocol 2 def test_proto(self): build_none = pickle.NONE + pickle.STOP for proto in protocols: expected = build_none if proto >= 2: expected = pickle.PROTO + bytes([proto]) + expected p = self.dumps(None, proto) self.assertEqual(p, expected) oob = protocols[-1] + 1 # a future protocol badpickle = pickle.PROTO + bytes([oob]) + build_none try: self.loads(badpickle) except ValueError as detail: self.assertTrue(str(detail).startswith( "unsupported pickle protocol")) else: self.fail("expected bad protocol number to raise ValueError") def test_long1(self): x = 12345678910111213141516178920 for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(x, y) self.assertEqual(opcode_in_pickle(pickle.LONG1, s), proto >= 2) def test_long4(self): x = 12345678910111213141516178920 << (256 * 8) for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(x, y) self.assertEqual(opcode_in_pickle(pickle.LONG4, s), proto >= 2) def test_short_tuples(self): # Map (proto, len(tuple)) to expected opcode. 
expected_opcode = {(0, 0): pickle.TUPLE, (0, 1): pickle.TUPLE, (0, 2): pickle.TUPLE, (0, 3): pickle.TUPLE, (0, 4): pickle.TUPLE, (1, 0): pickle.EMPTY_TUPLE, (1, 1): pickle.TUPLE, (1, 2): pickle.TUPLE, (1, 3): pickle.TUPLE, (1, 4): pickle.TUPLE, (2, 0): pickle.EMPTY_TUPLE, (2, 1): pickle.TUPLE1, (2, 2): pickle.TUPLE2, (2, 3): pickle.TUPLE3, (2, 4): pickle.TUPLE, (3, 0): pickle.EMPTY_TUPLE, (3, 1): pickle.TUPLE1, (3, 2): pickle.TUPLE2, (3, 3): pickle.TUPLE3, (3, 4): pickle.TUPLE, } a = () b = (1,) c = (1, 2) d = (1, 2, 3) e = (1, 2, 3, 4) for proto in protocols: for x in a, b, c, d, e: s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(x, y, (proto, x, s, y)) expected = expected_opcode[proto, len(x)] self.assertEqual(opcode_in_pickle(expected, s), True) def test_singletons(self): # Map (proto, singleton) to expected opcode. expected_opcode = {(0, None): pickle.NONE, (1, None): pickle.NONE, (2, None): pickle.NONE, (3, None): pickle.NONE, (0, True): pickle.INT, (1, True): pickle.INT, (2, True): pickle.NEWTRUE, (3, True): pickle.NEWTRUE, (0, False): pickle.INT, (1, False): pickle.INT, (2, False): pickle.NEWFALSE, (3, False): pickle.NEWFALSE, } for proto in protocols: for x in None, False, True: s = self.dumps(x, proto) y = self.loads(s) self.assertIs(x, y, (proto, x, s, y)) expected = expected_opcode[proto, x] self.assertEqual(opcode_in_pickle(expected, s), True) def test_newobj_tuple(self): x = MyTuple([1, 2, 3]) x.foo = 42 x.bar = "hello" for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(tuple(x), tuple(y)) self.assertEqual(x.__dict__, y.__dict__) def test_newobj_list(self): x = MyList([1, 2, 3]) x.foo = 42 x.bar = "hello" for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(list(x), list(y)) self.assertEqual(x.__dict__, y.__dict__) def test_newobj_generic(self): for proto in protocols: for C in myclasses: B = C.__base__ x = C(C.sample) x.foo = 42 s = self.dumps(x, proto) y = self.loads(s) 
detail = (proto, C, B, x, y, type(y)) self.assertEqual(B(x), B(y), detail) self.assertEqual(x.__dict__, y.__dict__, detail) def test_newobj_proxies(self): # NEWOBJ should use the __class__ rather than the raw type classes = myclasses[:] # Cannot create weakproxies to these classes for c in (MyInt, MyTuple): classes.remove(c) for proto in protocols: for C in classes: B = C.__base__ x = C(C.sample) x.foo = 42 p = weakref.proxy(x) s = self.dumps(p, proto) y = self.loads(s) self.assertEqual(type(y), type(x)) # rather than type(p) detail = (proto, C, B, x, y, type(y)) self.assertEqual(B(x), B(y), detail) self.assertEqual(x.__dict__, y.__dict__, detail) # Register a type with copyreg, with extension code extcode. Pickle # an object of that type. Check that the resulting pickle uses opcode # (EXT[124]) under proto 2, and not in proto 1. def produce_global_ext(self, extcode, opcode): e = ExtensionSaver(extcode) try: copyreg.add_extension(__name__, "MyList", extcode) x = MyList([1, 2, 3]) x.foo = 42 x.bar = "hello" # Dump using protocol 1 for comparison. s1 = self.dumps(x, 1) self.assertIn(__name__.encode("utf-8"), s1) self.assertIn(b"MyList", s1) self.assertEqual(opcode_in_pickle(opcode, s1), False) y = self.loads(s1) self.assertEqual(list(x), list(y)) self.assertEqual(x.__dict__, y.__dict__) # Dump using protocol 2 for test. 
s2 = self.dumps(x, 2) self.assertNotIn(__name__.encode("utf-8"), s2) self.assertNotIn(b"MyList", s2) self.assertEqual(opcode_in_pickle(opcode, s2), True, repr(s2)) y = self.loads(s2) self.assertEqual(list(x), list(y)) self.assertEqual(x.__dict__, y.__dict__) finally: e.restore() def test_global_ext1(self): self.produce_global_ext(0x00000001, pickle.EXT1) # smallest EXT1 code self.produce_global_ext(0x000000ff, pickle.EXT1) # largest EXT1 code def test_global_ext2(self): self.produce_global_ext(0x00000100, pickle.EXT2) # smallest EXT2 code self.produce_global_ext(0x0000ffff, pickle.EXT2) # largest EXT2 code self.produce_global_ext(0x0000abcd, pickle.EXT2) # check endianness def test_global_ext4(self): self.produce_global_ext(0x00010000, pickle.EXT4) # smallest EXT4 code self.produce_global_ext(0x7fffffff, pickle.EXT4) # largest EXT4 code self.produce_global_ext(0x12abcdef, pickle.EXT4) # check endianness def test_list_chunking(self): n = 10 # too small to chunk x = list(range(n)) for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(x, y) num_appends = count_opcode(pickle.APPENDS, s) self.assertEqual(num_appends, proto > 0) n = 2500 # expect at least two chunks when proto > 0 x = list(range(n)) for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(x, y) num_appends = count_opcode(pickle.APPENDS, s) if proto == 0: self.assertEqual(num_appends, 0) else: self.assertGreaterEqual(num_appends, 2) def test_dict_chunking(self): n = 10 # too small to chunk x = dict.fromkeys(range(n)) for proto in protocols: s = self.dumps(x, proto) self.assertIsInstance(s, bytes_types) y = self.loads(s) self.assertEqual(x, y) num_setitems = count_opcode(pickle.SETITEMS, s) self.assertEqual(num_setitems, proto > 0) n = 2500 # expect at least two chunks when proto > 0 x = dict.fromkeys(range(n)) for proto in protocols: s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(x, y) num_setitems = count_opcode(pickle.SETITEMS, s) if 
proto == 0: self.assertEqual(num_setitems, 0) else: self.assertGreaterEqual(num_setitems, 2) def test_simple_newobj(self): x = object.__new__(SimpleNewObj) # avoid __init__ x.abc = 666 for proto in protocols: s = self.dumps(x, proto) self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s), proto >= 2) y = self.loads(s) # will raise TypeError if __init__ called self.assertEqual(y.abc, 666) self.assertEqual(x.__dict__, y.__dict__) def test_newobj_list_slots(self): x = SlotList([1, 2, 3]) x.foo = 42 x.bar = "hello" s = self.dumps(x, 2) y = self.loads(s) self.assertEqual(list(x), list(y)) self.assertEqual(x.__dict__, y.__dict__) self.assertEqual(x.foo, y.foo) self.assertEqual(x.bar, y.bar) def test_reduce_overrides_default_reduce_ex(self): for proto in protocols: x = REX_one() self.assertEqual(x._reduce_called, 0) s = self.dumps(x, proto) self.assertEqual(x._reduce_called, 1) y = self.loads(s) self.assertEqual(y._reduce_called, 0) def test_reduce_ex_called(self): for proto in protocols: x = REX_two() self.assertEqual(x._proto, None) s = self.dumps(x, proto) self.assertEqual(x._proto, proto) y = self.loads(s) self.assertEqual(y._proto, None) def test_reduce_ex_overrides_reduce(self): for proto in protocols: x = REX_three() self.assertEqual(x._proto, None) s = self.dumps(x, proto) self.assertEqual(x._proto, proto) y = self.loads(s) self.assertEqual(y._proto, None) def test_reduce_ex_calls_base(self): for proto in protocols: x = REX_four() self.assertEqual(x._proto, None) s = self.dumps(x, proto) self.assertEqual(x._proto, proto) y = self.loads(s) self.assertEqual(y._proto, proto) def test_reduce_calls_base(self): for proto in protocols: x = REX_five() self.assertEqual(x._reduce_called, 0) s = self.dumps(x, proto) self.assertEqual(x._reduce_called, 1) y = self.loads(s) self.assertEqual(y._reduce_called, 1) @no_tracing def test_bad_getattr(self): x = BadGetattr() if _PY311b1: # https://github.com/python/cpython/pull/2821 fixed runtime error # problem for protocol version 2 and 
above it landed in 3.11.0b1. proto_versions_with_runtime_error = (0, 1) proto_versions_without_runtime_error = (2,) else: proto_versions_with_runtime_error = (0, 1, 2) proto_versions_without_runtime_error = () for proto in proto_versions_with_runtime_error: with self.assertRaises(RuntimeError, msg='proto=%s' % proto): self.dumps(x, proto) for proto in proto_versions_without_runtime_error: self.dumps(x, proto) def test_reduce_bad_iterator(self): # Issue4176: crash when 4th and 5th items of __reduce__() # are not iterators class C: def __reduce__(self): # 4th item is not an iterator return list, (), None, [], None class D: def __reduce__(self): # 5th item is not an iterator return dict, (), None, None, [] # Protocol 0 is less strict and also accept iterables. for proto in protocols: try: self.dumps(C(), proto) except (pickle.PickleError): pass try: self.dumps(D(), proto) except (pickle.PickleError): pass def test_many_puts_and_gets(self): # Test that internal data structures correctly deal with lots of # puts/gets. keys = ("aaa" + str(i) for i in range(100)) large_dict = {k: [4, 5, 6] for k in keys} obj = [dict(large_dict), dict(large_dict), dict(large_dict)] for proto in protocols: dumped = self.dumps(obj, proto) loaded = self.loads(dumped) self.assertEqual(loaded, obj, "Failed protocol %d: %r != %r" % (proto, obj, loaded)) @unittest.skipIf(_is_pypy, 'PyPy does not guarantee the identity of strings. ' 'See the discussion on ' 'http://pypy.readthedocs.org/en/latest/cpython_differences.html#object-identity-of-primitive-values-is-and-id') # noqa: E501 line too long def test_attribute_name_interning(self): # Test that attribute names of pickled objects are interned when # unpickling. 
for proto in protocols: x = C() x.foo = 42 x.bar = "hello" s = self.dumps(x, proto) y = self.loads(s) x_keys = sorted(x.__dict__) y_keys = sorted(y.__dict__) for x_key, y_key in zip(x_keys, y_keys): self.assertIs(x_key, y_key) def test_unpickle_from_2x(self): # Unpickle non-trivial data from Python 2.x. loaded = self.loads(DATA3) self.assertEqual(loaded, {1, 2}) loaded = self.loads(DATA4) self.assertEqual(type(loaded), type(range(0))) self.assertEqual(list(loaded), list(range(5))) loaded = self.loads(DATA5) self.assertEqual(type(loaded), SimpleCookie) self.assertEqual(list(loaded.keys()), ["key"]) self.assertEqual(loaded["key"].value, "value") def test_pickle_to_2x(self): # Pickle non-trivial data with protocol 2, expecting that it yields # the same result as Python 2.x did. # NOTE: this test is a bit too strong since we can produce different # bytecode that 2.x will still understand. dumped = self.dumps(range(5), 2) self.assertEqual(dumped, DATA4) dumped = self.dumps({3}, 2) if not _is_pypy: # The integer in the set is pickled differently under PyPy # due to the differing identity semantics (?) self.assertEqual(dumped, DATA6) else: self.assertEqual(dumped, DATA6_PYPY) def test_large_pickles(self): # Test the correctness of internal buffering routines when handling # large data. 
for proto in protocols: data = (1, min, b'xy' * (30 * 1024), len) dumped = self.dumps(data, proto) loaded = self.loads(dumped) self.assertEqual(len(loaded), len(data)) self.assertEqual(loaded, data) def test_empty_bytestring(self): # issue 11286 empty = self.loads(b'\x80\x03U\x00q\x00.', encoding='koi8-r') self.assertEqual(empty, '') def test_int_pickling_efficiency(self): # Test compacity of int representation (see issue #12744) for proto in protocols: sizes = [len(self.dumps(2**n, proto)) for n in range(70)] # the size function is monotonic self.assertEqual(sorted(sizes), sizes) if proto >= 2: self.assertLessEqual(sizes[-1], 14) def check_negative_32b_binXXX(self, dumped): if sys.maxsize > 2**32: self.skipTest("test is only meaningful on 32-bit builds") # XXX Pure Python pickle reads lengths as signed and passes # them directly to read() (hence the EOFError) with self.assertRaises((pickle.UnpicklingError, EOFError, ValueError, OverflowError)): self.loads(dumped) def test_negative_32b_binbytes(self): # On 32-bit builds, a BINBYTES of 2**31 or more is refused self.check_negative_32b_binXXX(b'\x80\x03B\xff\xff\xff\xffxyzq\x00.') def test_negative_32b_binunicode(self): # On 32-bit builds, a BINUNICODE of 2**31 or more is refused self.check_negative_32b_binXXX(b'\x80\x03X\xff\xff\xff\xffxyzq\x00.') def test_negative_put(self): # Issue #12847 dumped = b'Va\np-1\n.' self.assertRaises(ValueError, self.loads, dumped) def test_negative_32b_binput(self): # Issue #12847 if sys.maxsize > 2**32: self.skipTest("test is only meaningful on 32-bit builds") dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.' 
self.assertRaises(ValueError, self.loads, dumped) def _check_pickling_with_opcode(self, obj, opcode, proto): pickled = self.dumps(obj, proto) self.assertTrue(opcode_in_pickle(opcode, pickled)) unpickled = self.loads(pickled) self.assertEqual(obj, unpickled) def test_appends_on_non_lists(self): # Issue #17720 obj = REX_six([1, 2, 3]) for proto in protocols: if proto == 0: self._check_pickling_with_opcode(obj, pickle.APPEND, proto) else: self._check_pickling_with_opcode(obj, pickle.APPENDS, proto) def test_setitems_on_non_dicts(self): obj = REX_seven({1: -1, 2: -2, 3: -3}) for proto in protocols: if proto == 0: self._check_pickling_with_opcode(obj, pickle.SETITEM, proto) else: self._check_pickling_with_opcode(obj, pickle.SETITEMS, proto) def test_corrupted_pickle(self): # Former C implementation produced corrupted pickles on these samples. # See https://github.com/zopefoundation/zodbpickle/pull/47 sample1 = ['a'] * 17509 dumped = self.dumps(sample1, 0) loaded = self.loads(dumped) self.assertEqual(loaded, sample1) sample2 = ['a'] * 34992 dumped = self.dumps(sample2, 1) loaded = self.loads(dumped) self.assertEqual(loaded, sample2) sample3 = ['a'] * 34991 dumped = self.dumps(sample3, 2) loaded = self.loads(dumped) self.assertEqual(loaded, sample3) sample4 = ['a'] * 34991 dumped = self.dumps(sample4, 3) loaded = self.loads(dumped) self.assertEqual(loaded, sample4) class AbstractBytestrTests(unittest.TestCase): def unpickleEqual(self, data, unpickled): loaded = self.loads(data, encoding="bytes") self.assertEqual(loaded, unpickled) def test_load_str_protocol_0(self): """ Test str from protocol=0 python 2: pickle.dumps('bytestring \x00\xa0', protocol=0) """ self.unpickleEqual( b"S'bytestring \\x00\\xa0'\np0\n.", b'bytestring \x00\xa0') def test_load_str_protocol_1(self): """ Test str from protocol=1 python 2: pickle.dumps('bytestring \x00\xa0', protocol=1) """ self.unpickleEqual( b'U\rbytestring \x00\xa0q\x00.', b'bytestring \x00\xa0') def test_load_str_protocol_2(self): 
""" Test str from protocol=2 python 2: pickle.dumps('bytestring \x00\xa0', protocol=2) """ self.unpickleEqual( b'\x80\x02U\rbytestring \x00\xa0q\x00.', b'bytestring \x00\xa0') def test_load_unicode_protocol_0(self): """ Test unicode with protocol=0 python 2: pickle.dumps( u"\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440", protocol=0) """ self.unpickleEqual( b'V\\u041a\\u043e\\u043c\\u043f\\u044c\\u044e\\u0442\\u0435' b'\\u0440\np0\n.', '\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440') def test_load_unicode_protocol_1(self): """ Test unicode with protocol=1 python 2: pickle.dumps( u"\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440", protocol=1) """ self.unpickleEqual( b'X\x12\x00\x00\x00\xd0\x9a\xd0\xbe\xd0\xbc\xd0\xbf\xd1\x8c\xd1' b'\x8e\xd1\x82\xd0\xb5\xd1\x80q\x00.', '\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440') def test_load_unicode_protocol_2(self): """ Test unicode with protocol=1 python 2: pickle.dumps( u"\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440", protocol=2) """ self.unpickleEqual( b'\x80\x02X\x12\x00\x00\x00\xd0\x9a\xd0\xbe\xd0\xbc\xd0\xbf\xd1' b'\x8c\xd1\x8e\xd1\x82\xd0\xb5\xd1\x80q\x00.', '\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440') def test_load_long_str_protocol_1(self): """ Test long str with protocol=1 python 2: pickle.dumps('x'*300, protocol=1) """ self.unpickleEqual( b'T,\x01\x00\x00xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' b'q\x00.', b'x' * 300) class AbstractBytesFallbackTests(unittest.TestCase): def unpickleEqual(self, data, unpickled): loaded = self.loads(data, errors="bytes") self.assertEqual(loaded, unpickled) def test_load_instance(self): r"""Test instance pickle. 
Python 2: pickle.dumps({'x': 'ascii', 'y': '\xff'}) """ self.unpickleEqual( b"(dp0\nS'y'\np1\nS'\\xff'\np2\nsS'x'\np3\nS'ascii'\np4\ns.", {'x': 'ascii', 'y': b'\xff'}) class BigmemPickleTests(unittest.TestCase): # Binary protocols can serialize longs of up to 2GB-1 @bigmemtest(size=_2G, memuse=1 + 1, dry_run=False) def test_huge_long_32b(self, size): data = 1 << (8 * size) try: for proto in protocols: if proto < 2: continue with self.assertRaises((ValueError, OverflowError)): self.dumps(data, protocol=proto) finally: data = None # Protocol 3 can serialize up to 4GB-1 as a bytes object # (older protocols don't have a dedicated opcode for bytes and are # too inefficient) @bigmemtest(size=_2G, memuse=1 + 1, dry_run=False) def test_huge_bytes_32b(self, size): data = b"abcd" * (size // 4) try: for proto in protocols: if proto < 3: continue try: pickled = self.dumps(data, protocol=proto) self.assertIn(b"abcd", pickled[:15]) self.assertIn(b"abcd", pickled[-15:]) finally: pickled = None finally: data = None @bigmemtest(size=_4G, memuse=1 + 1, dry_run=False) def test_huge_bytes_64b(self, size): data = b"a" * size try: for proto in protocols: if proto < 3: continue with self.assertRaises((ValueError, OverflowError)): self.dumps(data, protocol=proto) finally: data = None # All protocols use 1-byte per printable ASCII character; we add another # byte because the encoded form has to be copied into the internal buffer. @bigmemtest(size=_2G, memuse=2 + ascii_char_size, dry_run=False) def test_huge_str_32b(self, size): data = "abcd" * (size // 4) try: for proto in protocols: try: pickled = self.dumps(data, protocol=proto) self.assertIn(b"abcd", pickled[:15]) self.assertIn(b"abcd", pickled[-15:]) finally: pickled = None finally: data = None # BINUNICODE (protocols 1, 2 and 3) cannot carry more than # 2**32 - 1 bytes of utf-8 encoded unicode. 
@bigmemtest(size=_4G, memuse=1 + ascii_char_size, dry_run=False) def test_huge_str_64b(self, size): data = "a" * size try: for proto in protocols: if proto == 0: continue with self.assertRaises((ValueError, OverflowError)): self.dumps(data, protocol=proto) finally: data = None # Test classes for reduce_ex class REX_one: """No __reduce_ex__ here, but inheriting it from object""" _reduce_called = 0 def __reduce__(self): self._reduce_called = 1 return REX_one, () class REX_two: """No __reduce__ here, but inheriting it from object""" _proto = None def __reduce_ex__(self, proto): self._proto = proto return REX_two, () class REX_three: _proto = None def __reduce_ex__(self, proto): self._proto = proto return REX_two, () def __reduce__(self): raise TestFailed("This __reduce__ shouldn't be called") class REX_four: """Calling base class method should succeed""" _proto = None def __reduce_ex__(self, proto): self._proto = proto return object.__reduce_ex__(self, proto) class REX_five: """This one used to fail with infinite recursion""" _reduce_called = 0 def __reduce__(self): self._reduce_called = 1 return object.__reduce__(self) class REX_six: """This class is used to check the 4th argument (list iterator) of the reduce protocol. """ def __init__(self, items=None): self.items = items if items is not None else [] def __eq__(self, other): return isinstance(self, type(other)) and self.items == self.items def append(self, item): self.items.append(item) def __reduce__(self): return type(self), (), None, iter(self.items), None class REX_seven: """This class is used to check the 5th argument (dict iterator) of the reduce protocol. 
""" def __init__(self, table=None): self.table = table if table is not None else {} def __eq__(self, other): return isinstance(self, type(other)) and self.table == self.table def __setitem__(self, key, value): self.table[key] = value def __reduce__(self): return type(self), (), None, None, iter(self.table.items()) # Test classes for newobj class MyInt(int): sample = 1 class MyFloat(float): sample = 1.0 class MyComplex(complex): sample = 1.0 + 0.0j class MyStr(str): sample = "hello" class MyUnicode(str): sample = "hello \u1234" class MyTuple(tuple): sample = (1, 2, 3) class MyList(list): sample = [1, 2, 3] class MyDict(dict): sample = {"a": 1, "b": 2} myclasses = [MyInt, MyFloat, MyComplex, MyStr, MyUnicode, MyTuple, MyList, MyDict] class SlotList(MyList): __slots__ = ["foo"] class SimpleNewObj: def __init__(self, a, b, c): # raise an error, to make sure this isn't called raise TypeError("SimpleNewObj.__init__() didn't expect to get called") class BadGetattr: def __getattr__(self, key): self.foo class AbstractPickleModuleTests(unittest.TestCase): def test_dump_closed_file(self): import os f = open(TESTFN, "wb") try: f.close() self.assertRaises(ValueError, pickle.dump, 123, f) finally: os.remove(TESTFN) def test_load_closed_file(self): import os f = open(TESTFN, "wb") try: f.close() self.assertRaises(ValueError, pickle.dump, 123, f) finally: os.remove(TESTFN) def test_load_from_and_dump_to_file(self): stream = io.BytesIO() data = [123, {}, 124] pickle.dump(data, stream) stream.seek(0) unpickled = pickle.load(stream) self.assertEqual(unpickled, data) def test_highest_protocol(self): # Of course this needs to be changed when HIGHEST_PROTOCOL changes. 
self.assertEqual(pickle.HIGHEST_PROTOCOL, 3) def test_callapi(self): f = io.BytesIO() # With and without keyword arguments pickle.dump(123, f, -1) pickle.dump(123, file=f, protocol=-1) pickle.dumps(123, -1) pickle.dumps(123, protocol=-1) pickle.Pickler(f, -1) pickle.Pickler(f, protocol=-1) def test_bad_init(self): # Test issue3664 (pickle can segfault from a badly initialized # Pickler). Override initialization without calling __init__() of the # superclass. class BadPickler(pickle.Pickler): def __init__(self): pass class BadUnpickler(pickle.Unpickler): def __init__(self): pass self.assertRaises(pickle.PicklingError, BadPickler().dump, 0) self.assertRaises(pickle.UnpicklingError, BadUnpickler().load) def test_bad_input(self): # Test issue4298 s = bytes([0x58, 0, 0, 0, 0x54]) self.assertRaises(EOFError, pickle.loads, s) class AbstractPersistentPicklerTests(unittest.TestCase): # This class defines persistent_id() and persistent_load() # functions that should be used by the pickler. All even integers # are pickled using persistent ids. 
def persistent_id(self, object): if isinstance(object, int) and object % 2 == 0: self.id_count += 1 return str(object) else: return None def persistent_load(self, oid): self.load_count += 1 object = int(oid) assert object % 2 == 0 return object def test_persistence(self): self.id_count = 0 self.load_count = 0 L = list(range(10)) self.assertEqual(self.loads(self.dumps(L)), L) self.assertEqual(self.id_count, 5) self.assertEqual(self.load_count, 5) def test_bin_persistence(self): self.id_count = 0 self.load_count = 0 L = list(range(10)) self.assertEqual(self.loads(self.dumps(L, 1)), L) self.assertEqual(self.id_count, 5) self.assertEqual(self.load_count, 5) class AbstractPicklerUnpicklerObjectTests(unittest.TestCase): pickler_class = None unpickler_class = None def setUp(self): assert self.pickler_class assert self.unpickler_class def test_clear_pickler_memo(self): # To test whether clear_memo() has any effect, we pickle an object, # then pickle it again without clearing the memo; the two serialized # forms should be different. If we clear_memo() and then pickle the # object again, the third serialized form should be identical to the # first one we obtained. data = ["abcdefg", "abcdefg", 44] f = io.BytesIO() pickler = self.pickler_class(f) pickler.dump(data) first_pickled = f.getvalue() # Reset StringIO object. f.seek(0) f.truncate() pickler.dump(data) second_pickled = f.getvalue() # Reset the Pickler and StringIO objects. pickler.clear_memo() f.seek(0) f.truncate() pickler.dump(data) third_pickled = f.getvalue() self.assertNotEqual(first_pickled, second_pickled) self.assertEqual(first_pickled, third_pickled) def test_priming_pickler_memo(self): # Verify that we can set the Pickler's memo attribute. 
data = ["abcdefg", "abcdefg", 44] f = io.BytesIO() pickler = self.pickler_class(f) pickler.dump(data) first_pickled = f.getvalue() f = io.BytesIO() primed = self.pickler_class(f) primed.memo = pickler.memo primed.dump(data) primed_pickled = f.getvalue() self.assertNotEqual(first_pickled, primed_pickled) def test_priming_unpickler_memo(self): # Verify that we can set the Unpickler's memo attribute. data = ["abcdefg", "abcdefg", 44] f = io.BytesIO() pickler = self.pickler_class(f) pickler.dump(data) first_pickled = f.getvalue() f = io.BytesIO() primed = self.pickler_class(f) primed.memo = pickler.memo primed.dump(data) primed_pickled = f.getvalue() unpickler = self.unpickler_class(io.BytesIO(first_pickled)) unpickled_data1 = unpickler.load() self.assertEqual(unpickled_data1, data) primed = self.unpickler_class(io.BytesIO(primed_pickled)) primed.memo = unpickler.memo unpickled_data2 = primed.load() primed.memo.clear() self.assertEqual(unpickled_data2, data) self.assertIs(unpickled_data2, unpickled_data1) def test_reusing_unpickler_objects(self): data1 = ["abcdefg", "abcdefg", 44] f = io.BytesIO() pickler = self.pickler_class(f) pickler.dump(data1) pickled1 = f.getvalue() data2 = ["abcdefg", 44, 44] f = io.BytesIO() pickler = self.pickler_class(f) pickler.dump(data2) pickled2 = f.getvalue() f = io.BytesIO() f.write(pickled1) f.seek(0) unpickler = self.unpickler_class(f) self.assertEqual(unpickler.load(), data1) f.seek(0) f.truncate() f.write(pickled2) f.seek(0) self.assertEqual(unpickler.load(), data2) def _check_multiple_unpicklings(self, ioclass): for proto in protocols: data1 = [(x, str(x)) for x in range(2000)] + [b"abcde", len] f = ioclass() pickler = self.pickler_class(f, protocol=proto) pickler.dump(data1) pickled = f.getvalue() N = 5 f = ioclass(pickled * N) unpickler = self.unpickler_class(f) for i in range(N): if f.seekable(): pos = f.tell() self.assertEqual(unpickler.load(), data1) if f.seekable(): self.assertEqual(f.tell(), pos + len(pickled)) 
self.assertRaises(EOFError, unpickler.load) def test_multiple_unpicklings_seekable(self): self._check_multiple_unpicklings(io.BytesIO) def test_multiple_unpicklings_unseekable(self): self._check_multiple_unpicklings(UnseekableIO) def test_unpickling_buffering_readline(self): # Issue #12687: the unpickler's buffering logic could fail with # text mode opcodes. data = list(range(10)) for proto in protocols: for buf_size in range(1, 11): f = io.BufferedRandom(io.BytesIO(), buffer_size=buf_size) pickler = self.pickler_class(f, protocol=proto) pickler.dump(data) f.seek(0) unpickler = self.unpickler_class(f) self.assertEqual(unpickler.load(), data) def test_noload_object(self): global _NOLOAD_OBJECT after = {} _NOLOAD_OBJECT = object() aaa = AAA() bbb = BBB() ccc = 1 ddd = 1.0 eee = ('eee', 1) fff = ['fff'] ggg = {'ggg': 0} unpickler = self.unpickler_class f = io.BytesIO() pickler = self.pickler_class(f, protocol=2) pickler.dump(_NOLOAD_OBJECT) after['_NOLOAD_OBJECT'] = f.tell() pickler.dump(aaa) after['aaa'] = f.tell() pickler.dump(bbb) after['bbb'] = f.tell() pickler.dump(ccc) after['ccc'] = f.tell() pickler.dump(ddd) after['ddd'] = f.tell() pickler.dump(eee) after['eee'] = f.tell() pickler.dump(fff) after['fff'] = f.tell() pickler.dump(ggg) after['ggg'] = f.tell() f.seek(0) unpickler = self.unpickler_class(f) unpickler.noload() # read past _NOLOAD_OBJECT self.assertEqual(f.tell(), after['_NOLOAD_OBJECT']) noload = unpickler.noload() # read past aaa self.assertEqual(noload, None) self.assertEqual(f.tell(), after['aaa']) unpickler.noload() # read past bbb self.assertEqual(f.tell(), after['bbb']) noload = unpickler.noload() # read past ccc self.assertEqual(noload, ccc) self.assertEqual(f.tell(), after['ccc']) noload = unpickler.noload() # read past ddd self.assertEqual(noload, ddd) self.assertEqual(f.tell(), after['ddd']) noload = unpickler.noload() # read past eee self.assertEqual(noload, eee) self.assertEqual(f.tell(), after['eee']) noload = unpickler.noload() # read 
past fff self.assertEqual(noload, fff) self.assertEqual(f.tell(), after['fff']) noload = unpickler.noload() # read past ggg self.assertEqual(noload, ggg) self.assertEqual(f.tell(), after['ggg']) def test_functional_noload_dict_subclass(self): """noload() doesn't break or produce any output given a dict subclass """ # See http://bugs.python.org/issue1101399 o = MyDict() o['x'] = 1 f = io.BytesIO() pickler = self.pickler_class(f, protocol=2) pickler.dump(o) f.seek(0) unpickler = self.unpickler_class(f) noload = unpickler.noload() self.assertEqual(noload, None) def test_functional_noload_list_subclass(self): """noload() doesn't break or produce any output given a list subclass """ # See http://bugs.python.org/issue1101399 o = MyList() o.append(1) f = io.BytesIO() pickler = self.pickler_class(f, protocol=2) pickler.dump(o) f.seek(0) unpickler = self.unpickler_class(f) noload = unpickler.noload() self.assertEqual(noload, None) def test_functional_noload_dict(self): """noload() implements the Python 2.6 behaviour and fills in dicts""" # See http://bugs.python.org/issue1101399 o = dict() o['x'] = 1 f = io.BytesIO() pickler = self.pickler_class(f, protocol=2) pickler.dump(o) f.seek(0) unpickler = self.unpickler_class(f) noload = unpickler.noload() self.assertEqual(noload, o) def test_functional_noload_list(self): """noload() implements the Python 2.6 behaviour and fills in lists""" # See http://bugs.python.org/issue1101399 o = list() o.append(1) f = io.BytesIO() pickler = self.pickler_class(f, protocol=2) pickler.dump(o) f.seek(0) unpickler = self.unpickler_class(f) noload = unpickler.noload() self.assertEqual(noload, o) # Tests for dispatch_table attribute REDUCE_A = 'reduce_A' class AAA: def __reduce__(self): return str, (REDUCE_A,) class BBB: pass class AbstractDispatchTableTests(unittest.TestCase): def test_default_dispatch_table(self): # No dispatch_table attribute by default f = io.BytesIO() p = self.pickler_class(f, 0) with self.assertRaises(AttributeError): 
p.dispatch_table self.assertFalse(hasattr(p, 'dispatch_table')) def test_class_dispatch_table(self): # A dispatch_table attribute can be specified class-wide dt = self.get_dispatch_table() class MyPickler(self.pickler_class): dispatch_table = dt def dumps(obj, protocol=None): f = io.BytesIO() p = MyPickler(f, protocol) self.assertEqual(p.dispatch_table, dt) p.dump(obj) return f.getvalue() self._test_dispatch_table(dumps, dt) def test_instance_dispatch_table(self): # A dispatch_table attribute can also be specified instance-wide dt = self.get_dispatch_table() def dumps(obj, protocol=None): f = io.BytesIO() p = self.pickler_class(f, protocol) p.dispatch_table = dt self.assertEqual(p.dispatch_table, dt) p.dump(obj) return f.getvalue() self._test_dispatch_table(dumps, dt) def _test_dispatch_table(self, dumps, dispatch_table): def custom_load_dump(obj): return pickle.loads(dumps(obj, 0)) def default_load_dump(obj): return pickle.loads(pickle.dumps(obj, 0)) # pickling complex numbers using protocol 0 relies on copyreg # so check pickling a complex number still works z = 1 + 2j self.assertEqual(custom_load_dump(z), z) self.assertEqual(default_load_dump(z), z) # modify pickling of complex REDUCE_1 = 'reduce_1' def reduce_1(obj): return str, (REDUCE_1,) dispatch_table[complex] = reduce_1 self.assertEqual(custom_load_dump(z), REDUCE_1) self.assertEqual(default_load_dump(z), z) # check picklability of AAA and BBB a = AAA() b = BBB() self.assertEqual(custom_load_dump(a), REDUCE_A) self.assertIsInstance(custom_load_dump(b), BBB) self.assertEqual(default_load_dump(a), REDUCE_A) self.assertIsInstance(default_load_dump(b), BBB) # modify pickling of BBB dispatch_table[BBB] = reduce_1 self.assertEqual(custom_load_dump(a), REDUCE_A) self.assertEqual(custom_load_dump(b), REDUCE_1) self.assertEqual(default_load_dump(a), REDUCE_A) self.assertIsInstance(default_load_dump(b), BBB) # revert pickling of BBB and modify pickling of AAA REDUCE_2 = 'reduce_2' def reduce_2(obj): return str, 
(REDUCE_2,) dispatch_table[AAA] = reduce_2 del dispatch_table[BBB] self.assertEqual(custom_load_dump(a), REDUCE_2) self.assertIsInstance(custom_load_dump(b), BBB) self.assertEqual(default_load_dump(a), REDUCE_A) self.assertIsInstance(default_load_dump(b), BBB) if __name__ == "__main__": # Print some stuff that can be used to rewrite DATA{0,1,2} from pickletools import dis x = create_data() for i in range(3): p = pickle.dumps(x, i) print(f"DATA{i} = (") for j in range(0, len(p), 20): b = bytes(p[j:j + 20]) print(f" {b!r}") print(")") print() print(f"# Disassembly of DATA{i}") print(f"DATA{i}_DIS = \"\"\"\\") dis(p) print("\"\"\"") print() ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/tests/test_compile_flags.py0000644000076600000240000000252314753071602023513 0ustar00m.howitzstaff############################################################################## # # Copyright (c) 2022 Zope Foundation and Contributors. # All Rights Reserved. # # This software is subject to the provisions of the Zope Public License, # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # FOR A PARTICULAR PURPOSE # ############################################################################## import struct import unittest try: import zodbpickle._pickle # noqa: try to load a C module for side effects except ModuleNotFoundError: # pragma: no cover pass class TestFloatingPoint(unittest.TestCase): def test_no_fast_math_optimization(self): # Building with -Ofast enables -ffast-math, which sets certain FPU # flags that can cause breakage elsewhere. A library such as BTrees # has no business changing global FPU flags for the entire process. 
zero_bits = struct.unpack("!Q", struct.pack("!d", 0.0))[0] next_up = zero_bits + 1 smallest_subnormal = struct.unpack("!d", struct.pack("!Q", next_up))[0] self.assertNotEqual(smallest_subnormal, 0.0) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle/tests/test_pickle.py0000644000076600000240000000227114753071602022156 0ustar00m.howitzstaffimport sys import types import unittest from . import _is_pure from . import _is_pypy if _is_pure or _is_pypy: function_type = types.FunctionType else: function_type = types.BuiltinFunctionType del sys del _is_pypy del _is_pure class TestImportability(unittest.TestCase): def test_Pickler(self): from zodbpickle.pickle import Pickler self.assertIsInstance(Pickler, object) def test_Unpickler(self): from zodbpickle.pickle import Unpickler self.assertIsInstance(Unpickler, object) def test_load(self): from zodbpickle.pickle import load self.assertIsInstance(load, function_type) def test_loads(self): from zodbpickle.pickle import loads self.assertIsInstance(loads, function_type) def test_dump(self): from zodbpickle.pickle import dump self.assertIsInstance(dump, function_type) def test_dumps(self): from zodbpickle.pickle import dumps self.assertIsInstance(dumps, function_type) def test_suite(): from .pickle_3_tests import test_suite return unittest.TestSuite([ test_suite(), unittest.defaultTestLoader.loadTestsFromName(__name__), ]) ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1739355010.8956428 zodbpickle-4.2/src/zodbpickle.egg-info/0000755000076600000240000000000014753071603017625 5ustar00m.howitzstaff././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle.egg-info/PKG-INFO0000644000076600000240000003247214753071602020731 0ustar00m.howitzstaffMetadata-Version: 2.1 Name: zodbpickle Version: 4.2 Summary: Fork of Python 3 pickle module. 
Home-page: https://github.com/zopefoundation/zodbpickle Author: Python and Zope Foundation Author-email: zodb-dev@zope.dev License: PSFL 2 and ZPL-2.1 Keywords: zodb pickle Platform: any Classifier: Development Status :: 5 - Production/Stable Classifier: License :: OSI Approved :: Zope Public License Classifier: License :: OSI Approved :: Python Software Foundation License Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Framework :: ZODB Classifier: Topic :: Database Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Operating System :: Microsoft :: Windows Classifier: Operating System :: Unix Classifier: Operating System :: MacOS :: MacOS X Requires-Python: >=3.9 License-File: LICENSE.txt Provides-Extra: test Requires-Dist: zope.testrunner; extra == "test" Provides-Extra: docs Requires-Dist: Sphinx; extra == "docs" ``zodbpickle`` README ===================== .. image:: https://github.com/zopefoundation/zodbpickle/actions/workflows/tests.yml/badge.svg :target: https://github.com/zopefoundation/zodbpickle/actions/workflows/tests.yml .. image:: https://coveralls.io/repos/github/zopefoundation/zodbpickle/badge.svg :target: https://coveralls.io/github/zopefoundation/zodbpickle :alt: Coverage status .. image:: https://img.shields.io/pypi/v/zodbpickle.svg :target: https://pypi.python.org/pypi/zodbpickle :alt: PyPI .. 
image:: https://img.shields.io/pypi/pyversions/zodbpickle.svg :target: https://pypi.python.org/pypi/zodbpickle :alt: Python versions This package presents a uniform pickling interface for ZODB: - Under Python2, this package forks both Python 2.7's ``pickle`` and ``cPickle`` modules, adding support for the ``protocol 3`` opcodes. It also provides a new subclass of ``bytes``, ``zodbpickle.binary``, which Python2 applications can use to pickle binary values such that they will be unpickled as ``bytes`` under Py3k. - Under Py3k, this package forks the ``pickle`` module (and the supporting C extension) from both Python 3.2 and Python 3.3. The fork adds support for the ``noload`` operations used by ZODB. Caution ------- ``zodbpickle`` relies on Python's ``pickle`` module. The ``pickle`` module is not intended to be secure against erroneous or maliciously constructed data. Never unpickle data received from an untrusted or unauthenticated source as arbitrary code might be executed. Also see https://docs.python.org/3.6/library/pickle.html General Usage ------------- To get compatibility between Python 2 and 3 pickling, replace:: import pickle by:: from zodbpickle import pickle This provides compatibility, but has the effect that you get the fast implementation in Python 3, while Python 2 uses the slow version. To get a more deterministic choice of the implementation, use one of:: from zodbpickle import fastpickle # always C from zodbpickle import slowpickle # always Python Both modules can co-exist which is helpful for comparison. But there is a bit more to consider, so please read on! Loading/Storing Python 2 Strings -------------------------------- In all their wisdom, the Python developers have decided that Python 2 ``str`` instances should be loaded as Python 3 ``str`` objects (i.e. unicode strings). Patches were proposed in Python `issue 6784`__ but were never applied. This code base contains those patches. .. 
__: http://bugs.python.org/issue6784 Example 1: Loading Python 2 pickles on Python 3 :: $ python2 >>> import pickle >>> pickle.dumps('\xff', protocol=0) "S'\\xff'\np0\n." >>> pickle.dumps('\xff', protocol=1) 'U\x01\xffq\x00.' >>> pickle.dumps('\xff', protocol=2) '\x80\x02U\x01\xffq\x00.' $ python3 >>> from zodbpickle import pickle >>> pickle.loads(b"S'\\xff'\np0\n.", encoding='bytes') b'\xff' >>> pickle.loads(b'U\x01\xffq\x00.', encoding='bytes') b'\xff' >>> pickle.loads(b'\x80\x02U\x01\xffq\x00.', encoding='bytes') b'\xff' Example 2: Loading Python 3 pickles on Python 2 :: $ python3 >>> from zodbpickle import pickle >>> pickle.dumps(b"\xff", protocol=0) b'c_codecs\nencode\np0\n(V\xff\np1\nVlatin1\np2\ntp3\nRp4\n.' >>> pickle.dumps(b"\xff", protocol=1) b'c_codecs\nencode\nq\x00(X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02tq\x03Rq\x04.' >>> pickle.dumps(b"\xff", protocol=2) b'\x80\x02c_codecs\nencode\nq\x00X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02\x86q\x03Rq\x04.' $ python2 >>> import pickle >>> pickle.loads('c_codecs\nencode\np0\n(V\xff\np1\nVlatin1\np2\ntp3\nRp4\n.') '\xff' >>> pickle.loads('c_codecs\nencode\nq\x00(X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02tq\x03Rq\x04.') '\xff' >>> pickle.loads('\x80\x02c_codecs\nencode\nq\x00X\x02\x00\x00\x00\xc3\xbfq\x01X\x06\x00\x00\x00latin1q\x02\x86q\x03Rq\x04.') '\xff' Example 3: everything breaks down :: $ python2 >>> class Foo(object): ... def __init__(self): ... self.x = 'hello' ... >>> import pickle >>> pickle.dumps(Foo(), protocol=0) "ccopy_reg\n_reconstructor\np0\n(c__main__\nFoo\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nS'hello'\np7\nsb." >>> pickle.dumps(Foo(), protocol=1) 'ccopy_reg\n_reconstructor\nq\x00(c__main__\nFoo\nq\x01c__builtin__\nobject\nq\x02Ntq\x03Rq\x04}q\x05U\x01xq\x06U\x05helloq\x07sb.' >>> pickle.dumps(Foo(), protocol=2) '\x80\x02c__main__\nFoo\nq\x00)\x81q\x01}q\x02U\x01xq\x03U\x05helloq\x04sb.' 
$ python3 >>> from zodbpickle import pickle >>> class Foo(object): pass ... >>> foo = pickle.loads("ccopy_reg\n_reconstructor\np0\n(c__main__\nFoo\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nS'hello'\np7\nsb.", encoding='bytes') >>> foo.x Traceback (most recent call last): File "<stdin>", line 1, in <module> AttributeError: 'Foo' object has no attribute 'x' wait what? :: >>> foo.__dict__ {b'x': b'hello'} oooh. So we use ``encoding='ASCII'`` (the default) and ``errors='bytes'`` and hope it works:: >>> foo = pickle.loads("ccopy_reg\n_reconstructor\np0\n(c__main__\nFoo\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nS'hello'\np7\nsb.", errors='bytes') >>> foo.x 'hello' falling back to bytes if necessary :: >>> pickle.loads(b'\x80\x02U\x01\xffq\x00.', errors='bytes') b'\xff' Support for ``noload()`` ------------------------ The ZODB uses ``cPickle``'s ``noload()`` method to retrieve all persistent references from a pickle without loading any objects. This feature was removed from Python 3's pickle. Unfortunately, this unnecessarily fills the pickle cache. This module provides a ``noload()`` method again. =========== Changelog =========== 4.2 (2025-02-12) ================ - Drop support for Python 3.8. - Add preliminary support for Python 3.14 as of 3.14a4. - Remove unused ``setuptools`` install requirement. 4.1.1 (2024-10-02) ================== - Fix ``NameError`` which occurred when importing ``zodbpickle.fastpickle``. 4.1 (2024-09-17) ================ - Add final support for Python 3.13. 4.0 (2024-05-30) ================ - Drop support for Python 3.7. 3.3 (2024-04-16) ================ - Build Windows wheels on GHA. - Add preliminary support for Python 3.13 as of 3.13a5. 3.2 (2024-02-16) ================ - Add preliminary support for Python 3.13 as of 3.13a3. 3.1 (2023-10-05) ================ - Add support for Python 3.12. 3.0.1 (2023-03-28) ================== - Fix ``NameError`` in ``.fastpickle`` and ``.slowpickle``. 
3.0 (2023-03-24) ================ - Build Linux binary wheels for Python 3.11. - Add preliminary support for Python 3.12a5. - Drop support for Python 2.7, 3.5, 3.6. - Drop support for deprecated ``python setup.py test``. 2.6 (2022-11-17) ================ - Add support for building arm64 wheels on macOS. 2.5 (2022-11-03) ================ - Add support for the final Python 3.11 release. 2.4 (2022-09-15) ================ - Add support for Python 3.11 (as of 3.11.0b3). - Disable unsafe math optimizations in C code. See `pull request 73 <https://github.com/zopefoundation/zodbpickle/pull/73>`_. 2.3 (2022-04-22) ================ - Add support for Python 3.11 (as of 3.11.0a7). 2.2.0 (2021-09-29) ================== - Add support for Python 3.10. 2.1.0 (2021-09-24) ================== - Add support for Python 3.9. 2.0.0 (2019-11-13) ================== - CPython 2: Make ``zodbpickle.binary`` objects smaller and untracked by the garbage collector. Now they behave more like the native bytes object. Just like it, and just like on Python 3, they cannot have arbitrary attributes or be weakly referenced. See `issue 53 <https://github.com/zopefoundation/zodbpickle/issues/53>`_. 1.1 (2019-11-09) ================ - Add support for Python 3.8. - Drop support for Python 3.4. 1.0.4 (2019-06-12) ================== - Fix pickle corruption under certain conditions. See `pull request 47 <https://github.com/zopefoundation/zodbpickle/pull/47>`_. 1.0.3 (2018-12-18) ================== - Fix a bug: zodbpickle.slowpickle assigned `_Pickler` to `Unpickler`. 1.0.2 (2018-08-10) ================== - Add support for Python 3.7. 1.0.1 (2018-05-16) ================== - Fix a memory leak in pickle protocol 3 under Python 2. See `issue 36 <https://github.com/zopefoundation/zodbpickle/issues/36>`_. 1.0 (2018-02-09) ================ - Add a warning to the readme not to use untrusted pickles. - Drop support for Python 3.3. 0.7.0 (2017-09-22) ================== - Drop support for Python 2.6 and 3.2. - Add support for Jython 2.7. - Add support for Python 3.5 and 3.6. 
0.6.0 (2015-04-02) ================== - Restore the ``noload`` behaviour from Python 2.6 and provide the ``noload`` method on the non-C-accelerated unpicklers under PyPy and Python 2. - Add support for PyPy, PyPy3, and Python 3.4. 0.5.2 (2013-08-17) ================== - Import accelerator from *our* extension module under Py3k. See https://github.com/zopefoundation/zodbpickle/issues/6, https://github.com/zopefoundation/zodbpickle/issues/7. - Fix unpickler's ``load_short_binstring`` across supported platforms. 0.5.1 (2013-07-06) ================== - Update all code and tests to Python 2.6.8, 2.7.5, 3.2.5, 3.3.2 . - Add the modules ``zodbpickle.fastpickle`` and ``zodbpickle.slowpickle``. This provides a version-independent choice of the C or Python implementation. - Fix a minor bug on OS X 0.5.0 (2013-06-14) ================== - Removed support for the ``bytes_as_strings`` arguments to pickling APIs: the pickles created when that argument was true might not be unpickled without passing ``encoding='bytes'``, which ZODB couldn't reliably enforce. On Py3k, ZODB will be using ``protocol=3`` pickles anyway. 0.4.4 (2013-06-07) ================== - Add protocol 3 opcodes to the C version of the ``noload()`` dispatcher. 0.4.3 (2013-06-07) ================== - Packaging error: remove spurious ``-ASIDE`` file from sdist. 0.4.2 (2013-06-07) ================== - Fix NameError in pure-Python version of ``Unpickler.noload_appends``. - Fix NameError in pure-Python version of ``Unpickler.noload_setitems``. 0.4.1 (2013-04-29) ================== - Fix typo in Python2 version of ``zodbpickle.pickle`` module. 0.4 (2013-04-28) ================ - Support the common pickle module interface for Python 2.6, 2.7, 3.2, and 3.3. - Split the Python implementations / tests into Python2- and Py3k-specific variants. - Added a fork of the Python 2.7 ``_pickle.c``, for use under Python2. The fork adds support for the Py3k ``protocol 3`` opcodes. 
- Added a custom ``binary`` type for use in Python2 apps. Derived from ``bytes``, the ``binary`` type allows Python2 apps to pickle binary data using opcodes which will cause it to be unpickled as ``bytes`` on Py3k. Under Py3k, the ``binary`` type is just an alias for ``bytes``. 0.3 (2013-03-18) ================ - Added ``noload`` code to Python 3.2 version of ``Unpickler``. As with the Python 3.3 version, this code remains untested. - Added ``bytes_as_strings`` option to the Python 3.2 version of ``Pickler``, ``dump``, and ``dumps``. 0.2 (2013-03-05) ================ - Added ``bytes_as_strings`` option to ``Pickler``, ``dump``, and ``dumps``. - Incomplete support for Python 3.2: - Move ``_pickle.c`` -> ``_pickle_33.c``. - Clone Python 3.2.3's ``_pickle.c`` -> ``_pickle_32.c`` and apply the same patch. - Choose between them at build time based on ``sys.version_info``. - Disable some tests of 3.3-only features. - Missing: implementation of ``noload()`` in ``_pickle_32.c``. - Missing: implementation of ``bytes_as_strings=True`` in ``_pickle_32.c``. 0.1.0 (2013-02-27) ================== - Initial release of Python 3.3's pickle with the patches of Python `issue 6784`__ applied. .. __: http://bugs.python.org/issue6784#msg156166 - Added support for ``errors="bytes"``. 
././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle.egg-info/SOURCES.txt0000644000076600000240000000173014753071602021511 0ustar00m.howitzstaff.manylinux-install.sh .manylinux.sh .pre-commit-config.yaml .readthedocs.yaml CHANGES.rst CONTRIBUTING.md LICENSE.txt MANIFEST.in README.rst buildout.cfg pyproject.toml setup.cfg setup.py tox.ini docs/Makefile docs/conf.py docs/index.rst docs/make.bat docs/historical/proposal.rst patches/pickle_bytes_code.diff patches/pickle_bytes_tests.diff patches/pickle_noload.patch src/zodbpickle/__init__.py src/zodbpickle/_pickle_33.c src/zodbpickle/fastpickle.py src/zodbpickle/pickle.py src/zodbpickle/pickle_3.py src/zodbpickle/pickletools_3.py src/zodbpickle/slowpickle.py src/zodbpickle.egg-info/PKG-INFO src/zodbpickle.egg-info/SOURCES.txt src/zodbpickle.egg-info/dependency_links.txt src/zodbpickle.egg-info/not-zip-safe src/zodbpickle.egg-info/requires.txt src/zodbpickle.egg-info/top_level.txt src/zodbpickle/tests/__init__.py src/zodbpickle/tests/pickle_3_tests.py src/zodbpickle/tests/pickletester_3.py src/zodbpickle/tests/test_compile_flags.py src/zodbpickle/tests/test_pickle.py././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle.egg-info/dependency_links.txt0000644000076600000240000000000114753071602023672 0ustar00m.howitzstaff ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle.egg-info/not-zip-safe0000644000076600000240000000000114753071602022052 0ustar00m.howitzstaff ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/src/zodbpickle.egg-info/requires.txt0000644000076600000240000000004714753071602022225 0ustar00m.howitzstaff [docs] Sphinx [test] zope.testrunner ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 
zodbpickle-4.2/src/zodbpickle.egg-info/top_level.txt0000644000076600000240000000001314753071602022350 0ustar00m.howitzstaffzodbpickle ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1739355010.0 zodbpickle-4.2/tox.ini0000644000076600000240000000331114753071602014526 0ustar00m.howitzstaff# Generated from: # https://github.com/zopefoundation/meta/tree/master/config/c-code [tox] minversion = 4.0 envlist = lint py39,py39-pure py310,py310-pure py311,py311-pure py312,py312-pure py313,py313-pure py314,py314-pure pypy3 docs coverage [testenv] pip_pre = py314: true deps = setuptools <= 75.6.0 setenv = pure: PURE_PYTHON=1 !pure-!pypy3: PURE_PYTHON=0 commands = zope-testrunner --test-path=src {posargs:-vc} extras = test [testenv:setuptools-latest] basepython = python3 deps = git+https://github.com/pypa/setuptools.git\#egg=setuptools [testenv:coverage] basepython = python3 allowlist_externals = mkdir deps = coverage[toml] setenv = PURE_PYTHON=1 commands = mkdir -p {toxinidir}/parts/htmlcov coverage run -m zope.testrunner --test-path=src {posargs:-vc} coverage html coverage report [testenv:release-check] description = ensure that the distribution is ready to release basepython = python3 skip_install = true deps = setuptools <= 75.6.0 twine build check-manifest check-python-versions >= 0.20.0 wheel commands_pre = commands = check-manifest check-python-versions --only setup.py,tox.ini,.github/workflows/tests.yml python -m build --sdist --no-isolation twine check dist/* [testenv:lint] description = This env runs all linters configured in .pre-commit-config.yaml basepython = python3 skip_install = true deps = pre-commit commands_pre = commands = pre-commit run --all-files --show-diff-on-failure [testenv:docs] basepython = python3 skip_install = false extras = docs commands_pre = commands = sphinx-build -b html -d docs/_build/doctrees docs docs/_build/html