zict-2.0.0/0000755000076600000240000000000013626230051013562 5ustar jbourbeaustaff00000000000000zict-2.0.0/LICENSE.txt0000644000076600000240000000272413557437000015420 0ustar jbourbeaustaff00000000000000Copyright (c) 2016 Matthew Rocklin All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: a. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. b. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. c. Neither the name of toolz nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. zict-2.0.0/MANIFEST.in0000644000076600000240000000026213557437000015326 0ustar jbourbeaustaff00000000000000recursive-include zict *.py recursive-include docs *.rst include setup.py include README.rst include LICENSE.txt include MANIFEST.in include requirements.txt prune docs/_build zict-2.0.0/PKG-INFO0000644000076600000240000000144113626230051014657 0ustar jbourbeaustaff00000000000000Metadata-Version: 1.2 Name: zict Version: 2.0.0 Summary: Mutable mapping tools Home-page: http://zict.readthedocs.io/en/latest/ Maintainer: Matthew Rocklin Maintainer-email: mrocklin@gmail.com License: BSD Description: Zict ==== |Build Status| Mutable Mapping interfaces. See documentation_. .. _documentation: http://zict.readthedocs.io/en/latest/ .. |Build Status| image:: https://travis-ci.org/dask/zict.svg?branch=master :target: https://travis-ci.org/dask/zict Keywords: mutable mapping,dict,dask Platform: UNKNOWN Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 zict-2.0.0/README.rst0000644000076600000240000000037613557437000015265 0ustar jbourbeaustaff00000000000000Zict ==== |Build Status| Mutable Mapping interfaces. See documentation_. .. _documentation: http://zict.readthedocs.io/en/latest/ .. |Build Status| image:: https://travis-ci.org/dask/zict.svg?branch=master :target: https://travis-ci.org/dask/zict zict-2.0.0/requirements.txt0000644000076600000240000000001113557437000017044 0ustar jbourbeaustaff00000000000000heapdict zict-2.0.0/setup.cfg0000644000076600000240000000034713626230051015407 0ustar jbourbeaustaff00000000000000[flake8] exclude = __init__.py max-line-length = 120 ignore = E731, # Assigning lambda expression E741 # Ambiguous variable names [tool:pytest] addopts = -v --doctest-modules [egg_info] tag_build = tag_date = 0 zict-2.0.0/setup.py0000755000076600000240000000145213626227714015315 0ustar jbourbeaustaff00000000000000#!/usr/bin/env python import os from setuptools import setup setup(name='zict', version='2.0.0', description='Mutable mapping tools', url='http://zict.readthedocs.io/en/latest/', maintainer='Matthew Rocklin', maintainer_email='mrocklin@gmail.com', license='BSD', keywords='mutable mapping,dict,dask', packages=['zict'], install_requires=open('requirements.txt').read().strip().split('\n'), long_description=(open('README.rst').read() if os.path.exists('README.rst') else ''), classifiers=[ "Programming Language :: Python", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", ], zip_safe=False) zict-2.0.0/zict/0000755000076600000240000000000013626230051014533 5ustar jbourbeaustaff00000000000000zict-2.0.0/zict/__init__.py0000644000076600000240000000027213626227714016661 0ustar jbourbeaustaff00000000000000from .zip import Zip from .file import File from .func import Func from .lru import LRU from .buffer import Buffer from .sieve import Sieve from .lmdb import LMDB __version__ = "2.0.0" zict-2.0.0/zict/buffer.py0000644000076600000240000000735713626026221016374 0ustar jbourbeaustaff00000000000000from itertools import chain from .common import ZictBase, close from .lru import LRU class Buffer(ZictBase): """ Buffer one dictionary on top of another This creates a MutableMapping by combining two MutableMappings, one that feeds into the other when it overflows, based on an LRU mechanism. When the first evicts elements these get placed into the second. When an item is retrieved from the second it is placed back into the first. Parameters ---------- fast: MutableMapping slow: MutableMapping fast_to_slow_callbacks: list of callables These functions run every time data moves from the fast to the slow mapping. They take two arguments, a key and a value slow_to_fast_callbacks: list of callables These functions run every time data moves form the slow to the fast mapping. Examples -------- >>> fast = dict() >>> slow = Func(dumps, loads, File('storage/')) # doctest: +SKIP >>> def weight(k, v): ... return sys.getsizeof(v) >>> buff = Buffer(fast, slow, 1e8, weight=weight) # doctest: +SKIP See Also -------- LRU """ def __init__( self, fast, slow, n, weight=lambda k, v: 1, fast_to_slow_callbacks=None, slow_to_fast_callbacks=None, ): self.fast = LRU(n, fast, weight=weight, on_evict=[self.fast_to_slow]) self.slow = slow self.n = n self.weight = weight if callable(fast_to_slow_callbacks): fast_to_slow_callbacks = [fast_to_slow_callbacks] if callable(slow_to_fast_callbacks): slow_to_fast_callbacks = [slow_to_fast_callbacks] self.fast_to_slow_callbacks = fast_to_slow_callbacks or [] self.slow_to_fast_callbacks = slow_to_fast_callbacks or [] def fast_to_slow(self, key, value): self.slow[key] = value for cb in self.fast_to_slow_callbacks: cb(key, value) def slow_to_fast(self, key): value = self.slow[key] # Avoid useless movement for heavy values if self.weight(key, value) <= self.n: del self.slow[key] self.fast[key] = value for cb in self.slow_to_fast_callbacks: cb(key, value) return value def __getitem__(self, key): if key in self.fast: return self.fast[key] elif key in self.slow: return self.slow_to_fast(key) else: raise KeyError(key) def __setitem__(self, key, value): # Avoid useless movement for heavy values if self.weight(key, value) <= self.n: if key in self.slow: del self.slow[key] self.fast[key] = value else: if key in self.fast: del self.fast[key] self.slow[key] = value def __delitem__(self, key): if key in self.fast: del self.fast[key] elif key in self.slow: del self.slow[key] else: raise KeyError(key) def keys(self): return chain(self.fast.keys(), self.slow.keys()) def values(self): return chain(self.fast.values(), self.slow.values()) def items(self): return chain(self.fast.items(), self.slow.items()) def __len__(self): return len(self.fast) + len(self.slow) def __iter__(self): return chain(iter(self.fast), iter(self.slow)) def __contains__(self, key): return key in self.fast or key in self.slow def __str__(self): return "Buffer<%s, %s>" % (str(self.fast), str(self.slow)) __repr__ = __str__ def flush(self): self.fast.flush() self.slow.flush() def close(self): close(self.fast) close(self.slow) zict-2.0.0/zict/common.py0000644000076600000240000000261313626026221016401 0ustar jbourbeaustaff00000000000000try: from collections.abc import Mapping, MutableMapping except ImportError: from collections import Mapping, MutableMapping class ZictBase(MutableMapping): """ Base class for zict mappings. """ def update(*args, **kwds): # Boilerplate for implementing an update() method if not args: raise TypeError( "descriptor 'update' of MutableMapping object " "needs an argument" ) self = args[0] args = args[1:] if len(args) > 1: raise TypeError("update expected at most 1 arguments, got %d" % len(args)) items = [] if args: other = args[0] if isinstance(other, Mapping) or hasattr(other, "items"): items += other.items() else: # Assuming (key, value) pairs items += other if kwds: items += kwds.items() self._do_update(items) def _do_update(self, items): # Default implementation, can be overriden for speed for k, v in items: self[k] = v def close(self): """ Release any system resources held by this object. """ def __enter__(self): return self def __exit__(self, *args): self.close() def close(z): """ Close *z* if possible. """ if hasattr(z, "close"): z.close() zict-2.0.0/zict/file.py0000644000076600000240000000476513626026221016042 0ustar jbourbeaustaff00000000000000import os from urllib.parse import quote, unquote from .common import ZictBase def _safe_key(key): """ Escape key so as to be usable on all filesystems. """ # Even directory separators are unsafe. return quote(key, safe="") def _unsafe_key(key): """ Undo the escaping done by _safe_key(). """ return unquote(key) class File(ZictBase): """ Mutable Mapping interface to a directory Keys must be strings, values must be bytes Note this shouldn't be used for interprocess persistence, as keys are cached in memory. Parameters ---------- directory: string mode: string, ('r', 'w', 'a'), defaults to 'a' Examples -------- >>> z = File('myfile') # doctest: +SKIP >>> z['x'] = b'123' # doctest: +SKIP >>> z['x'] # doctest: +SKIP b'123' Also supports writing lists of bytes objects >>> z['y'] = [b'123', b'4567'] # doctest: +SKIP >>> z['y'] # doctest: +SKIP b'1234567' Or anything that can be used with file.write, like a memoryview >>> z['data'] = np.ones(5).data # doctest: +SKIP """ def __init__(self, directory, mode="a"): self.directory = directory self.mode = mode self._keys = set() if not os.path.exists(self.directory): os.makedirs(self.directory, exist_ok=True) else: for n in os.listdir(self.directory): self._keys.add(_unsafe_key(n)) def __str__(self): return '' % ( self.directory, self.mode, len(self), ) __repr__ = __str__ def __getitem__(self, key): if key not in self._keys: raise KeyError(key) with open(os.path.join(self.directory, _safe_key(key)), "rb") as f: return f.read() def __setitem__(self, key, value): with open(os.path.join(self.directory, _safe_key(key)), "wb") as f: if isinstance(value, (tuple, list)): for v in value: f.write(v) else: f.write(value) self._keys.add(key) def __contains__(self, key): return key in self._keys def keys(self): return iter(self._keys) __iter__ = keys def __delitem__(self, key): if key not in self._keys: raise KeyError(key) os.remove(os.path.join(self.directory, _safe_key(key))) self._keys.remove(key) def __len__(self): return len(self._keys) zict-2.0.0/zict/func.py0000644000076600000240000000351113626026221016042 0ustar jbourbeaustaff00000000000000from .common import ZictBase, close class Func(ZictBase): """ Buffer a MutableMapping with a pair of input/output functions Parameters ---------- dump: callable Function to call on value as we set it into the mapping load: callable Function to call on value as we pull it from the mapping d: MutableMapping Examples -------- >>> def double(x): ... return x * 2 >>> def halve(x): ... return x / 2 >>> d = dict() >>> f = Func(double, halve, d) >>> f['x'] = 10 >>> d {'x': 20} >>> f['x'] 10.0 """ def __init__(self, dump, load, d): self.dump = dump self.load = load self.d = d def __getitem__(self, key): return self.load(self.d[key]) def __setitem__(self, key, value): self.d[key] = self.dump(value) def __contains__(self, key): return key in self.d def __delitem__(self, key): del self.d[key] def keys(self): return self.d.keys() def values(self): return map(self.load, self.d.values()) def items(self): return ((k, self.load(v)) for k, v in self.d.items()) def _do_update(self, items): self.d.update((k, self.dump(v)) for k, v in items) def __iter__(self): return iter(self.d) def __len__(self): return len(self.d) def __str__(self): return "%s %s>" % ( funcname(self.dump), funcname(self.load), str(self.d), ) __repr__ = __str__ def flush(self): self.d.flush() def close(self): close(self.d) def funcname(func): """Get the name of a function.""" while hasattr(func, "func"): func = func.func try: return func.__name__ except Exception: return str(func) zict-2.0.0/zict/lmdb.py0000644000076600000240000000477113626026221016036 0ustar jbourbeaustaff00000000000000import sys from .common import ZictBase def _encode_key(key): return key.encode("utf-8") def _decode_key(key): return key.decode("utf-8") class LMDB(ZictBase): """ Mutable Mapping interface to a LMDB database. Keys must be strings, values must be bytes Parameters ---------- directory: string Examples -------- >>> z = LMDB('/tmp/somedir/') # doctest: +SKIP >>> z['x'] = b'123' # doctest: +SKIP >>> z['x'] # doctest: +SKIP b'123' """ def __init__(self, directory): import lmdb # map_size is the maximum database size but shouldn't fill up the # virtual address space map_size = 1 << 40 if sys.maxsize >= 2 ** 32 else 1 << 28 # writemap requires sparse file support otherwise the whole # `map_size` may be reserved up front on disk writemap = sys.platform.startswith("linux") self.db = lmdb.open( directory, subdir=True, map_size=map_size, sync=False, writemap=writemap, ) def __getitem__(self, key): with self.db.begin() as txn: value = txn.get(_encode_key(key)) if value is None: raise KeyError(key) return value def __setitem__(self, key, value): with self.db.begin(write=True) as txn: txn.put(_encode_key(key), value) def __contains__(self, key): with self.db.begin() as txn: return txn.cursor().set_key(_encode_key(key)) def items(self): cursor = self.db.begin().cursor() return ((_decode_key(k), v) for k, v in cursor.iternext(keys=True, values=True)) def keys(self): cursor = self.db.begin().cursor() return (_decode_key(k) for k in cursor.iternext(keys=True, values=False)) def values(self): cursor = self.db.begin().cursor() return cursor.iternext(keys=False, values=True) def _do_update(self, items): # Optimized version of update() using a single putmulti() call. items = [(_encode_key(k), v) for k, v in items] with self.db.begin(write=True) as txn: consumed, added = txn.cursor().putmulti(items) assert consumed == added == len(items) def __iter__(self): return self.keys() def __delitem__(self, key): with self.db.begin(write=True) as txn: if not txn.delete(_encode_key(key)): raise KeyError(key) def __len__(self): return self.db.stat()["entries"] def close(self): self.db.close() zict-2.0.0/zict/lru.py0000644000076600000240000000564613626026221015724 0ustar jbourbeaustaff00000000000000from heapdict import heapdict from .common import ZictBase, close def do_nothing(k, v): pass class LRU(ZictBase): """ Evict Least Recently Used Elements Parameters ---------- n: int Number of elements to keep, or total weight if weight= is used d: MutableMapping Dictionary in which to hold elements on_evict: list of callables Function:: k, v -> action to call on key value pairs prior to eviction weight: callable Function:: k, v -> number to determine the size of keeping the item in the mapping. Defaults to ``(k, v) -> 1`` Examples -------- >>> lru = LRU(2, dict(), on_evict=lambda k, v: print("Lost", k, v)) >>> lru['x'] = 1 >>> lru['y'] = 2 >>> lru['z'] = 3 Lost x 1 """ def __init__(self, n, d, on_evict=None, weight=lambda k, v: 1): self.d = d self.n = n self.heap = heapdict() self.i = 0 if callable(on_evict): on_evict = [on_evict] self.on_evict = on_evict or [] self.weight = weight self.total_weight = 0 self.weights = dict() def __getitem__(self, key): result = self.d[key] self.i += 1 self.heap[key] = self.i return result def __setitem__(self, key, value): if key in self.d: del self[key] weight = self.weight(key, value) if weight <= self.n: self.d[key] = value self.i += 1 self.heap[key] = self.i self.weights[key] = weight self.total_weight += weight else: for cb in self.on_evict: cb(key, value) while self.total_weight > self.n: self.evict() def evict(self): """ Evict least recently used key This is typically called from internal use, but can be externally triggered as well. Returns ------- k: key v: value w: weight """ k, priority = self.heap.popitem() weight = self.weights.pop(k) self.total_weight -= weight v = self.d.pop(k) for cb in self.on_evict: cb(k, v) return k, v, weight def __delitem__(self, key): del self.d[key] del self.heap[key] self.total_weight -= self.weights.pop(key) def keys(self): return self.d.keys() def values(self): return self.d.values() def items(self): return self.d.items() def __len__(self): return len(self.d) def __iter__(self): return iter(self.d) def __contains__(self, key): return key in self.d def __str__(self): sub = str(self.d) if not isinstance(self.d, dict) else "dict" return "" % (self.total_weight, self.n, sub) __repr__ = __str__ def flush(self): self.d.flush() def close(self): close(self.d) zict-2.0.0/zict/sieve.py0000644000076600000240000000603613626026221016227 0ustar jbourbeaustaff00000000000000from collections import defaultdict from itertools import chain from .common import ZictBase, close class Sieve(ZictBase): """ Store values in different mappings based on a selector's output. This creates a MutableMapping combining several underlying MutableMappings for storage. Items are dispatched based on a selector function provided by the user. Parameters ---------- mappings: dict of {mapping key: MutableMapping} selector: callable (key, value) -> mapping key Examples -------- >>> small = {} >>> large = DataBase() # doctest: +SKIP >>> mappings = {True: small, False: large} # doctest: +SKIP >>> def is_small(key, value): # doctest: +SKIP return sys.getsizeof(value) < 10000 >>> d = Sieve(mappings, is_small) # doctest: +SKIP See Also -------- Buffer """ def __init__(self, mappings, selector): self.mappings = mappings self.selector = selector self.key_to_mapping = {} def __getitem__(self, key): return self.key_to_mapping[key][key] def __setitem__(self, key, value): old_mapping = self.key_to_mapping.get(key) mapping = self.mappings[self.selector(key, value)] if old_mapping is not None and old_mapping is not mapping: del old_mapping[key] mapping[key] = value self.key_to_mapping[key] = mapping def __delitem__(self, key): del self.key_to_mapping.pop(key)[key] def _do_update(self, items): # Optimized update() implementation issuing a single update() # call per underlying mapping. updates = defaultdict(list) mapping_ids = dict((id(m), m) for m in self.mappings.values()) for key, value in items: old_mapping = self.key_to_mapping.get(key) mapping = self.mappings[self.selector(key, value)] if old_mapping is not None and old_mapping is not mapping: del old_mapping[key] # Can't hash a mutable mapping, so use its id() instead updates[id(mapping)].append((key, value)) for mid, mitems in updates.items(): mapping = mapping_ids[mid] mapping.update(mitems) for key, _ in mitems: self.key_to_mapping[key] = mapping def keys(self): return chain.from_iterable(self.mappings.values()) def values(self): return chain.from_iterable(m.values() for m in self.mappings.values()) def items(self): return chain.from_iterable(m.items() for m in self.mappings.values()) def __len__(self): return sum(map(len, self.mappings.values())) __iter__ = keys def __contains__(self, key): return key in self.key_to_mapping def __str__(self): return "Sieve<%s>" % (str(self.mappings),) __repr__ = __str__ def flush(self): for m in self.mappings.values(): m.flush() def close(self): for m in self.mappings.values(): close(m) zict-2.0.0/zict/tests/0000755000076600000240000000000013626230051015675 5ustar jbourbeaustaff00000000000000zict-2.0.0/zict/tests/__init__.py0000644000076600000240000000000013557437000020002 0ustar jbourbeaustaff00000000000000zict-2.0.0/zict/tests/test_buffer.py0000644000076600000240000000537113626026221020567 0ustar jbourbeaustaff00000000000000from zict import Buffer from . import utils_test def test_simple(): a = dict() b = dict() buff = Buffer(a, b, n=10, weight=lambda k, v: v) buff["x"] = 1 buff["y"] = 2 assert buff["x"] == 1 assert buff["y"] == 2 assert a == {"x": 1, "y": 2} assert buff.fast.total_weight == 3 buff["z"] = 8 assert a == {"y": 2, "z": 8} assert b == {"x": 1} assert buff["x"] == 1 assert a == {"x": 1, "z": 8} assert b == {"y": 2} assert "x" in buff assert "y" in buff assert "missing" not in buff buff["y"] = 1 assert a == {"x": 1, "y": 1, "z": 8} assert buff.fast.total_weight == 10 assert b == {} del buff["z"] assert a == {"x": 1, "y": 1} assert buff.fast.total_weight == 2 assert b == {} del buff["y"] assert a == {"x": 1} assert buff.fast.total_weight == 1 assert b == {} assert "y" not in buff buff["a"] = 5 assert set(buff) == set(buff.keys()) == {"a", "x"} fast_keys = set(buff.fast) slow_keys = set(buff.slow) assert not (fast_keys & slow_keys) assert fast_keys | slow_keys == set(buff) # Overweight element stays in slow mapping buff["b"] = 1000 assert "b" in buff.slow assert set(buff.fast) == fast_keys assert set(buff.slow) == {"b"} | slow_keys assert "b" in buff assert buff["b"] == 1000 def test_setitem_avoid_fast_slow_duplicate(): a = dict() b = dict() buff = Buffer(a, b, n=10, weight=lambda k, v: v) for first, second in [(1, 12), (12, 1)]: buff["a"] = first assert buff["a"] == first buff["a"] = second assert buff["a"] == second fast_keys = set(buff.fast) slow_keys = set(buff.slow) assert not (fast_keys & slow_keys) assert fast_keys | slow_keys == set(buff) del buff["a"] assert "a" not in buff assert "a" not in a assert "a" not in b def test_mapping(): """ Test mapping interface for Buffer(). """ a = {} b = {} buff = Buffer(a, b, n=2) utils_test.check_mapping(buff) utils_test.check_closing(buff) def test_callbacks(): f2s = [] def f2s_cb(k, v): f2s.append(k) s2f = [] def s2f_cb(k, v): s2f.append(k) a = dict() b = dict() buff = Buffer( a, b, n=10, weight=lambda k, v: v, fast_to_slow_callbacks=f2s_cb, slow_to_fast_callbacks=s2f_cb, ) buff["x"] = 1 buff["y"] = 2 assert buff["x"] == 1 assert buff["y"] == 2 assert not f2s assert not s2f buff["z"] = 8 assert f2s == ["x"] assert s2f == [] buff["z"] assert f2s == ["x"] assert s2f == [] buff["x"] assert f2s == ["x", "y"] assert s2f == ["x"] zict-2.0.0/zict/tests/test_file.py0000644000076600000240000000437713626026221020242 0ustar jbourbeaustaff00000000000000import os import shutil import pytest from zict.file import File from . import utils_test @pytest.yield_fixture def fn(): filename = ".tmp" if os.path.exists(filename): shutil.rmtree(filename) yield filename if os.path.exists(filename): shutil.rmtree(filename) def test_mapping(fn): """ Test mapping interface for File(). """ z = File(fn) utils_test.check_mapping(z) def test_implementation(fn): z = File(fn) assert not z z["x"] = b"123" assert os.listdir(fn) == ["x"] with open(os.path.join(fn, "x"), "rb") as f: assert f.read() == b"123" assert "x" in z def test_str(fn): z = File(fn) assert fn in str(z) assert fn in repr(z) assert z.mode in str(z) assert z.mode in repr(z) def test_setitem_typeerror(fn): z = File(fn) with pytest.raises(TypeError): z["x"] = 123 def test_contextmanager(fn): with File(fn) as z: z["x"] = b"123" with open(os.path.join(fn, "x"), "rb") as f: assert f.read() == b"123" def test_delitem(fn): z = File(fn) z["x"] = b"123" assert os.path.exists(os.path.join(z.directory, "x")) del z["x"] assert not os.path.exists(os.path.join(z.directory, "x")) def test_missing_key(fn): z = File(fn) with pytest.raises(KeyError): z["x"] def test_arbitrary_chars(fn): z = File(fn) # Avoid hitting the Windows max filename length chunk = 16 for i in range(1, 128, chunk): key = "".join(["foo_"] + [chr(i) for i in range(i, min(128, i + chunk))]) with pytest.raises(KeyError): z[key] z[key] = b"foo" assert z[key] == b"foo" assert list(z) == [key] assert list(z.keys()) == [key] assert list(z.items()) == [(key, b"foo")] assert list(z.values()) == [b"foo"] zz = File(fn) assert zz[key] == b"foo" assert list(zz) == [key] assert list(zz.keys()) == [key] assert list(zz.items()) == [(key, b"foo")] assert list(zz.values()) == [b"foo"] del zz del z[key] with pytest.raises(KeyError): z[key] def test_write_list_of_bytes(fn): z = File(fn) z["x"] = [b"123", b"4567"] assert z["x"] == b"1234567" zict-2.0.0/zict/tests/test_func.py0000644000076600000240000000143413626026221020245 0ustar jbourbeaustaff00000000000000from zict import Func from . import utils_test def inc(x): return x + 1 def dec(x): return x - 1 def rotl(x): return x[1:] + x[:1] def rotr(x): return x[-1:] + x[:-1] def test_simple(): d = dict() f = Func(inc, dec, d) f["x"] = 10 assert f["x"] == 10 assert d["x"] == 11 assert "x" in f assert list(f) == ["x"] assert list(f.values()) == [10] assert list(f.items()) == [("x", 10)] assert all(s in str(f) for s in ["inc", "dec", "x", "Func"]) assert all(s in repr(f) for s in ["inc", "dec", "x", "Func"]) del f["x"] assert "x" not in d def test_mapping(): """ Test mapping interface for Func(). """ d = {} z = Func(rotl, rotr, d) utils_test.check_mapping(z) utils_test.check_closing(z) zict-2.0.0/zict/tests/test_lmdb.py0000644000076600000240000000216213626026221020227 0ustar jbourbeaustaff00000000000000import gc import os import shutil import tempfile import pytest from zict.lmdb import LMDB from . import utils_test @pytest.yield_fixture def fn(): dirname = tempfile.mkdtemp(prefix="test_lmdb-") try: yield dirname finally: if os.path.exists(dirname): shutil.rmtree(dirname) def test_mapping(fn): """ Test mapping interface for LMDB(). """ z = LMDB(fn) utils_test.check_mapping(z) def test_reuse(fn): """ Test persistence of a LMDB() mapping. """ with LMDB(fn) as z: assert len(z) == 0 z["abc"] = b"123" with LMDB(fn) as z: assert len(z) == 1 assert z["abc"] == b"123" def test_creates_dir(fn): with LMDB(fn): assert os.path.isdir(fn) def test_file_descriptors_dont_leak(fn): psutil = pytest.importorskip("psutil") proc = psutil.Process() before = proc.num_fds() z = LMDB(fn) del z gc.collect() assert proc.num_fds() == before z = LMDB(fn) z.close() assert proc.num_fds() == before with LMDB(fn) as z: pass assert proc.num_fds() == before zict-2.0.0/zict/tests/test_lru.py0000644000076600000240000000423713626026221020120 0ustar jbourbeaustaff00000000000000from zict import LRU from . import utils_test def test_simple(): d = dict() lru = LRU(2, d) lru["x"] = 1 lru["y"] = 2 assert lru["x"] == 1 assert lru["y"] == 2 assert d == {"x": 1, "y": 2} lru["z"] = 3 assert len(d) == 2 assert len(lru) == 2 assert "z" in d assert "z" in lru assert "x" not in d assert "y" in d del lru["y"] assert "y" not in d assert "y" not in lru lru["a"] = 5 assert set(lru.keys()) == set(["z", "a"]) def test_str(): d = dict() lru = LRU(2, d) lru["x"] = 1 lru["y"] = 2 assert str(lru.total_weight) in str(lru) assert str(lru.total_weight) in repr(lru) assert str(lru.n) in str(lru) assert str(lru.n) in repr(lru) assert "dict" in str(lru) assert "dict" in repr(lru) def test_mapping(): """ Test mapping interface for LRU(). """ d = {} # 100 is more than the max length when running check_mapping() lru = LRU(100, d) utils_test.check_mapping(lru) utils_test.check_closing(lru) def test_overwrite(): d = dict() lru = LRU(2, d) lru["x"] = 1 lru["y"] = 2 lru["y"] = 3 assert set(lru) == {"x", "y"} lru.update({"y": 4}) assert set(lru) == {"x", "y"} def test_callbacks(): count = [0] def cb(k, v): count[0] += 1 L = list() d = dict() lru = LRU(2, d, on_evict=[lambda k, v: L.append((k, v)), cb]) lru["x"] = 1 lru["y"] = 2 lru["z"] = 3 assert L == [("x", 1)] assert count[0] == len(L) def test_weight(): d = dict() weight = lambda k, v: v lru = LRU(10, d, weight=weight) lru["x"] = 5 assert lru.total_weight == 5 lru["y"] = 4 assert lru.total_weight == 9 lru["z"] = 3 assert d == {"y": 4, "z": 3} assert lru.total_weight == 7 del lru["z"] assert lru.total_weight == 4 lru["a"] = 10000 assert "a" not in lru assert d == {"y": 4} def test_explicit_evict(): d = dict() lru = LRU(10, d) lru["x"] = 1 lru["y"] = 2 assert set(d) == {"x", "y"} k, v, w = lru.evict() assert set(d) == {"y"} assert k == "x" assert v == 1 assert w == 1 zict-2.0.0/zict/tests/test_sieve.py0000644000076600000240000000330213626026221020421 0ustar jbourbeaustaff00000000000000from zict import Sieve from . import utils_test def test_simple(): a = {} b = {} c = {} def selector(k, v): return len(v) % 3 mappings = {0: a, 1: b, 2: c} d = Sieve(mappings, selector) assert len(d) == 0 d["u"] = b"the" d["v"] = b"big" d["w"] = b"brown" d["x"] = b"fox" d["y"] = b"jumps" d["z"] = b"over" assert d["u"] == b"the" assert d["v"] == b"big" assert len(d) == 6 assert sorted(d) == ["u", "v", "w", "x", "y", "z"] assert sorted(d.keys()) == ["u", "v", "w", "x", "y", "z"] assert sorted(d.values()) == sorted( [b"the", b"big", b"brown", b"fox", b"jumps", b"over"] ) assert a == {"u": b"the", "v": b"big", "x": b"fox"} assert b == {"z": b"over"} assert c == {"w": b"brown", "y": b"jumps"} # Changing existing keys can move values from one mapping to another. d["w"] = b"lazy" d["x"] = b"dog" assert d["w"] == b"lazy" assert d["x"] == b"dog" assert len(d) == 6 assert sorted(d.values()) == sorted( [b"the", b"big", b"lazy", b"dog", b"jumps", b"over"] ) assert a == {"u": b"the", "v": b"big", "x": b"dog"} assert b == {"w": b"lazy", "z": b"over"} assert c == {"y": b"jumps"} del d["v"] del d["w"] assert len(d) == 4 assert "v" not in d assert "w" not in d assert sorted(d.values()) == sorted([b"the", b"dog", b"jumps", b"over"]) def test_mapping(): """ Test mapping interface for Sieve(). """ a = {} b = {} def selector(key, value): return sum(bytearray(value)) & 1 mappings = {0: a, 1: b} z = Sieve(mappings, selector) utils_test.check_mapping(z) utils_test.check_closing(z) zict-2.0.0/zict/tests/test_zip.py0000644000076600000240000000312413626026221020112 0ustar jbourbeaustaff00000000000000import os import zipfile try: from collections.abc import MutableMapping except ImportError: from collections import MutableMapping import pytest from zict import Zip @pytest.yield_fixture def fn(): filename = ".tmp.zip" if os.path.exists(filename): os.remove(filename) yield filename if os.path.exists(filename): os.remove(filename) def test_simple(fn): z = Zip(fn) assert isinstance(z, MutableMapping) assert not z assert list(z) == list(z.keys()) == [] assert list(z.values()) == [] assert list(z.items()) == [] z["x"] = b"123" assert list(z) == list(z.keys()) == ["x"] assert list(z.values()) == [b"123"] assert list(z.items()) == [("x", b"123")] assert z["x"] == b"123" z.flush() zz = zipfile.ZipFile(fn, mode="r") assert zz.read("x") == b"123" z["y"] = b"456" assert z["y"] == b"456" def test_setitem_typeerror(fn): z = Zip(fn) with pytest.raises(TypeError): z["x"] = 123 def test_contextmanager(fn): with Zip(fn) as z: z["x"] = b"123" zz = zipfile.ZipFile(fn, mode="r") assert zz.read("x") == b"123" def test_missing_key(fn): z = Zip(fn) with pytest.raises(KeyError): z["x"] def test_close(fn): z = Zip(fn) z["x"] = b"123" z.close() zz = zipfile.ZipFile(fn, mode="r") assert zz.read("x") == b"123" with pytest.raises(IOError): z["y"] = b"123" def test_bytearray(fn): data = bytearray(b"123") with Zip(fn) as z: z["x"] = data with Zip(fn) as z: assert z["x"] == b"123" zict-2.0.0/zict/tests/utils_test.py0000644000076600000240000000572713626026221020463 0ustar jbourbeaustaff00000000000000import random import string try: from collections.abc import MutableMapping except ImportError: from collections import MutableMapping import pytest def generate_random_strings(n, min_len, max_len): r = random.Random(42) l = [] chars = string.ascii_lowercase + string.digits for i in range(n): nchars = r.randint(min_len, max_len) s = "".join(r.choice(chars) for _ in range(nchars)) l.append(s) return l def to_bytestring(s): if isinstance(s, bytes): return s else: return s.encode("latin1") def check_items(z, expected_items): items = list(z.items()) assert len(items) == len(expected_items) assert sorted(items) == sorted(expected_items) # All iterators should walk the mapping in the same order assert list(z.keys()) == [k for k, v in items] assert list(z.values()) == [v for k, v in items] assert list(z) == [k for k, v in items] def stress_test_mapping_updates(z): # Certain mappings shuffle between several underlying stores # during updates. This stress tests the internal mapping # consistency. r = random.Random(42) keys = list(string.ascii_lowercase) values = [to_bytestring(s) for s in generate_random_strings(len(keys), 1, 10)] z.clear() assert len(z) == 0 for k, v in zip(keys, values): z[k] = v assert len(z) == len(keys) assert sorted(z) == sorted(keys) assert sorted(z.items()) == sorted(zip(keys, values)) for i in range(3): r.shuffle(keys) r.shuffle(values) for k, v in zip(keys, values): z[k] = v check_items(z, list(zip(keys, values))) r.shuffle(keys) r.shuffle(values) z.update(zip(keys, values)) check_items(z, list(zip(keys, values))) def check_mapping(z): assert isinstance(z, MutableMapping) assert not z assert list(z) == list(z.keys()) == [] assert list(z.values()) == [] assert list(z.items()) == [] assert len(z) == 0 z["abc"] = b"456" z["xyz"] = b"12" assert len(z) == 2 assert z["abc"] == b"456" check_items(z, [("abc", b"456"), ("xyz", b"12")]) assert "abc" in z assert "xyz" in z assert "def" not in z with pytest.raises(KeyError): z["def"] z.update(xyz=b"707", uvw=b"000") check_items(z, [("abc", b"456"), ("xyz", b"707"), ("uvw", b"000")]) z.update([("xyz", b"654"), ("uvw", b"999")]) check_items(z, [("abc", b"456"), ("xyz", b"654"), ("uvw", b"999")]) z.update({"xyz": b"321"}) check_items(z, [("abc", b"456"), ("xyz", b"321"), ("uvw", b"999")]) del z["abc"] with pytest.raises(KeyError): z["abc"] with pytest.raises(KeyError): del z["abc"] assert "abc" not in z assert set(z) == {"uvw", "xyz"} assert len(z) == 2 z["def"] = b"\x00\xff" assert len(z) == 3 assert z["def"] == b"\x00\xff" assert "def" in z stress_test_mapping_updates(z) def check_closing(z): z.close() zict-2.0.0/zict/zip.py0000644000076600000240000000352213626026221015713 0ustar jbourbeaustaff00000000000000try: from collections.abc import MutableMapping except ImportError: from collections import MutableMapping import zipfile class Zip(MutableMapping): """ Mutable Mapping interface to a Zip file Keys must be strings, values must be bytes Parameters ---------- filename: string mode: string, ('r', 'w', 'a'), defaults to 'a' Examples -------- >>> z = Zip('myfile.zip') # doctest: +SKIP >>> z['x'] = b'123' # doctest: +SKIP >>> z['x'] # doctest: +SKIP b'123' >>> z.flush() # flush and write metadata to disk # doctest: +SKIP """ def __init__(self, filename, mode="a"): self.filename = filename self.mode = mode self._file = None @property def file(self): if self.mode == "closed": raise IOError("File closed") if not self._file or not self._file.fp: self._file = zipfile.ZipFile(self.filename, mode=self.mode) return self._file def __getitem__(self, key): return self.file.read(key) def __setitem__(self, key, value): self.file.writestr(key, value) def keys(self): return (zi.filename for zi in self.file.filelist) def values(self): return map(self.file.read, self.keys()) def items(self): return ((zi.filename, self.file.read(zi.filename)) for zi in self.file.filelist) def __iter__(self): return self.keys() def __delitem__(self, key): raise NotImplementedError("Not supported by stdlib zipfile") def __len__(self): return len(self.file.filelist) def flush(self): self.file.fp.flush() self.file.close() def close(self): self.flush() self.mode = "closed" def __enter__(self): return self def __exit__(self, type, value, traceback): self.close() zict-2.0.0/zict.egg-info/0000755000076600000240000000000013626230051016225 5ustar jbourbeaustaff00000000000000zict-2.0.0/zict.egg-info/PKG-INFO0000644000076600000240000000144113626230051017322 0ustar jbourbeaustaff00000000000000Metadata-Version: 1.2 Name: zict Version: 2.0.0 Summary: Mutable mapping tools Home-page: http://zict.readthedocs.io/en/latest/ Maintainer: Matthew Rocklin Maintainer-email: mrocklin@gmail.com License: BSD Description: Zict ==== |Build Status| Mutable Mapping interfaces. See documentation_. .. _documentation: http://zict.readthedocs.io/en/latest/ .. |Build Status| image:: https://travis-ci.org/dask/zict.svg?branch=master :target: https://travis-ci.org/dask/zict Keywords: mutable mapping,dict,dask Platform: UNKNOWN Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 zict-2.0.0/zict.egg-info/SOURCES.txt0000644000076600000240000000110113626230051020102 0ustar jbourbeaustaff00000000000000LICENSE.txt MANIFEST.in README.rst requirements.txt setup.cfg setup.py zict/__init__.py zict/buffer.py zict/common.py zict/file.py zict/func.py zict/lmdb.py zict/lru.py zict/sieve.py zict/zip.py zict.egg-info/PKG-INFO zict.egg-info/SOURCES.txt zict.egg-info/dependency_links.txt zict.egg-info/not-zip-safe zict.egg-info/requires.txt zict.egg-info/top_level.txt zict/tests/__init__.py zict/tests/test_buffer.py zict/tests/test_file.py zict/tests/test_func.py zict/tests/test_lmdb.py zict/tests/test_lru.py zict/tests/test_sieve.py zict/tests/test_zip.py zict/tests/utils_test.pyzict-2.0.0/zict.egg-info/dependency_links.txt0000644000076600000240000000000113626230051022273 0ustar jbourbeaustaff00000000000000 zict-2.0.0/zict.egg-info/not-zip-safe0000644000076600000240000000000113626230051020453 0ustar jbourbeaustaff00000000000000 zict-2.0.0/zict.egg-info/requires.txt0000644000076600000240000000001113626230051020615 0ustar jbourbeaustaff00000000000000heapdict zict-2.0.0/zict.egg-info/top_level.txt0000644000076600000240000000000513626230051020752 0ustar jbourbeaustaff00000000000000zict