zict-0.1.3/0000775000175000017500000000000013160466433014152 5ustar mrocklinmrocklin00000000000000zict-0.1.3/requirements.txt0000664000175000017500000000001113160466340017423 0ustar mrocklinmrocklin00000000000000heapdict zict-0.1.3/zict.egg-info/0000775000175000017500000000000013160466433016615 5ustar mrocklinmrocklin00000000000000zict-0.1.3/zict.egg-info/PKG-INFO0000664000175000017500000000113013160466433017705 0ustar mrocklinmrocklin00000000000000Metadata-Version: 1.0 Name: zict Version: 0.1.3 Summary: Mutable mapping tools Home-page: http://zict.readthedocs.io/en/latest/ Author: Matthew Rocklin Author-email: mrocklin@gmail.com License: BSD Description: Zict ==== |Build Status| Mutable Mapping interfaces. See documentation_. .. _documentation: http://zict.readthedocs.io/en/latest/ .. |Build Status| image:: https://travis-ci.org/dask/zict.svg?branch=master :target: https://travis-ci.org/dask/zict Keywords: mutable mapping,dict,dask Platform: UNKNOWN zict-0.1.3/zict.egg-info/top_level.txt0000664000175000017500000000000513160466433021342 0ustar mrocklinmrocklin00000000000000zict zict-0.1.3/zict.egg-info/SOURCES.txt0000664000175000017500000000106713160466433020505 0ustar mrocklinmrocklin00000000000000LICENSE.txt MANIFEST.in README.rst requirements.txt setup.py zict/__init__.py zict/buffer.py zict/common.py zict/file.py zict/func.py zict/lmdb.py zict/lru.py zict/sieve.py zict/zip.py zict.egg-info/PKG-INFO zict.egg-info/SOURCES.txt zict.egg-info/dependency_links.txt zict.egg-info/not-zip-safe zict.egg-info/requires.txt zict.egg-info/top_level.txt zict/tests/__init__.py zict/tests/test_buffer.py zict/tests/test_file.py zict/tests/test_func.py zict/tests/test_lmdb.py zict/tests/test_lru.py zict/tests/test_sieve.py zict/tests/test_zip.py zict/tests/utils_test.pyzict-0.1.3/zict.egg-info/not-zip-safe0000664000175000017500000000000113160466433021043 0ustar mrocklinmrocklin00000000000000 zict-0.1.3/zict.egg-info/requires.txt0000664000175000017500000000001113160466433021205 0ustar mrocklinmrocklin00000000000000heapdict zict-0.1.3/zict.egg-info/dependency_links.txt0000664000175000017500000000000113160466433022663 0ustar mrocklinmrocklin00000000000000 zict-0.1.3/PKG-INFO0000664000175000017500000000113013160466433015242 0ustar mrocklinmrocklin00000000000000Metadata-Version: 1.0 Name: zict Version: 0.1.3 Summary: Mutable mapping tools Home-page: http://zict.readthedocs.io/en/latest/ Author: Matthew Rocklin Author-email: mrocklin@gmail.com License: BSD Description: Zict ==== |Build Status| Mutable Mapping interfaces. See documentation_. .. _documentation: http://zict.readthedocs.io/en/latest/ .. |Build Status| image:: https://travis-ci.org/dask/zict.svg?branch=master :target: https://travis-ci.org/dask/zict Keywords: mutable mapping,dict,dask Platform: UNKNOWN zict-0.1.3/zict/0000775000175000017500000000000013160466433015123 5ustar mrocklinmrocklin00000000000000zict-0.1.3/zict/buffer.py0000664000175000017500000000724513020072402016736 0ustar mrocklinmrocklin00000000000000from itertools import chain from .common import ZictBase, close from .lru import LRU class Buffer(ZictBase): """ Buffer one dictionary on top of another This creates a MutableMapping by combining two MutableMappings, one that feeds into the other when it overflows, based on an LRU mechanism. When the first evicts elements these get placed into the second. When an item is retrieved from the second it is placed back into the first. Parameters ---------- fast: MutableMapping slow: MutableMapping fast_to_slow_callbacks: list of callables These functions run every time data moves from the fast to the slow mapping. They take two arguments, a key and a value slow_to_fast_callbacks: list of callables These functions run every time data moves form the slow to the fast mapping. Examples -------- >>> fast = dict() >>> slow = Func(dumps, loads, File('storage/')) # doctest: +SKIP >>> def weight(k, v): ... return sys.getsizeof(v) >>> buff = Buffer(fast, slow, 1e8, weight=weight) # doctest: +SKIP See Also -------- LRU """ def __init__(self, fast, slow, n, weight=lambda k, v: 1, fast_to_slow_callbacks=None, slow_to_fast_callbacks=None): self.fast = LRU(n, fast, weight=weight, on_evict=[self.fast_to_slow]) self.slow = slow self.n = n self.weight = weight if callable(fast_to_slow_callbacks): fast_to_slow_callbacks = [fast_to_slow_callbacks] if callable(slow_to_fast_callbacks): slow_to_fast_callbacks = [slow_to_fast_callbacks] self.fast_to_slow_callbacks = fast_to_slow_callbacks or [] self.slow_to_fast_callbacks = slow_to_fast_callbacks or [] def fast_to_slow(self, key, value): self.slow[key] = value for cb in self.fast_to_slow_callbacks: cb(key, value) def slow_to_fast(self, key): value = self.slow[key] # Avoid useless movement for heavy values if self.weight(key, value) <= self.n: del self.slow[key] self.fast[key] = value for cb in self.slow_to_fast_callbacks: cb(key, value) return value def __getitem__(self, key): if key in self.fast: return self.fast[key] elif key in self.slow: return self.slow_to_fast(key) else: raise KeyError(key) def __setitem__(self, key, value): weight = self.weight(key, value) # Avoid useless movement for heavy values if self.weight(key, value) <= self.n: if key in self.slow: del self.slow[key] self.fast[key] = value else: self.slow[key] = value def __delitem__(self, key): if key in self.fast: del self.fast[key] elif key in self.slow: del self.slow[key] else: raise KeyError(key) def keys(self): return chain(self.fast.keys(), self.slow.keys()) def values(self): return chain(self.fast.values(), self.slow.values()) def items(self): return chain(self.fast.items(), self.slow.items()) def __len__(self): return len(self.fast) + len(self.slow) def __iter__(self): return chain(iter(self.fast), iter(self.slow)) def __contains__(self, key): return key in self.fast or key in self.slow def __str__(self): return 'Buffer<%s, %s>' % (str(self.fast), str(self.slow)) __repr__ = __str__ def flush(self): self.fast.flush() self.slow.flush() def close(self): close(self.fast) close(self.slow) zict-0.1.3/zict/common.py0000664000175000017500000000262213020072402016747 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function from collections import Mapping, MutableMapping class ZictBase(MutableMapping): """ Base class for zict mappings. """ def update(*args, **kwds): # Boilerplate for implementing an update() method if not args: raise TypeError("descriptor 'update' of MutableMapping object " "needs an argument") self = args[0] args = args[1:] if len(args) > 1: raise TypeError('update expected at most 1 arguments, got %d' % len(args)) items = [] if args: other = args[0] if isinstance(other, Mapping) or hasattr(other, "items"): items += other.items() else: # Assuming (key, value) pairs items += other if kwds: items += kwds.items() self._do_update(items) def _do_update(self, items): # Default implementation, can be overriden for speed for k, v in items: self[k] = v def close(self): """ Release any system resources held by this object. """ def __enter__(self): return self def __exit__(self, *args): self.close() def close(z): """ Close *z* if possible. """ if hasattr(z, "close"): z.close() zict-0.1.3/zict/func.py0000664000175000017500000000363713020072402016421 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function from .common import ZictBase, close class Func(ZictBase): """ Buffer a MutableMapping with a pair of input/output functions Parameters ---------- dump: callable Function to call on value as we set it into the mapping load: callable Function to call on value as we pull it from the mapping d: MutableMapping Examples -------- >>> def double(x): ... return x * 2 >>> def halve(x): ... return x / 2 >>> d = dict() >>> f = Func(double, halve, d) >>> f['x'] = 10 >>> d {'x': 20} >>> f['x'] 10.0 """ def __init__(self, dump, load, d): self.dump = dump self.load = load self.d = d def __getitem__(self, key): return self.load(self.d[key]) def __setitem__(self, key, value): self.d[key] = self.dump(value) def __contains__(self, key): return key in self.d def __delitem__(self, key): del self.d[key] def keys(self): return self.d.keys() def values(self): return map(self.load, self.d.values()) def items(self): return ((k, self.load(v)) for k, v in self.d.items()) def _do_update(self, items): self.d.update((k, self.dump(v)) for k, v in items) def __iter__(self): return iter(self.d) def __len__(self): return len(self.d) def __str__(self): return '%s %s>' % (funcname(self.dump), funcname(self.load), str(self.d)) __repr__ = __str__ def flush(self): self.d.flush() def close(self): close(self.d) def funcname(func): """Get the name of a function.""" while hasattr(func, 'func'): func = func.func try: return func.__name__ except: return str(func) zict-0.1.3/zict/lru.py0000664000175000017500000000574713160466302016307 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function from heapdict import heapdict from .common import ZictBase, close def do_nothing(k, v): pass class LRU(ZictBase): """ Evict Least Recently Used Elements Parameters ---------- n: int Number of elements to keep, or total weight if weight= is used d: MutableMapping Dictionary in which to hold elements on_evict: list of callables Function:: k, v -> action to call on key value pairs prior to eviction weight: callable Function:: k, v -> number to determine the size of keeping the item in the mapping. Defaults to ``(k, v) -> 1`` Examples -------- >>> lru = LRU(2, dict(), on_evict=lambda k, v: print("Lost", k, v)) >>> lru['x'] = 1 >>> lru['y'] = 2 >>> lru['z'] = 3 Lost x 1 """ def __init__(self, n, d, on_evict=None, weight=lambda k, v: 1): self.d = d self.n = n self.heap = heapdict() self.i = 0 if callable(on_evict): on_evict = [on_evict] self.on_evict = on_evict or [] self.weight = weight self.total_weight = 0 self.weights = dict() def __getitem__(self, key): result = self.d[key] self.i += 1 self.heap[key] = self.i return result def __setitem__(self, key, value): if key in self.d: del self[key] weight = self.weight(key, value) if weight <= self.n: self.d[key] = value self.i += 1 self.heap[key] = self.i self.weights[key] = weight self.total_weight += weight else: for cb in self.on_evict: cb(key, value) while self.total_weight > self.n: self.evict() def evict(self): """ Evict least recently used key This is typically called from internal use, but can be externally triggered as well. Returns ------- k: key v: value w: weight """ k, priority = self.heap.popitem() weight = self.weights.pop(k) self.total_weight -= weight v = self.d.pop(k) for cb in self.on_evict: cb(k, v) return k, v, weight def __delitem__(self, key): del self.d[key] del self.heap[key] self.total_weight -= self.weights.pop(key) def keys(self): return self.d.keys() def values(self): return self.d.values() def items(self): return self.d.items() def __len__(self): return len(self.d) def __iter__(self): return iter(self.d) def __contains__(self, key): return key in self.d def __str__(self): sub = str(self.d) if not isinstance(self.d, dict) else 'dict' return '' % (self.total_weight, self.n, sub) __repr__ = __str__ def flush(self): self.d.flush() def close(self): close(self.d) zict-0.1.3/zict/__init__.py0000664000175000017500000000027213160466352017235 0ustar mrocklinmrocklin00000000000000from .zip import Zip from .file import File from .func import Func from .lru import LRU from .buffer import Buffer from .sieve import Sieve from .lmdb import LMDB __version__ = '0.1.3' zict-0.1.3/zict/tests/0000775000175000017500000000000013160466433016265 5ustar mrocklinmrocklin00000000000000zict-0.1.3/zict/tests/test_zip.py0000664000175000017500000000300612714371023020471 0ustar mrocklinmrocklin00000000000000from collections import MutableMapping import os import zipfile import pytest from zict import Zip @pytest.yield_fixture def fn(): filename = '.tmp.zip' if os.path.exists(filename): os.remove(filename) yield filename if os.path.exists(filename): os.remove(filename) def test_simple(fn): z = Zip(fn) assert isinstance(z, MutableMapping) assert not z assert list(z) == list(z.keys()) == [] assert list(z.values()) == [] assert list(z.items()) == [] z['x'] = b'123' assert list(z) == list(z.keys()) == ['x'] assert list(z.values()) == [b'123'] assert list(z.items()) == [('x', b'123')] assert z['x'] == b'123' z.flush() zz = zipfile.ZipFile(fn, mode='r') assert zz.read('x') == b'123' z['y'] = b'456' assert z['y'] == b'456' def test_setitem_typeerror(fn): z = Zip(fn) with pytest.raises(TypeError): z['x'] = 123 def test_contextmanager(fn): with Zip(fn) as z: z['x'] = b'123' zz = zipfile.ZipFile(fn, mode='r') assert zz.read('x') == b'123' def test_missing_key(fn): z = Zip(fn) with pytest.raises(KeyError): z['x'] def test_close(fn): z = Zip(fn) z['x'] = b'123' z.close() zz = zipfile.ZipFile(fn, mode='r') assert zz.read('x') == b'123' with pytest.raises(IOError): z['y'] = b'123' def test_bytearray(fn): data = bytearray(b'123') with Zip(fn) as z: z['x'] = data with Zip(fn) as z: assert z['x'] == b'123' zict-0.1.3/zict/tests/test_sieve.py0000664000175000017500000000350213020072402020771 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function import sys from zict import Sieve from . import utils_test def test_simple(): a = {} b = {} c = {} def selector(k, v): return len(v) % 3 mappings = {0: a, 1: b, 2: c} d = Sieve(mappings, selector) assert len(d) == 0 d['u'] = b'the' d['v'] = b'big' d['w'] = b'brown' d['x'] = b'fox' d['y'] = b'jumps' d['z'] = b'over' assert d['u'] == b'the' assert d['v'] == b'big' assert len(d) == 6 assert sorted(d) == ['u', 'v', 'w', 'x', 'y', 'z'] assert sorted(d.keys()) == ['u', 'v', 'w', 'x', 'y', 'z'] assert sorted(d.values()) == sorted([b'the', b'big', b'brown', b'fox', b'jumps', b'over']) assert a == {'u': b'the', 'v': b'big', 'x': b'fox'} assert b == {'z': b'over'} assert c == {'w': b'brown', 'y': b'jumps'} # Changing existing keys can move values from one mapping to another. d['w'] = b'lazy' d['x'] = b'dog' assert d['w'] == b'lazy' assert d['x'] == b'dog' assert len(d) == 6 assert sorted(d.values()) == sorted([b'the', b'big', b'lazy', b'dog', b'jumps', b'over']) assert a == {'u': b'the', 'v': b'big', 'x': b'dog'} assert b == {'w': b'lazy', 'z': b'over'} assert c == {'y': b'jumps'} del d['v'] del d['w'] assert len(d) == 4 assert 'v' not in d assert 'w' not in d assert sorted(d.values()) == sorted([b'the', b'dog', b'jumps', b'over']) def test_mapping(): """ Test mapping interface for Sieve(). """ a = {} b = {} def selector(key, value): return sum(bytearray(value)) & 1 mappings = {0: a, 1: b} z = Sieve(mappings, selector) utils_test.check_mapping(z) utils_test.check_closing(z) zict-0.1.3/zict/tests/__init__.py0000664000175000017500000000000013020072402020344 0ustar mrocklinmrocklin00000000000000zict-0.1.3/zict/tests/test_func.py0000664000175000017500000000153313020072402020613 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function from zict import Func from . import utils_test def inc(x): return x + 1 def dec(x): return x - 1 def rotl(x): return x[1:] + x[:1] def rotr(x): return x[-1:] + x[:-1] def test_simple(): d = dict() f = Func(inc, dec, d) f['x'] = 10 assert f['x'] == 10 assert d['x'] == 11 assert 'x' in f assert list(f) == ['x'] assert list(f.values()) == [10] assert list(f.items()) == [('x', 10)] assert all(s in str(f) for s in ['inc', 'dec', 'x', 'Func']) assert all(s in repr(f) for s in ['inc', 'dec', 'x', 'Func']) del f['x'] assert 'x' not in d def test_mapping(): """ Test mapping interface for Func(). """ d = {} z = Func(rotl, rotr, d) utils_test.check_mapping(z) utils_test.check_closing(z) zict-0.1.3/zict/tests/test_lru.py0000664000175000017500000000434013160466302020474 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function from zict import LRU from . import utils_test def test_simple(): d = dict() lru = LRU(2, d) lru['x'] = 1 lru['y'] = 2 assert lru['x'] == 1 assert lru['y'] == 2 assert d == {'x': 1, 'y': 2} lru['z'] = 3 assert len(d) == 2 assert len(lru) == 2 assert 'z' in d assert 'z' in lru assert 'x' not in d assert 'y' in d del lru['y'] assert 'y' not in d assert 'y' not in lru lru['a'] = 5 assert set(lru.keys()) == set(['z', 'a']) def test_str(): d = dict() lru = LRU(2, d) lru['x'] = 1 lru['y'] = 2 assert str(lru.total_weight) in str(lru) assert str(lru.total_weight) in repr(lru) assert str(lru.n) in str(lru) assert str(lru.n) in repr(lru) assert 'dict' in str(lru) assert 'dict' in repr(lru) def test_mapping(): """ Test mapping interface for LRU(). """ d = {} # 100 is more than the max length when running check_mapping() lru = LRU(100, d) utils_test.check_mapping(lru) utils_test.check_closing(lru) def test_overwrite(): d = dict() lru = LRU(2, d) lru['x'] = 1 lru['y'] = 2 lru['y'] = 3 assert set(lru) == {'x', 'y'} lru.update({'y': 4}) assert set(lru) == {'x', 'y'} def test_callbacks(): count = [0] def cb(k, v): count[0] += 1 L = list() d = dict() lru = LRU(2, d, on_evict=[lambda k, v: L.append((k, v)), cb]) lru['x'] = 1 lru['y'] = 2 lru['z'] = 3 assert L == [('x', 1)] assert count[0] == len(L) def test_weight(): d = dict() weight = lambda k, v: v lru = LRU(10, d, weight=weight) lru['x'] = 5 assert lru.total_weight == 5 lru['y'] = 4 assert lru.total_weight == 9 lru['z'] = 3 assert d == {'y': 4, 'z': 3} assert lru.total_weight == 7 del lru['z'] assert lru.total_weight == 4 lru['a'] = 10000 assert 'a' not in lru assert d == {'y': 4} def test_explicit_evict(): d = dict() lru = LRU(10, d) lru['x'] = 1 lru['y'] = 2 assert set(d) == {'x', 'y'} k, v, w = lru.evict() assert set(d) == {'y'} assert k == 'x' assert v == 1 assert w == 1 zict-0.1.3/zict/tests/test_file.py0000664000175000017500000000450113064014314020603 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function import os import shutil import pytest from zict.file import File from . import utils_test @pytest.yield_fixture def fn(): filename = '.tmp' if os.path.exists(filename): shutil.rmtree(filename) yield filename if os.path.exists(filename): shutil.rmtree(filename) def test_mapping(fn): """ Test mapping interface for File(). """ z = File(fn) utils_test.check_mapping(z) def test_implementation(fn): z = File(fn) assert not z z['x'] = b'123' assert os.listdir(fn) == ['x'] with open(os.path.join(fn, 'x'), 'rb') as f: assert f.read() == b'123' assert 'x' in z def test_str(fn): z = File(fn) assert fn in str(z) assert fn in repr(z) assert z.mode in str(z) assert z.mode in repr(z) def test_setitem_typeerror(fn): z = File(fn) with pytest.raises(TypeError): z['x'] = 123 def test_contextmanager(fn): with File(fn) as z: z['x'] = b'123' with open(os.path.join(fn, 'x'), 'rb') as f: assert f.read() == b'123' def test_delitem(fn): z = File(fn) z['x'] = b'123' assert os.path.exists(os.path.join(z.directory, 'x')) del z['x'] assert not os.path.exists(os.path.join(z.directory, 'x')) def test_missing_key(fn): z = File(fn) with pytest.raises(KeyError): z['x'] def test_arbitrary_chars(fn): z = File(fn) # Avoid hitting the Windows max filename length chunk = 16 for i in range(1, 128, chunk): key = ''.join(['foo_'] + [chr(i) for i in range(i, min(128, i + chunk))]) with pytest.raises(KeyError): z[key] z[key] = b'foo' assert z[key] == b'foo' assert list(z) == [key] assert list(z.keys()) == [key] assert list(z.items()) == [(key, b'foo')] assert list(z.values()) == [b'foo'] zz = File(fn) assert zz[key] == b'foo' assert list(zz) == [key] assert list(zz.keys()) == [key] assert list(zz.items()) == [(key, b'foo')] assert list(zz.values()) == [b'foo'] del zz del z[key] with pytest.raises(KeyError): z[key] def test_write_list_of_bytes(fn): z = File(fn) z['x'] = [b'123', b'4567'] assert z['x'] == b'1234567' zict-0.1.3/zict/tests/test_buffer.py0000664000175000017500000000435113020072402021132 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function from zict import Buffer from . import utils_test def test_simple(): a = dict() b = dict() buff = Buffer(a, b, n=10, weight=lambda k, v: v) buff['x'] = 1 buff['y'] = 2 assert buff['x'] == 1 assert buff['y'] == 2 assert a == {'x': 1, 'y': 2} assert buff.fast.total_weight == 3 buff['z'] = 8 assert a == {'y': 2, 'z': 8} assert b == {'x': 1} assert buff['x'] == 1 assert a == {'x': 1, 'z': 8} assert b == {'y': 2} assert 'x' in buff assert 'y' in buff assert 'missing' not in buff buff['y'] = 1 assert a == {'x': 1, 'y': 1, 'z': 8} assert buff.fast.total_weight == 10 assert b == {} del buff['z'] assert a == {'x': 1, 'y': 1} assert buff.fast.total_weight == 2 assert b == {} del buff['y'] assert a == {'x': 1} assert buff.fast.total_weight == 1 assert b == {} assert 'y' not in buff buff['a'] = 5 assert set(buff) == set(buff.keys()) == {'a', 'x'} fast_keys = set(buff.fast) slow_keys = set(buff.slow) assert not (fast_keys & slow_keys) assert fast_keys | slow_keys == set(buff) # Overweight element stays in slow mapping buff['b'] = 1000 assert 'b' in buff.slow assert set(buff.fast) == fast_keys assert set(buff.slow) == {'b'} | slow_keys assert 'b' in buff assert buff['b'] == 1000 def test_mapping(): """ Test mapping interface for Buffer(). """ a = {} b = {} buff = Buffer(a, b, n=2) utils_test.check_mapping(buff) utils_test.check_closing(buff) def test_callbacks(): f2s = [] def f2s_cb(k, v): f2s.append(k) s2f = [] def s2f_cb(k, v): s2f.append(k) a = dict() b = dict() buff = Buffer(a, b, n=10, weight=lambda k, v: v, fast_to_slow_callbacks=f2s_cb, slow_to_fast_callbacks=s2f_cb) buff['x'] = 1 buff['y'] = 2 assert buff['x'] == 1 assert buff['y'] == 2 assert not f2s assert not s2f buff['z'] = 8 assert f2s == ['x'] assert s2f == [] buff['z'] assert f2s == ['x'] assert s2f == [] buff['x'] assert f2s == ['x', 'y'] assert s2f == ['x'] zict-0.1.3/zict/tests/utils_test.py0000664000175000017500000000573213020072402021025 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function from collections import MutableMapping import random import string import pytest def generate_random_strings(n, min_len, max_len): r = random.Random(42) l = [] chars = string.ascii_lowercase + string.digits for i in range(n): nchars = r.randint(min_len, max_len) s = ''.join(r.choice(chars) for _ in range(nchars)) l.append(s) return l def to_bytestring(s): if isinstance(s, bytes): return s else: return s.encode('latin1') def check_items(z, expected_items): items = list(z.items()) assert len(items) == len(expected_items) assert sorted(items) == sorted(expected_items) # All iterators should walk the mapping in the same order assert list(z.keys()) == [k for k, v in items] assert list(z.values()) == [v for k, v in items] assert list(z) == [k for k, v in items] def stress_test_mapping_updates(z): # Certain mappings shuffle between several underlying stores # during updates. This stress tests the internal mapping # consistency. r = random.Random(42) keys = list(string.ascii_lowercase) values = [to_bytestring(s) for s in generate_random_strings(len(keys), 1, 10)] z.clear() assert len(z) == 0 for k, v in zip(keys, values): z[k] = v assert len(z) == len(keys) assert sorted(z) == sorted(keys) assert sorted(z.items()) == sorted(zip(keys, values)) for i in range(3): r.shuffle(keys) r.shuffle(values) for k, v in zip(keys, values): z[k] = v check_items(z, list(zip(keys, values))) r.shuffle(keys) r.shuffle(values) z.update(zip(keys, values)) check_items(z, list(zip(keys, values))) def check_mapping(z): assert isinstance(z, MutableMapping) assert not z assert list(z) == list(z.keys()) == [] assert list(z.values()) == [] assert list(z.items()) == [] assert len(z) == 0 z['abc'] = b'456' z['xyz'] = b'12' assert len(z) == 2 assert z['abc'] == b'456' check_items(z, [('abc', b'456'), ('xyz', b'12')]) assert 'abc' in z assert 'xyz' in z assert 'def' not in z with pytest.raises(KeyError): z['def'] z.update(xyz=b'707', uvw=b'000') check_items(z, [('abc', b'456'), ('xyz', b'707'), ('uvw', b'000')]) z.update([('xyz', b'654'), ('uvw', b'999')]) check_items(z, [('abc', b'456'), ('xyz', b'654'), ('uvw', b'999')]) z.update({'xyz': b'321'}) check_items(z, [('abc', b'456'), ('xyz', b'321'), ('uvw', b'999')]) del z['abc'] with pytest.raises(KeyError): z['abc'] with pytest.raises(KeyError): del z['abc'] assert 'abc' not in z assert set(z) == {'uvw', 'xyz'} assert len(z) == 2 z['def'] = b'\x00\xff' assert len(z) == 3 assert z['def'] == b'\x00\xff' assert 'def' in z stress_test_mapping_updates(z) def check_closing(z): z.close() zict-0.1.3/zict/tests/test_lmdb.py0000664000175000017500000000227113020072402020576 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function import gc import os import shutil import tempfile import pytest from zict.lmdb import LMDB from . import utils_test @pytest.yield_fixture def fn(): dirname = tempfile.mkdtemp(prefix='test_lmdb-') try: yield dirname finally: if os.path.exists(dirname): shutil.rmtree(dirname) def test_mapping(fn): """ Test mapping interface for LMDB(). """ z = LMDB(fn) utils_test.check_mapping(z) def test_reuse(fn): """ Test persistence of a LMDB() mapping. """ with LMDB(fn) as z: assert len(z) == 0 z['abc'] = b'123' with LMDB(fn) as z: assert len(z) == 1 assert z['abc'] == b'123' def test_creates_dir(fn): with LMDB(fn) as z: assert os.path.isdir(fn) def test_file_descriptors_dont_leak(fn): psutil = pytest.importorskip('psutil') proc = psutil.Process() before = proc.num_fds() z = LMDB(fn) del z gc.collect() assert proc.num_fds() == before z = LMDB(fn) z.close() assert proc.num_fds() == before with LMDB(fn) as z: pass assert proc.num_fds() == before zict-0.1.3/zict/lmdb.py0000664000175000017500000000555213020072402016402 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function import os import sys from .common import ZictBase if sys.version_info >= (3,): def _encode_key(key): return key.encode('latin1') def _decode_key(key): return key.decode('latin1') else: def _encode_key(key): return key def _decode_key(key): return key class LMDB(ZictBase): """ Mutable Mapping interface to a LMDB database. Keys must be strings, values must be bytes Parameters ---------- directory: string Examples -------- >>> z = LMDB('/tmp/somedir/') # doctest: +SKIP >>> z['x'] = b'123' # doctest: +SKIP >>> z['x'] # doctest: +SKIP b'123' """ def __init__(self, directory): import lmdb # map_size is the maximum database size but shouldn't fill up the # virtual address space map_size = (1 << 40 if sys.maxsize >= 2**32 else 1 << 28) # writemap requires sparse file support otherwise the whole # `map_size` may be reserved up front on disk writemap = sys.platform.startswith('linux') self.db = lmdb.open(directory, subdir=True, map_size=map_size, sync=False, writemap=writemap, ) def __getitem__(self, key): with self.db.begin() as txn: value = txn.get(_encode_key(key)) if value is None: raise KeyError(key) return value def __setitem__(self, key, value): with self.db.begin(write=True) as txn: txn.put(_encode_key(key), value) def __contains__(self, key): with self.db.begin() as txn: return txn.cursor().set_key(_encode_key(key)) def items(self): cursor = self.db.begin().cursor() return ((_decode_key(k), v) for k, v in cursor.iternext(keys=True, values=True)) def keys(self): cursor = self.db.begin().cursor() return (_decode_key(k) for k in cursor.iternext(keys=True, values=False)) def values(self): cursor = self.db.begin().cursor() return cursor.iternext(keys=False, values=True) def _do_update(self, items): # Optimized version of update() using a single putmulti() call. items = [(_encode_key(k), v) for k, v in items] with self.db.begin(write=True) as txn: consumed, added = txn.cursor().putmulti(items) assert consumed == added == len(items) def __iter__(self): return self.keys() def __delitem__(self, key): with self.db.begin(write=True) as txn: if not txn.delete(_encode_key(key)): raise KeyError(key) def __len__(self): return self.db.stat()['entries'] def close(self): self.db.close() zict-0.1.3/zict/zip.py0000664000175000017500000000402313020072402016256 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function from collections import MutableMapping import sys import zipfile class Zip(MutableMapping): """ Mutable Mapping interface to a Zip file Keys must be strings, values must be bytes Parameters ---------- filename: string mode: string, ('r', 'w', 'a'), defaults to 'a' Examples -------- >>> z = Zip('myfile.zip') # doctest: +SKIP >>> z['x'] = b'123' # doctest: +SKIP >>> z['x'] # doctest: +SKIP b'123' >>> z.flush() # flush and write metadata to disk # doctest: +SKIP """ def __init__(self, filename, mode='a'): self.filename = filename self.mode = mode self._file = None @property def file(self): if self.mode == 'closed': raise IOError("File closed") if not self._file or not self._file.fp: self._file = zipfile.ZipFile(self.filename, mode=self.mode) return self._file def __getitem__(self, key): return self.file.read(key) def __setitem__(self, key, value): self.file.writestr(key, to_bytes(value)) def keys(self): return (zi.filename for zi in self.file.filelist) def values(self): return map(self.file.read, self.keys()) def items(self): return ((zi.filename, self.file.read(zi.filename)) for zi in self.file.filelist) def __iter__(self): return self.keys() def __delitem__(self, key): raise NotImplementedError("Not supported by stdlib zipfile") def __len__(self): return len(self.file.filelist) def flush(self): self.file.fp.flush() self.file.close() def close(self): self.flush() self.mode = 'closed' def __enter__(self): return self def __exit__(self, type, value, traceback): self.close() if sys.version_info[0] == 2: def to_bytes(x): if isinstance(x, bytearray): return bytes(x) return x else: to_bytes = lambda x: x zict-0.1.3/zict/file.py0000664000175000017500000000510513064014314016403 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function import errno import os try: from urllib.parse import quote, unquote except ImportError: from urllib import quote, unquote from .common import ZictBase def _safe_key(key): """ Escape key so as to be usable on all filesystems. """ # Even directory separators are unsafe. return quote(key, safe='') def _unsafe_key(key): """ Undo the escaping done by _safe_key(). """ return unquote(key) class File(ZictBase): """ Mutable Mapping interface to a directory Keys must be strings, values must be bytes Note this shouldn't be used for interprocess persistence, as keys are cached in memory. Parameters ---------- directory: string mode: string, ('r', 'w', 'a'), defaults to 'a' Examples -------- >>> z = File('myfile') # doctest: +SKIP >>> z['x'] = b'123' # doctest: +SKIP >>> z['x'] # doctest: +SKIP b'123' Also supports writing lists of bytes objects >>> z['y'] = [b'123', b'4567'] # doctest: +SKIP >>> z['y'] # doctest: +SKIP b'1234567' Or anything that can be used with file.write, like a memoryview >>> z['data'] = np.ones(5).data # doctest: +SKIP """ def __init__(self, directory, mode='a'): self.directory = directory self.mode = mode self._keys = set() if not os.path.exists(self.directory): os.mkdir(self.directory) else: for n in os.listdir(self.directory): self._keys.add(_unsafe_key(n)) def __str__(self): return '' % (self.directory, self.mode, len(self)) __repr__ = __str__ def __getitem__(self, key): if key not in self._keys: raise KeyError(key) with open(os.path.join(self.directory, _safe_key(key)), 'rb') as f: return f.read() def __setitem__(self, key, value): with open(os.path.join(self.directory, _safe_key(key)), 'wb') as f: if isinstance(value, (tuple, list)): for v in value: f.write(v) else: f.write(value) self._keys.add(key) def __contains__(self, key): return key in self._keys def keys(self): return iter(self._keys) __iter__ = keys def __delitem__(self, key): if key not in self._keys: raise KeyError(key) os.remove(os.path.join(self.directory, _safe_key(key))) self._keys.remove(key) def __len__(self): return len(self._keys) zict-0.1.3/zict/sieve.py0000664000175000017500000000620113020072402016567 0ustar mrocklinmrocklin00000000000000from __future__ import absolute_import, division, print_function from collections import defaultdict from itertools import chain import sys from .common import ZictBase, close class Sieve(ZictBase): """ Store values in different mappings based on a selector's output. This creates a MutableMapping combining several underlying MutableMappings for storage. Items are dispatched based on a selector function provided by the user. Parameters ---------- mappings: dict of {mapping key: MutableMapping} selector: callable (key, value) -> mapping key Examples -------- >>> small = {} >>> large = DataBase() # doctest: +SKIP >>> mappings = {True: small, False: large} # doctest: +SKIP >>> def is_small(key, value): # doctest: +SKIP return sys.getsizeof(value) < 10000 >>> d = Sieve(mappings, is_small) # doctest: +SKIP See Also -------- Buffer """ def __init__(self, mappings, selector): self.mappings = mappings self.selector = selector self.key_to_mapping = {} def __getitem__(self, key): return self.key_to_mapping[key][key] def __setitem__(self, key, value): old_mapping = self.key_to_mapping.get(key) mapping = self.mappings[self.selector(key, value)] if old_mapping is not None and old_mapping is not mapping: del old_mapping[key] mapping[key] = value self.key_to_mapping[key] = mapping def __delitem__(self, key): del self.key_to_mapping.pop(key)[key] def _do_update(self, items): # Optimized update() implementation issuing a single update() # call per underlying mapping. to_delete = [] updates = defaultdict(list) mapping_ids = dict((id(m), m) for m in self.mappings.values()) for key, value in items: old_mapping = self.key_to_mapping.get(key) mapping = self.mappings[self.selector(key, value)] if old_mapping is not None and old_mapping is not mapping: del old_mapping[key] # Can't hash a mutable mapping, so use its id() instead updates[id(mapping)].append((key, value)) for mid, mitems in updates.items(): mapping = mapping_ids[mid] mapping.update(mitems) for key, _ in mitems: self.key_to_mapping[key] = mapping def keys(self): return chain.from_iterable(self.mappings.values()) def values(self): return chain.from_iterable(m.values() for m in self.mappings.values()) def items(self): return chain.from_iterable(m.items() for m in self.mappings.values()) def __len__(self): return sum(map(len, self.mappings.values())) __iter__ = keys def __contains__(self, key): return key in self.key_to_mapping def __str__(self): return 'Sieve<%s>' % (str(self.mappings),) __repr__ = __str__ def flush(self): for m in self.mappings.values(): m.flush() def close(self): for m in self.mappings.values(): close(m) zict-0.1.3/LICENSE.txt0000664000175000017500000000272412714371023015774 0ustar mrocklinmrocklin00000000000000Copyright (c) 2016 Matthew Rocklin All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: a. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. b. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. c. Neither the name of toolz nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. zict-0.1.3/MANIFEST.in0000664000175000017500000000026212714371023015702 0ustar mrocklinmrocklin00000000000000recursive-include zict *.py recursive-include docs *.rst include setup.py include README.rst include LICENSE.txt include MANIFEST.in include requirements.txt prune docs/_build zict-0.1.3/README.rst0000664000175000017500000000037613160224305015635 0ustar mrocklinmrocklin00000000000000Zict ==== |Build Status| Mutable Mapping interfaces. See documentation_. .. _documentation: http://zict.readthedocs.io/en/latest/ .. |Build Status| image:: https://travis-ci.org/dask/zict.svg?branch=master :target: https://travis-ci.org/dask/zict zict-0.1.3/setup.py0000775000175000017500000000111213160466335015663 0ustar mrocklinmrocklin00000000000000#!/usr/bin/env python import os from setuptools import setup setup(name='zict', version='0.1.3', description='Mutable mapping tools', url='http://zict.readthedocs.io/en/latest/', maintainer='Matthew Rocklin', maintainer_email='mrocklin@gmail.com', license='BSD', keywords='mutable mapping,dict,dask', packages=['zict'], install_requires=[open('requirements.txt').read().strip().split('\n')], long_description=(open('README.rst').read() if os.path.exists('README.rst') else ''), zip_safe=False) zict-0.1.3/setup.cfg0000664000175000017500000000007313160466433015773 0ustar mrocklinmrocklin00000000000000[egg_info] tag_build = tag_date = 0 tag_svn_revision = 0