1
2
3
4
5
6
7
8
9
10 __all__ = ['dump', 'load', 'Pickler', 'Unpickler',
11 'dump_many', 'load_many']
12
13 __docformat__ = "restructuredtext en"
14
15 from copy_reg import dispatch_table
16 from copy_reg import _extension_registry, _inverted_registry, _extension_cache
17 from types import *
18 import keyword, marshal
19 import tables, numpy, cPickle as pickle, re, struct, sys
20
21 from pickle import whichmodule, PicklingError, FLOAT, INT, LONG, NONE, \
22 REDUCE, STRING, UNICODE, GLOBAL, DICT, INST, LIST, TUPLE, EXT4, \
23 encode_long, decode_long
24
25 BOOL = 'BB'
26 REF = 'RR'
27 COMPLEX = 'CC'
28
29 NUMARRAY = 'NA'
30 NUMPY = 'NP'
31 NUMERIC = 'NU'
32
33 HIGHEST_PROTOCOL = 2
34 """The pickling (programming) protocol supported by this module"""
35
37 sys.stderr.write(' '.join(map(str, args)) + '\n')
38
39 try:
40 from org.python.core import PyStringMap
41 except ImportError:
42 PyStringMap = None
43
44 try:
45 UnicodeType
46 except NameError:
47 UnicodeType = None
48
49 try:
50 from tables import NoSuchNodeError
51 except ImportError:
52 NoSuchNodeError = LookupError
53
54 try:
55 from tables import checkflavor
56 except ImportError:
57 import tables.flavor
59 return flavor.lower() in tables.flavor.all_flavors
60
61
62
63 NumericArrayType = None
64 NumericArrayType_native = False
65 try:
66 try:
67 try: checkflavor('Numeric', 'f')
68 except TypeError: checkflavor('Numeric', 'f', '')
69 NumpyArrayType_native = True
70 except ValueError:
71 pass
72 import Numeric
73 from Numeric import ArrayType as NumericArrayType
74 except ImportError:
75 pass
76
77 NumarrayArrayType = None
78 NumarrayArrayType_native = False
79 try:
80 try:
81 try: checkflavor('NumArray', 'f')
82 except TypeError: checkflavor('NumArray', 'f', '')
83 NumarrayArrayType_native = True
84 except ValueError:
85 pass
86 import numarray
87 from numarray import ArrayType as NumarrayArrayType
88 except ImportError:
89 pass
90
91 NumpyArrayType = None
92 NumpyArrayType_native = False
93 try:
94 try:
95 try: checkflavor('numpy', 'f')
96 except TypeError: checkflavor('numpy', 'f', '')
97 NumpyArrayType_native = True
98 except ValueError:
99 pass
100
101 import numpy
102 numarray.asarray(numpy.array([1,2,3]))
103 from numpy.oldnumeric import ArrayType as NumpyArrayType
104 except (ImportError, TypeError):
105 pass
106
107
108 HDF5PICKLE_PROTOCOL = 1
109 """Identifier for the current HDF5 pickling protocol"""
110
111
112
113
115 """
116 Internal interface to a `tables.File` object.
117
118 Includes convenience functions, including type conversion.
119 """
def __init__(self, file, type_map=None):
    """
    Wrap an open `tables.File` for pickling use.

    :param file: open PyTables file to read/write
    :param type_map: mapping of Python basic types (str, int, ...) to
        numpy dtypes; an empty mapping (numpy defaults) when ``None``
    """
    self.file = file
    # Use identity test for None, not equality.
    if type_map is None:
        self.type_map = {}
    else:
        self.type_map = type_map
126
def _splitpath(s):
    """
    Split an absolute node path into (parent, leaf):
    '/a/b/c' -> ('/a/b', 'c'), '/c' -> ('/', 'c').

    Raises ValueError (from rindex) if `s` contains no '/'.
    """
    i = s.rindex('/')
    where, name = s[:i], s[(i+1):]
    if where == '':
        where = '/'
    return where, name
_splitpath = staticmethod(_splitpath)
133
def set_attr(self, obj, attr, value):
    """Set an HDF5 attribute on a node; groups use the _f_setAttr API."""
    if isinstance(obj, tables.Group):
        obj._f_setAttr(attr, value)
    else:
        setattr(obj.attrs, attr, value)
139
def has_attr(self, obj, attr):
    """Return True if the node/group has the given HDF5 attribute."""
    try:
        self.get_attr(obj, attr)
        return True
    except AttributeError:
        return False
146
def get_attr(self, obj, attr):
    """Get an HDF5 attribute from a node; groups use the _f_getAttr API."""
    if isinstance(obj, tables.Group):
        return obj._f_getAttr(attr)
    else:
        return getattr(obj.attrs, attr)
152
def get_path(self, path):
    """Return the node at `path` (raises if it does not exist)."""
    return self.file.getNode(path)
155
def has_path(self, path):
    """Return True if a node exists at `path`."""
    try:
        self.file.getNode(path)
        return True
    except NoSuchNodeError:
        return False
162
def save_array(self, path, data):
    """
    Save a Python scalar or homogeneous sequence as an HDF5 array.

    Handles tuple/list of a single numeric type, str (stored as raw
    bytes), int/float/complex scalars, and long.  Raises TypeError for
    anything else (e.g. mixed-type sequences), so callers can fall
    back to a group-based layout.
    """
    where, name = self._splitpath(path)
    type_ = type(data)

    if type_ in (tuple, list, str):
        if len(data) == 0:
            # Zero-length data cannot be stored as a real array:
            # store a dummy element and tag the node 'empty'.
            array = self.file.createArray(
                where, name, numpy.array([0], dtype=numpy.int8))
            self.set_attr(array, 'empty', 1)
            return array
        elif type_ in (tuple, list):
            # Only homogeneous numeric sequences map cleanly to arrays.
            btype = type(data[0])
            if not btype in (int, float, complex):
                raise TypeError
            for item in data:
                if type(item) != btype:
                    raise TypeError
        if type_ is str:
            # Strings are stored as raw bytes (uint8 unless remapped).
            return self.file.createArray(where, name, numpy.fromstring(
                data, dtype=self.type_map.get(str, numpy.uint8)))
        return self.file.createArray(where, name, numpy.array(
            data, dtype=self.type_map.get(btype)))
    elif type_ in (int, float, complex):
        return self.file.createArray(where, name, numpy.array(
            data, dtype=self.type_map.get(type_)))
    elif type_ in (long,):
        # Longs may not fit a fixed-width dtype; default to object.
        return self.file.createArray(where, name, numpy.array(
            data, dtype=self.type_map.get(type_, numpy.object_)))
    else:
        raise TypeError
195
197 where, name = self._splitpath(path)
198 return self.file.createArray(where, name, data)
199
def load_array(self, node, type_):
    """
    Read a node written by `save_array` back as the given Python type.

    :param node: the tables node to read
    :param type_: target Python type (tuple, list, str, int, float,
        bool or complex); raises TypeError for anything else
    """
    if type_ in (tuple, list, str):
        if self.has_attr(node, 'empty'):
            # Dummy array tagged 'empty': return the empty value.
            return type_()
        else:
            if type_ is str:
                # Stored as a byte array; convert back to a string.
                return numpy.asarray(node.read()).tostring()
            return type_(node.read())
    elif type_ in (int, float):
        return type_(node.read())
    elif type_ is bool:
        return type_(numpy.alltrue(node.read()))
    elif type_ is complex:
        data = node.read()
        return complex(data[()])
    else:
        raise TypeError()
219
def new_group(self, path):
    """Create and return a new group at `path`."""
    where, name = self._splitpath(path)
    return self.file.createGroup(where, name)
223
224
225
226
227
229 """
230 Pickles Python objects to a HDF5 file.
231
232 Usage:
233 1. Instantiate
234 2. Call `dump` or `clear_memo` as necessary
235
236 You may wish to use a single instance of this class for multiple
237 objects to preserve references. It should be safe to call the `dump`
238 method multiple times, for different paths.
239 """
240 - def __init__(self, file, type_map=None):
251
def _keep_alive(self, obj):
    """Hold a reference so id(obj) stays unique for the pickling run."""
    self.memo[id(obj)] = obj
254
def clear_memo(self):
    """Forget saved object paths and kept-alive references."""
    self.paths = {}
    self.memo = {}
258
def dump(self, path, obj):
    """Pickle `obj` into the file at node path `path`."""
    self._save(path, obj)
261
def _save(self, path, obj):
    """
    Pickle `obj` to `path`, dispatching on its type.

    Objects already saved are written as references to their first
    location.  Otherwise, try in order: the type dispatch table,
    class/type objects (saved by name), copy_reg's dispatch table,
    ``__reduce_ex__(2)`` and ``__reduce__``.

    :raises PicklingError: if no way to pickle `obj` is found
    """
    x = self.paths.get(id(obj))
    if x:
        # Already saved: store only a reference to the first copy.
        self._save_ref(path, x)
        return
    else:
        self.paths[id(obj)] = path

    self._keep_alive(obj)

    # Type-specific handler?
    t = type(obj)
    f = self._dispatch.get(t)
    if f:
        f(self, path, obj)
        return

    # Classes and types are saved by reference (module + name).
    try:
        issc = issubclass(t, TypeType)
    except TypeError:
        # t is not a class at all (cf. pickle.py)
        issc = 0
    if issc:
        self._save_global(path, obj)
        return

    # copy_reg registry, then the reduce protocol.
    reduce = dispatch_table.get(t)
    if reduce:
        rv = reduce(obj)
    else:
        reduce = getattr(obj, "__reduce_ex__", None)
        if reduce:
            rv = reduce(2)
        else:
            reduce = getattr(obj, "__reduce__", None)
            if reduce:
                rv = reduce()
            else:
                raise PicklingError("Can't pickle %r object: %r" %
                                    (t.__name__, obj))

    # A bare string names a global; otherwise rv must be a 2..5-tuple.
    if type(rv) is StringType:
        self._save_global(path, obj, rv)
        return

    if type(rv) is not TupleType:
        raise PicklingError("%s must return string or tuple" % reduce)

    l = len(rv)
    if not (2 <= l <= 5):
        raise PicklingError("Tuple returned by %s must have "
                            "two to five elements" % reduce)

    self._save_reduce(path, obj=obj, *rv)
322
323 _dispatch = {}
324
329
def _save_reduce(self, path, func, args, state=None,
                 listitems=None, dictitems=None, obj=None):
    """
    Save the result of the reduce protocol
    (cf. ``pickle.Pickler.save_reduce``).

    Creates a group at `path` with a ``__`` subgroup holding the
    callable (or the class, for protocol-2 ``__newobj__``), its
    arguments, and optional list items, dict items and state.
    """
    if not isinstance(args, TupleType):
        if args is None:
            # Deprecated ExtensionClass __basicnew__ special case.
            # BUG FIX: 'warnings' is not imported at module level;
            # import locally to avoid a NameError on this path.
            import warnings
            warnings.warn("__basicnew__ special case is deprecated",
                          DeprecationWarning)
        else:
            raise PicklingError(
                "args from reduce() should be a tuple")

    if not callable(func):
        raise PicklingError("func from reduce should be callable")

    group = self.file.new_group(path)
    self.file.new_group(path + '/__')

    self.file.set_attr(group, 'pickletype', REDUCE)

    if getattr(func, "__name__", "") == "__newobj__":
        # Protocol-2 __newobj__: store the class and remaining args;
        # the unpickler will call cls.__new__(cls, *args).
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        args = args[1:]

        self._save('%s/__/cls' % path, cls)
        self._save('%s/__/args' % path, args)
    else:
        self._save('%s/__/func' % path, func)
        self._save('%s/__/args' % path, args)

    if obj is not None:
        self._keep_alive(obj)

    if listitems is not None:
        self._save('%s/__/listitems' % path, list(listitems))

    if dictitems is not None:
        self._save('%s/__/dictitems' % path, dict(dictitems))

    if state is not None:
        self.file.set_attr(group, 'has_reduce_content', 1)
        if isinstance(state, dict):
            # Dict state is flattened into the object's own group.
            self._save_dict_content(path, state)
            self._keep_alive(state)
        else:
            self._save('%s/__/content' % path, state)
413
def _save_none(self, path, obj):
    """None is stored as a dummy 0 array tagged NONE."""
    array = self.file.save_array(path, 0)
    self.file.set_attr(array, 'pickletype', NONE)
_dispatch[NoneType] = _save_none
418
422 _dispatch[bool] = _save_bool
423
def _save_int(self, path, obj):
    """Store an int as a scalar array tagged INT."""
    array = self.file.save_array(path, obj)
    self.file.set_attr(array, 'pickletype', INT)
_dispatch[IntType] = _save_int
428
def _save_long(self, path, obj):
    """Store a long as the byte string produced by pickle's encode_long."""
    array = self.file.save_array(path, str(encode_long(obj)))
    self.file.set_attr(array, 'pickletype', LONG)
_dispatch[LongType] = _save_long
433
def _save_float(self, path, obj):
    """Store a float as a scalar array tagged FLOAT."""
    array = self.file.save_array(path, obj)
    self.file.set_attr(array, 'pickletype', FLOAT)
_dispatch[FloatType] = _save_float
438
442 _dispatch[ComplexType] = _save_complex
443
def _save_string(self, path, obj):
    """Store a byte string as a raw byte array tagged STRING."""
    node = self.file.save_array(path, obj)
    self.file.set_attr(node, 'pickletype', STRING)
_dispatch[StringType] = _save_string
448
def _save_unicode(self, path, obj):
    """Store a unicode string as its UTF-8 bytes, tagged UNICODE."""
    node = self.file.save_array(path, obj.encode('utf-8'))
    self.file.set_attr(node, 'pickletype', UNICODE)
_dispatch[UnicodeType] = _save_unicode
453
def _save_tuple(self, path, obj):
    """
    Store a tuple: try a flat homogeneous array first; on TypeError
    (mixed element types) fall back to one child node per item.
    """
    try:
        array = self.file.save_array(path, obj)
        self.file.set_attr(array, 'pickletype', TUPLE)
        return array
    except TypeError:
        pass  # inhomogeneous: use the group layout below

    group = self.file.new_group(path)
    self.file.set_attr(group, 'pickletype', TUPLE)
    for i, item in enumerate(obj):
        self._save('%s/_%d' % (path, i), item)
    return group
_dispatch[TupleType] = _save_tuple
468
472 _dispatch[ListType] = _save_list
473
478
def _save_dict_content(self, path, obj):
    """
    Save a dict's items into the group at `path`.

    Keys that are valid PyTables identifiers are used directly as
    node names; all other keys get generated names ``_<n>`` and the
    real key object is saved under ``<path>/__/<generated name>``.
    """
    strkeys = {}   # real key -> node name used for it
    seen = {}      # node names already taken
    keyi = 0
    for key in obj.iterkeys():
        # ROBUSTNESS FIX: _check_pytables_name raises on invalid
        # names; treat that as "not directly usable" instead of
        # letting the exception abort the save.
        try:
            usable = (isinstance(key, str) and key != "__"
                      and _check_pytables_name(key))
        except (TypeError, ValueError):
            usable = False
        if usable:
            strkeys[key] = key
            seen[key] = True
    for key in obj.iterkeys():
        if key not in strkeys:
            while ("_%d" % keyi) in seen:
                keyi += 1
            strkeys[key] = "_%d" % keyi
            seen[strkeys[key]] = True

    hassub = self.file.has_path('%s/__' % path)

    for key, value in obj.iteritems():
        self._save('/'.join([path, strkeys[key]]), value)
        if strkeys[key] is not key:
            # Key was renamed: store the real key object under __/
            if not hassub:
                self.file.new_group('%s/__' % path)
                hassub = True
            self._save('%s/__/%s' % (path, strkeys[key]), key)

_dispatch[DictionaryType] = _save_dict
if PyStringMap is not None:
    _dispatch[PyStringMap] = _save_dict
507
def _save_inst(self, path, obj):
    """
    Save an old-style class instance
    (cf. ``pickle.Pickler.save_inst``), tagged INST.
    """
    cls = obj.__class__

    if hasattr(obj, '__getinitargs__'):
        args = obj.__getinitargs__()
        len(args)  # XXX assert it's a sequence (as in pickle.py)
    else:
        args = ()

    try:
        getstate = obj.__getstate__
    except AttributeError:
        stuff = obj.__dict__
    else:
        stuff = getstate()

    group = self.file.new_group(path)
    self.file.set_attr(group, 'pickletype', INST)

    self.file.new_group("%s/__" % path)
    self._save('%s/__/cls' % path, cls)
    self._save('%s/__/args' % path, args)

    if isinstance(stuff, dict):
        # Dict state is flattened into the instance's own group.
        self._save_dict_content(path, stuff)
        self._keep_alive(stuff)
    else:
        self._save('%s/__/content' % path, stuff)
_dispatch[InstanceType] = _save_inst
537
def _save_global(self, path, obj, name=None, pack=struct.pack):
    """
    Save a class/function/type by reference (module + name).

    Uses the copy_reg extension registry (EXT4, packed little-endian
    int) when the (module, name) pair has a registered code; otherwise
    stores ``module\\nname`` tagged GLOBAL.

    :raises PicklingError: if the object cannot be re-imported as
        ``module.name`` or resolves to a different object
    """
    if name is None:
        name = obj.__name__

    module = getattr(obj, "__module__", None)
    if module is None:
        module = whichmodule(obj, name)

    try:
        __import__(module)
        mod = sys.modules[module]
        klass = getattr(mod, name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError(
            "Can't pickle %r: it's not found as %s.%s" %
            (obj, module, name))
    else:
        if klass is not obj:
            raise PicklingError(
                "Can't pickle %r: it's not the same object as %s.%s" %
                (obj, module, name))

    pickletype = None

    code = _extension_registry.get((module, name))
    if code:
        assert code > 0
        pickletype = EXT4
        stuff = pack("<i", code)

    if not pickletype:
        stuff = module + '\n' + name
        pickletype = GLOBAL

    array = self.file.save_array(path, str(stuff))
    self.file.set_attr(array, 'pickletype', pickletype)

_dispatch[ClassType] = _save_global
_dispatch[FunctionType] = _save_global
_dispatch[BuiltinFunctionType] = _save_global
_dispatch[TypeType] = _save_global
579
586 _dispatch[NumericArrayType] = _save_numeric_array
587
594 _dispatch[NumpyArrayType] = _save_numpy_array
595
602 _dispatch[NumarrayArrayType] = _save_numarray_array
603
604
605
606
607
609 """
610 Unpickles Python objects from a HDF5 file.
611
612 Usage:
613 1. Instantiate
614 2. Call `load` or `clear_memo` as needed
615
616 You may wish to use a single instance of this class for multiple
617 objects to preserve references. It should be safe to call the `load`
618 method multiple times, for different paths.
619 """
def __init__(self, file, type_map=None):
    """
    :param file: open `tables.File` to load from
    :param type_map: mapping of Python basic types (str, int, ...)
        to numpy types; ``None`` for numpy defaults
    """
    # BUG FIX: the given type_map was previously discarded
    # (type_map=None was always passed on).
    self.file = _FileInterface(file, type_map=type_map)
    self.memo = {}
623
def clear_memo(self):
    """Forget all previously loaded objects."""
    self.memo = {}
626
def load(self, path):
    """
    Load and return the object stored at `path`.

    Results are memoized per path so shared references are preserved.
    """
    if path not in self.memo:
        node = self.file.get_path(path)
        # NOTE(review): get_attr raises AttributeError when the
        # attribute is absent; the falsy-key fallback below only
        # covers an empty pickletype -- confirm intended for raw nodes.
        key = self.file.get_attr(node, 'pickletype')
        if key:
            f = self._dispatch[key]
            obj = f(self, node)
        else:
            obj = node.read()
        self.memo[path] = obj
    return self.memo[path]
638
639 _dispatch = {}
640
def _load_ref(self, node):
    """REF tag: follow the 'target' attribute to the referenced path."""
    path = self.file.get_attr(node, 'target')
    return self.load(path)
_dispatch[REF] = _load_ref
645
def _load_reduce(self, node):
    """Rebuild an object saved via the reduce protocol (REDUCE tag)."""
    path = node._v_pathname
    args = self.load('%s/__/args' % path)

    if self.file.has_path('%s/__/func' % path):
        func = self.load('%s/__/func' % path)

        if args is None:
            # Deprecated ExtensionClass __basicnew__ special case.
            # BUG FIX: 'warnings' is not imported at module level;
            # import locally to avoid a NameError on this path.
            import warnings
            warnings.warn("__basicnew__ special case is deprecated",
                          DeprecationWarning)
            obj = func.__basicnew__()
        else:
            obj = func(*args)
    else:
        # __newobj__ form: class + constructor args
        cls = self.load('%s/__/cls' % path)
        obj = cls.__new__(cls, *args)

    # Memoize before filling contents so self-references resolve.
    self.memo[path] = obj

    if self.file.has_path('%s/__/listitems' % path):
        data = self.load('%s/__/listitems' % path)
        obj.extend(data)

    if self.file.has_path('%s/__/dictitems' % path):
        data = self.load('%s/__/dictitems' % path)
        for key, value in data.iteritems():
            obj[key] = value

    if self.file.has_path('%s/__/content' % path):
        state = self.load('%s/__/content' % path)
        if state is not None:
            self._setstate(obj, state)
    elif self.file.has_attr(node, 'has_reduce_content'):
        # Dict state was flattened into the object's own group.
        state = {}
        state = self._load_dict_content(node, state)
        self._setstate(obj, state)
    return obj
_dispatch[REDUCE] = _load_reduce
684
def _load_none(self, node):
    """NONE tag: the stored object is None."""
    return None
_dispatch[NONE] = _load_none
688
691 _dispatch[BOOL] = _load_bool
692
695 _dispatch[INT] = _load_int
696
def _load_long(self, node):
    """LONG tag: decode the byte string written by encode_long."""
    data = self.file.load_array(node, str)
    return decode_long(data)
_dispatch[LONG] = _load_long
701
704 _dispatch[FLOAT] = _load_float
705
708 _dispatch[COMPLEX] = _load_complex
709
712 _dispatch[STRING] = _load_string
713
def _load_unicode(self, node):
    """UNICODE tag: decode stored UTF-8 bytes."""
    data = self.file.load_array(node, str)
    return data.decode('utf-8')
_dispatch[UNICODE] = _load_unicode
718
def _load_list_content(self, node):
    """
    Load list/tuple items: either a flat homogeneous array, or one
    child node per item, sorted so numeric suffixes come in numeric
    order ('_2' before '_10').
    """
    if isinstance(node, tables.Array):
        return self.file.load_array(node, list)

    items = []
    # Memoize before loading children so self-references resolve.
    self.memo[node._v_pathname] = items

    def cmpfunc(a, b):
        # Shorter names first, then lexicographic: this sorts the
        # generated '_<n>' names in numeric order.
        c = len(a) - len(b)
        if c == 0:
            c = cmp(a, b)
        return c

    names = list(node._v_children)
    names.sort(cmpfunc)

    for name in names:
        items.append(self.load('%s/%s' % (node._v_pathname, name)))

    return items
739
742 _dispatch[TUPLE] = _load_tuple
743
746 _dispatch[LIST] = _load_list
747
def _load_dict(self, node):
    """DICT tag: load the group's children as a dict."""
    path = node._v_pathname
    data = {}
    # Memoize before loading values so self-references resolve.
    self.memo[path] = data
    return self._load_dict_content(node, data)
753
def _load_dict_content(self, node, data):
    """
    Fill dict `data` with the items stored under `node`.

    The '__' subgroup maps generated node names back to the real
    (non-identifier) key objects.
    """
    path = node._v_pathname
    strkeys = {}

    if '__' in node._v_children:
        n2 = node._v_children['__']
        for name in n2._v_children:
            if name.startswith('_'):
                # node name -> real key object
                strkeys[name] = self.load('%s/__/%s' % (path, name))

    for key in node._v_children:
        if key == '__':
            continue

        if key in strkeys:
            realkey = strkeys[key]
        else:
            realkey = key

        data[realkey] = self.load('%s/%s' % (path, key))

    return data
_dispatch[DICT] = _load_dict
776
777
778
779
780
781
783 instantiated = 0
784 if (not args and type(klass) is ClassType and
785 not hasattr(klass, "__getinitargs__")):
786 try:
787 value = _EmptyClass()
788 value.__class__ = klass
789 instantiated = 1
790 except RuntimeError:
791
792
793 pass
794 if not instantiated:
795 try:
796 value = klass(*args)
797 except TypeError, err:
798 raise TypeError, "in constructor for %s: %s" % (
799 klass.__name__, str(err)), sys.exc_info()[2]
800 return value
801
def _load_inst(self, node):
    """Load an old-style instance saved by _save_inst (INST tag)."""
    path = node._v_pathname

    cls = self.load('%s/__/cls' % path)
    args = self.load('%s/__/args' % path)

    inst = self._instantiate(cls, args)

    # Memoize before restoring state so self-references resolve.
    self.memo[path] = inst

    if self.file.has_path('%s/__/content' % path):
        state = self.load('%s/__/content' % path)
    else:
        # Dict state was flattened into the instance's own group.
        state = {}
        state = self._load_dict_content(node, state)
    self._setstate(inst, state)

    return inst
_dispatch[INST] = _load_inst
821
def _setstate(self, inst, state):
    """
    Apply pickled `state` to `inst`: prefer __setstate__, else update
    __dict__, handling the (state, slotstate) 2-tuple form.
    """
    setstate = getattr(inst, "__setstate__", None)
    if setstate:
        setstate(state)
        return

    slotstate = None
    if isinstance(state, tuple) and len(state) == 2:
        state, slotstate = state

    if state:
        try:
            inst.__dict__.update(state)
        except RuntimeError:
            # Restricted execution (cf. pickle.py): fall back to
            # setattr one key at a time.
            for k, v in state.items():
                setattr(inst, k, v)
    if slotstate:
        for k, v in slotstate.items():
            setattr(inst, k, v)
850
def _load_global(self, node):
    """GLOBAL tag: resolve a 'module\\nname' reference."""
    data = self.file.load_array(node, str)
    module, name = data.split('\n')
    return self._find_class(module, name)
_dispatch[GLOBAL] = _load_global
856
861 _dispatch[EXT4] = _load_ext
862
def _load_numeric_array(self, node):
    """NUMERIC tag: return the data as a Numeric array."""
    import Numeric
    return Numeric.asarray(node.read())
_dispatch[NUMERIC] = _load_numeric_array
867
def _load_numpy_array(self, node):
    """NUMPY tag: return the data as a numpy array."""
    import numpy
    return numpy.asarray(node.read())
_dispatch[NUMPY] = _load_numpy_array
872
def _load_numarray_array(self, node):
    """NUMARRAY tag: return the data as a numarray array."""
    import numarray
    return numarray.asarray(node.read())
_dispatch[NUMARRAY] = _load_numarray_array
877
def get_extension(self, code):
    """
    Resolve a copy_reg extension `code` to the registered object.

    BUG FIX: the cached branch previously did ``self.append(obj)``
    followed by a bare ``return`` -- copied from pickle.Unpickler,
    which has a stack; this class does not, so it raised
    AttributeError and never returned the cached object.
    """
    nil = []
    obj = _extension_cache.get(code, nil)
    if obj is not nil:
        return obj
    key = _inverted_registry.get(code)
    if not key:
        raise ValueError("unregistered extension code %d" % code)
    obj = self._find_class(*key)
    _extension_cache[code] = obj
    return obj
890
892
def _find_class(self, module, name):
    """Import `module` and return its attribute `name`."""
    __import__(module)
    mod = sys.modules[module]
    klass = getattr(mod, name)
    return klass
897
898
899
900
901
class _EmptyClass:
    """Helper for Unpickler._instantiate: an empty old-style class
    whose __class__ can be reassigned."""
    pass
904
pythonIdRE = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
reservedIdRE = re.compile('^_[cfgv]_')

def _check_pytables_name(name):
    """
    Check the validity of the `name` of a PyTables object,
    so that PyTables won't spew warnings or exceptions.

    :raises TypeError: if `name` is not a string
    :raises ValueError: if `name` is not a usable node name
    :return: True when the name is valid

    BUG FIX: this is used in a boolean context
    (``... and _check_pytables_name(key) and ...``) but previously
    fell off the end returning None, so valid keys were never
    treated as usable; return True explicitly.
    """
    if not isinstance(name, basestring):
        raise TypeError()
    if name == '':
        raise ValueError()
    if name == '.':
        raise ValueError()
    if '/' in name:
        raise ValueError()
    if not pythonIdRE.match(name):
        raise ValueError()
    if keyword.iskeyword(name):
        raise ValueError()
    if reservedIdRE.match(name):
        raise ValueError()
    return True
926
933
934
935
936
937
def dump(obj, file, path, type_map=None):
    """
    Dump a Python object to an open PyTables HDF5 file.

    :param obj: the object to dump
    :param file: where to dump
    :type file: tables.File
    :param path: path where to dump in the file
    :param type_map:
        mapping of Python basic types (str, int, ...) to numpy types.
        If ``None``, numpy's default mapping is used.
    """
    Pickler(file, type_map=type_map).dump(path, obj)
951
def load(file, path):
    """
    Load a Python object from an open PyTables HDF5 file.

    :param file: where to load from
    :type file: tables.File
    :param path: path to the object in the file

    :return: loaded object
    """
    return Unpickler(file).load(path)
963
965 """
966 Dump multiple Python objects to an open PyTables HDF5 file,
967 preserving any references between the objects.
968
969 Calling `dump(file, path)` many times for objects keeping references
970 to each other would result in duplicated data.
971
972 :param file: where to dump
973 :type file: tables.File
974 :param desc: a list of (path, obj)
975 :param type_map:
976 mapping of Python basic types (str, int, ...) to numpy types.
977 If ``None``, numpy's default mapping is used.
978 """
979 p = Pickler(file, type_map=type_map)
980 for path, obj in desc:
981 p.dump(path, obj)
982
984 """
985 Load multiple Python objects from the file, preserving any
986 references between them.
987
988 Calling `load(file, path)` many times for objects keeping references
989 to each other would result to duplicated data.
990
991 :param file: where to dump
992 :type file: tables.File
993 :param paths: a list of paths where to load from
994
995 :return: list of (path, object)
996 """
997 p = Unpickler(file)
998 r = []
999 for path in paths:
1000 obj = p.load(path)
1001 r.append( (path, obj) )
1002 return r
1003