Package hdf5pickle :: Module base
[hide private]
[frames] | [no frames]

Source Code for Module hdf5pickle.base

   1  # This file is heavily adapted from 'pickle.py' in Python 2.4: 
   2  # Copyright (c) 2001, 2002, 2003, 2004 Python Software Foundation 
   3  # All Rights Reserved. 
   4  # 
   5  # Modifications to use Pytables: 
   6  # Copyright (c) 2006 Pauli Virtanen <pav@iki.fi> 
   7  # 
   8  # See LICENSE.txt for some legalese. 
   9   
  10  __all__ = ['dump', 'load', 'Pickler', 'Unpickler', 
  11             'dump_many', 'load_many'] 
  12   
  13  __docformat__ = "restructuredtext en" 
  14   
from copy_reg import dispatch_table
from copy_reg import _extension_registry, _inverted_registry, _extension_cache
from types import *
import keyword
import marshal
import re
import struct
import sys
import warnings

import numpy
import tables
import cPickle as pickle

from pickle import whichmodule, PicklingError, FLOAT, INT, LONG, NONE, \
     REDUCE, STRING, UNICODE, GLOBAL, DICT, INST, LIST, TUPLE, EXT4, \
     encode_long, decode_long
  24   
  25  BOOL    = 'BB' 
  26  REF     = 'RR' 
  27  COMPLEX = 'CC' 
  28   
  29  NUMARRAY = 'NA' 
  30  NUMPY    = 'NP' 
  31  NUMERIC  = 'NU' 
  32   
  33  HIGHEST_PROTOCOL = 2 
  34  """The pickling (programming) protocol supported by this module""" 
  35   
36 -def _DEBUG(*args):
37 sys.stderr.write(' '.join(map(str, args)) + '\n')
38 39 try: 40 from org.python.core import PyStringMap 41 except ImportError: 42 PyStringMap = None 43 44 try: 45 UnicodeType 46 except NameError: 47 UnicodeType = None 48 49 try: 50 from tables import NoSuchNodeError 51 except ImportError: 52 NoSuchNodeError = LookupError 53 54 try: 55 from tables import checkflavor 56 except ImportError: 57 import tables.flavor
58 - def checkflavor(flavor, x=None, y=None):
59 return flavor.lower() in tables.flavor.all_flavors

### Check what PyTables supports on this system
#
# For each of the three array packages (Numeric, numarray, numpy) we
# record (a) whether the package is importable (<Pkg>ArrayType is its
# array type, or None), and (b) whether PyTables accepts the flavor
# natively (<Pkg>ArrayType_native), i.e. without conversion to numpy.

NumericArrayType = None
NumericArrayType_native = False
try:
    try:
        # checkflavor's signature varies between PyTables versions.
        try: checkflavor('Numeric', 'f')
        except TypeError: checkflavor('Numeric', 'f', '')
        # BUGFIX: this previously assigned NumpyArrayType_native, so
        # NumericArrayType_native could never become True.
        NumericArrayType_native = True
    except ValueError:
        pass
    import Numeric
    from Numeric import ArrayType as NumericArrayType
except ImportError:
    pass

NumarrayArrayType = None
NumarrayArrayType_native = False
try:
    try:
        try: checkflavor('NumArray', 'f')
        except TypeError: checkflavor('NumArray', 'f', '')
        NumarrayArrayType_native = True
    except ValueError:
        pass
    import numarray
    from numarray import ArrayType as NumarrayArrayType
except ImportError:
    pass

NumpyArrayType = None
NumpyArrayType_native = False
try:
    try:
        try: checkflavor('numpy', 'f')
        except TypeError: checkflavor('numpy', 'f', '')
        NumpyArrayType_native = True
    except ValueError:
        pass
    # check that conversion is possible
    import numpy
    numarray.asarray(numpy.array([1, 2, 3]))
    from numpy.oldnumeric import ArrayType as NumpyArrayType
except (ImportError, TypeError, NameError):
    # BUGFIX: NameError added — 'numarray' is unbound when the numarray
    # import above failed, and previously crashed module import here.
    pass


HDF5PICKLE_PROTOCOL = 1
"""Identifier for the current HDF5 pickling protocol"""

#############################################################################

class _FileInterface(object):
    """
    Internal interface to a `tables.File` object.

    Includes convenience functions, including type conversion between
    basic Python types and the numpy arrays actually stored in the file.
    """
    def __init__(self, file, type_map=None):
        # `type_map` maps Python scalar types (str, int, ...) to numpy
        # dtypes used when storing; empty map means numpy's defaults.
        self.file = file
        if type_map == None:
            self.type_map = {}
        else:
            self.type_map = type_map

    def _splitpath(s):
        # Split an absolute HDF5 path into (parent path, node name);
        # the parent of a top-level node is '/'.
        i = s.rindex('/')
        where, name = s[:i], s[(i+1):]
        if where == '': where = '/'
        return where, name
    _splitpath = staticmethod(_splitpath)

    def set_attr(self, obj, attr, value):
        # Groups use the _f_* attribute API; leaves expose `.attrs`.
        if isinstance(obj, tables.Group):
            obj._f_setAttr(attr, value)
        else:
            setattr(obj.attrs, attr, value)

    def has_attr(self, obj, attr):
        """Return True if node `obj` carries HDF5 attribute `attr`."""
        try:
            self.get_attr(obj, attr)
            return True
        except AttributeError:
            return False

    def get_attr(self, obj, attr):
        """Read HDF5 attribute `attr` from node `obj` (AttributeError if absent)."""
        if isinstance(obj, tables.Group):
            return obj._f_getAttr(attr)
        else:
            return getattr(obj.attrs, attr)

    def get_path(self, path):
        """Return the node at `path` (raises NoSuchNodeError if missing)."""
        return self.file.getNode(path)

    def has_path(self, path):
        """Return True if a node exists at `path`."""
        try:
            self.file.getNode(path)
            return True
        except NoSuchNodeError:
            return False

    def save_array(self, path, data):
        """
        Store a basic Python value as an HDF5 array node.

        Accepts str, int, long, float, complex, and homogeneous
        tuples/lists of int/float/complex.  Raises TypeError otherwise;
        callers rely on that to fall back to group-based storage.
        """
        where, name = self._splitpath(path)
        type_ = type(data)

        if type_ in (tuple, list, str):
            if len(data) == 0:
                # Zero-length arrays can't be stored; write a one-element
                # placeholder flagged with the 'empty' attribute.
                array = self.file.createArray(
                    where, name, numpy.array([0], dtype=numpy.int8))
                self.set_attr(array, 'empty', 1)
                return array
            elif type_ in (tuple, list):
                # Only homogeneous numeric sequences are representable.
                btype = type(data[0])
                if not btype in (int, float, complex):
                    raise TypeError
                for item in data:
                    if type(item) != btype:
                        raise TypeError
            if type_ is str:
                # FIXME: pytables chops off NULs from strings!
                #        protect via encoding in 8-bytes
                return self.file.createArray(where, name, numpy.fromstring(
                    data, dtype=self.type_map.get(str, numpy.uint8)))
            return self.file.createArray(where, name, numpy.array(
                data, dtype=self.type_map.get(btype)))
        elif type_ in (int, float, complex):
            return self.file.createArray(where, name, numpy.array(
                data, dtype=self.type_map.get(type_)))
        elif type_ in (long,):
            # Python longs default to object dtype unless mapped otherwise.
            return self.file.createArray(where, name, numpy.array(
                data, dtype=self.type_map.get(type_, numpy.object_)))
        else:
            raise TypeError

    def save_numeric_array(self, path, data):
        """Store a Numeric/numarray/numpy array as-is."""
        where, name = self._splitpath(path)
        return self.file.createArray(where, name, data)

    def load_array(self, node, type_):
        """
        Read `node` back, converted to the basic Python type `type_`.

        Inverse of `save_array`; honors the 'empty' placeholder attribute.
        """
        if type_ in (tuple, list, str):
            if self.has_attr(node, 'empty'):
                return type_()
            else:
                if type_ is str:
                    # FIXME: pytables chops off NULs from strings!
                    #        protect via encoding in 8-bytes
                    return numpy.asarray(node.read()).tostring()
                return type_(node.read())
        elif type_ in (int, float):
            return type_(node.read())
        elif type_ is bool:
            # Stored as an int array; collapse to a single truth value.
            return type_(numpy.alltrue(node.read()))
        elif type_ is complex:
            # Zero-dimensional array: extract the scalar with [()].
            data = node.read()
            return complex(data[()])
        else:
            raise TypeError()

    def new_group(self, path):
        """Create and return a new HDF5 group at `path`."""
        where, name = self._splitpath(path)
        return self.file.createGroup(where, name)
223 224 225 ############################################################################# 226 227
class Pickler(object):
    """
    Pickles Python objects to a HDF5 file.

    Usage:
      1. Instantaniate
      2. Call `dump` or `clear_memo` as necessary

    You may wish to use a single instance of this class for multiple
    objects to preserve references. It should be safe to call the `dump`
    method multiple times, for different paths.
    """
    def __init__(self, file, type_map=None):
        # `paths` maps id(obj) -> HDF5 path already written, so shared
        # objects become REF nodes; `memo` keeps seen objects alive so
        # their id()s stay unique for this pickler's lifetime.
        self.file = _FileInterface(file, type_map)

        self.paths = {}
        self.memo = {}

        self.proto = HDF5PICKLE_PROTOCOL # hard-coded

        # Stamp the file root so unpicklers can identify the layout.
        self.file.set_attr(self.file.get_path('/'),
                           'hdf5pickle_protocol',
                           HDF5PICKLE_PROTOCOL)

    def _keep_alive(self, obj):
        # Prevent garbage collection: a recycled id() would make two
        # distinct objects alias the same entry in `self.paths`.
        self.memo[id(obj)] = obj

    def clear_memo(self):
        """Forget all previously pickled objects (ends reference sharing)."""
        self.paths = {}
        self.memo = {}

    def dump(self, path, obj):
        """Pickle `obj` into the file at HDF5 path `path`."""
        self._save(path, obj)

    def _save(self, path, obj):
        """Core dispatch: write `obj` at `path` using the best strategy."""
        # Already written once?  Store only a reference node.
        x = self.paths.get(id(obj))
        if x:
            self._save_ref(path, x)
            return
        else:
            self.paths[id(obj)] = path

        self._keep_alive(obj)

        # Check if we have a dispatch for it
        t = type(obj)
        f = self._dispatch.get(t)
        if f:
            x = f(self, path, obj)
            return

        # Check for a class with a custom metaclass; treat as regular class
        try:
            issc = issubclass(t, TypeType)
        except TypeError: # t is not a class (old Boost; see SF #502085)
            issc = 0
        if issc:
            self._save_global(path, obj)
            return

        # Check copy_reg.dispatch_table
        reduce = dispatch_table.get(t)
        if reduce:
            rv = reduce(obj)
        else:
            # Check for a __reduce_ex__ method, fall back to __reduce__
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce:
                rv = reduce(2) # "protocol 2"
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce:
                    rv = reduce()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if type(rv) is StringType:
            self._save_global(path, obj, rv)
            return

        # Assert that reduce() returned a tuple
        if type(rv) is not TupleType:
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 5):
            raise PicklingError("Tuple returned by %s must have "
                                "two to five elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self._save_reduce(path, obj=obj, *rv)

    # Maps Python type -> unbound save method; populated below as each
    # _save_* method is defined.
    _dispatch = {}

    def _save_ref(self, path, objpath):
        # A REF node is an empty group whose 'target' attribute holds
        # the path of the previously-written object.
        group = self.file.new_group(path)
        self.file.set_attr(group, 'target', objpath)
        self.file.set_attr(group, 'pickletype', REF)

    def _save_reduce(self, path, func, args, state=None,
                     listitems=None, dictitems=None, obj=None):
        # This API is called by some subclasses

        # Assert that args is a tuple or None
        if not isinstance(args, TupleType):
            if args is None:
                # A hack for Jim Fulton's ExtensionClass, now deprecated.
                # See load_reduce()
                warnings.warn("__basicnew__ special case is deprecated",
                              DeprecationWarning)
            else:
                raise PicklingError(
                    "args from reduce() should be a tuple")

        # Assert that func is callable
        if not callable(func):
            raise PicklingError("func from reduce should be callable")

        group = self.file.new_group(path)
        # The '__' subgroup holds pickling metadata (func/cls, args, ...),
        # keeping it apart from any dict-style content saved on `group`.
        self.file.new_group(path + '/__')

        self.file.set_attr(group, 'pickletype', REDUCE)

        # Protocol 2 special case: if func's name is __newobj__, store the
        # class and remaining args instead of the function (NEWOBJ-style).
        # By convention (see pickle.py) __newobj__ is implemented as
        #     def __newobj__(cls, *args): return cls.__new__(cls, *args)
        # so unpickling calls cls.__new__(cls, *args) (see _load_reduce).
        if getattr(func, "__name__", "") == "__newobj__":
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]

            self._save('%s/__/cls' % path, cls)
            self._save('%s/__/args' % path, args)
        else:
            self._save('%s/__/func' % path, func)
            self._save('%s/__/args' % path, args)

        if obj is not None:
            self._keep_alive(obj)

        if listitems is not None:
            self._save('%s/__/listitems' % path, list(listitems))

        if dictitems is not None:
            self._save('%s/__/dictitems' % path, dict(dictitems))

        if state is not None:
            self.file.set_attr(group, 'has_reduce_content', 1)
            if isinstance(state, dict):
                # Dict state is flattened into the group itself.
                self._save_dict_content(path, state)
                self._keep_alive(state)
            else:
                self._save('%s/__/content' % path, state)

    def _save_none(self, path, obj):
        array = self.file.save_array(path, 0)
        self.file.set_attr(array, 'pickletype', NONE)
    _dispatch[NoneType] = _save_none

    def _save_bool(self, path, obj):
        array = self.file.save_array(path, int(obj))
        self.file.set_attr(array, 'pickletype', BOOL)
    _dispatch[bool] = _save_bool

    def _save_int(self, path, obj):
        array = self.file.save_array(path, obj)
        self.file.set_attr(array, 'pickletype', INT)
    _dispatch[IntType] = _save_int

    def _save_long(self, path, obj):
        # Arbitrary-precision: stored as pickle's little-endian 2's
        # complement byte string (see encode_long/decode_long).
        array = self.file.save_array(path, str(encode_long(obj)))
        self.file.set_attr(array, 'pickletype', LONG)
    _dispatch[LongType] = _save_long

    def _save_float(self, path, obj):
        array = self.file.save_array(path, obj)
        self.file.set_attr(array, 'pickletype', FLOAT)
    _dispatch[FloatType] = _save_float

    def _save_complex(self, path, obj):
        array = self.file.save_array(path, obj)
        self.file.set_attr(array, 'pickletype', COMPLEX)
    _dispatch[ComplexType] = _save_complex

    def _save_string(self, path, obj):
        node = self.file.save_array(path, obj)
        self.file.set_attr(node, 'pickletype', STRING)
    _dispatch[StringType] = _save_string

    def _save_unicode(self, path, obj):
        # Unicode is stored UTF-8 encoded; _load_unicode decodes it back.
        node = self.file.save_array(path, obj.encode('utf-8'))
        self.file.set_attr(node, 'pickletype', UNICODE)
    _dispatch[UnicodeType] = _save_unicode

    def _save_tuple(self, path, obj):
        # Fast path: homogeneous numeric tuples become a single array node.
        try:
            array = self.file.save_array(path, obj)
            self.file.set_attr(array, 'pickletype', TUPLE)
            return array
        except TypeError:
            pass

        # General path: a group with children _0, _1, ... per item.
        group = self.file.new_group(path)
        self.file.set_attr(group, 'pickletype', TUPLE)
        for i, item in enumerate(obj):
            self._save('%s/_%d' % (path, i), item)
        return group
    _dispatch[TupleType] = _save_tuple

    def _save_list(self, path, obj):
        # Same layout as tuples, only the pickletype tag differs.
        item = self._save_tuple(path, obj)
        self.file.set_attr(item, 'pickletype', LIST)
    _dispatch[ListType] = _save_list

    def _save_dict(self, path, obj):
        group = self.file.new_group(path)
        self.file.set_attr(group, 'pickletype', DICT)
        self._save_dict_content(path, obj)

    def _save_dict_content(self, path, obj):
        # Each dict entry becomes a child node.  Keys that are valid
        # PyTables identifiers are used directly as node names; all other
        # keys get a generated '_<n>' node name, with the real (pickled)
        # key stored under '__/<node name>'.
        strkeys = {}
        seen = {}
        keyi = 0
        for key in obj.iterkeys():
            if (isinstance(key, str) and _check_pytables_name(key)
                    and key != "__"):
                strkeys[key] = key
                seen[key] = True
        for key in obj.iterkeys():
            if not key in strkeys:
                while ("_%d" % keyi) in seen: keyi += 1
                strkeys[key] = "_%d" % keyi
                seen[strkeys[key]] = True

        hassub = self.file.has_path('%s/__' % path)

        for key, value in obj.iteritems():
            self._save('/'.join([path, strkeys[key]]), value)
            if not strkeys[key] is key:
                # Generated name: record the real key under '__'.
                if not hassub:
                    self.file.new_group('%s/__' % path)
                    hassub = True
                self._save('%s/__/%s' % (path, strkeys[key]), key)

    _dispatch[DictionaryType] = _save_dict
    if not PyStringMap is None:
        _dispatch[PyStringMap] = _save_dict

    def _save_inst(self, path, obj):
        # Old-style class instances: store class, init args and state,
        # mirroring pickle's INST opcode.
        cls = obj.__class__

        if hasattr(obj, '__getinitargs__'):
            args = obj.__getinitargs__()
            len(args) # XXX Assert it's a sequence
        else:
            args = ()

        try:
            getstate = obj.__getstate__
        except AttributeError:
            stuff = obj.__dict__
        else:
            stuff = getstate()

        group = self.file.new_group(path)
        self.file.set_attr(group, 'pickletype', INST)

        self.file.new_group("%s/__" % path)
        self._save('%s/__/cls' % path, cls)
        self._save('%s/__/args' % path, args)

        if isinstance(stuff, dict):
            self._save_dict_content(path, stuff)
            self._keep_alive(stuff)
        else:
            self._save('%s/__/content' % path, stuff)
    _dispatch[InstanceType] = _save_inst

    def _save_global(self, path, obj, name=None, pack=struct.pack):
        # Classes/functions are stored by reference: either an extension
        # registry code (EXT4) or a 'module\nname' string (GLOBAL).
        if name is None:
            name = obj.__name__

        module = getattr(obj, "__module__", None)
        if module is None:
            module = whichmodule(obj, name)

        # Verify that re-importing module.name yields this very object;
        # otherwise unpickling could silently produce something else.
        try:
            __import__(module)
            mod = sys.modules[module]
            klass = getattr(mod, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module, name))
        else:
            if klass is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module, name))

        pickletype = None

        code = _extension_registry.get((module, name))
        if code:
            assert code > 0
            pickletype = EXT4
            stuff = pack("<i", code)

        if not pickletype:
            stuff = module + '\n' + name
            pickletype = GLOBAL

        array = self.file.save_array(path, str(stuff))
        self.file.set_attr(array, 'pickletype', pickletype)

    _dispatch[ClassType] = _save_global
    _dispatch[FunctionType] = _save_global
    _dispatch[BuiltinFunctionType] = _save_global
    _dispatch[TypeType] = _save_global

    # Array-package entries below may register under key None when the
    # corresponding package is absent; type(obj) never equals None, so
    # such entries are inert.

    def _save_numeric_array(self, path, obj):
        if not NumericArrayType_native:
            # PyTables can't store this flavor natively; convert to numpy.
            obj = numpy.asarray(obj)
        array = self.file.save_numeric_array(path, obj)
        self.file.set_attr(array, 'pickletype', NUMERIC)
        return array
    _dispatch[NumericArrayType] = _save_numeric_array

    def _save_numpy_array(self, path, obj):
        if not NumpyArrayType_native:
            obj = numpy.asarray(obj)
        array = self.file.save_numeric_array(path, obj)
        self.file.set_attr(array, 'pickletype', NUMPY)
        return array
    _dispatch[NumpyArrayType] = _save_numpy_array

    def _save_numarray_array(self, path, obj):
        if not NumarrayArrayType_native:
            obj = numpy.asarray(obj)
        array = self.file.save_numeric_array(path, obj)
        self.file.set_attr(array, 'pickletype', NUMARRAY)
        return array
    _dispatch[NumarrayArrayType] = _save_numarray_array
603 604 605 ############################################################################# 606 607
608 -class Unpickler(object):
609 """ 610 Unpickles Python objects from a HDF5 file. 611 612 Usage: 613 1. Instantaniate 614 2. Call `load` or `clear_memo` as needed 615 616 You may wish to use a single instance of this class for multiple 617 objects to preserve references. It should be safe to call the `load` 618 method multiple times, for different paths. 619 """
620 - def __init__(self, file, type_map=None):
621 self.file = _FileInterface(file, type_map=None) 622 self.memo = {}
623
624 - def clear_memo(self):
625 self.memo = {}
626
627 - def load(self, path):
628 if not path in self.memo: 629 node = self.file.get_path(path) 630 key = self.file.get_attr(node, 'pickletype') 631 if key: 632 f = self._dispatch[key] 633 obj = f(self, node) 634 else: 635 obj = node.read() 636 self.memo[path] = obj 637 return self.memo[path]
638 639 _dispatch = {} 640
641 - def _load_ref(self, node):
642 path = self.file.get_attr(node, 'target') 643 return self.load(path)
644 _dispatch[REF] = _load_ref 645
646 - def _load_reduce(self, node):
647 path = node._v_pathname 648 args = self.load('%s/__/args' % path) 649 650 if self.file.has_path('%s/__/func' % path): 651 func = self.load('%s/__/func' % path) 652 653 if args is None: 654 warnings.warn("__basicnew__ special case is deprecated", 655 DeprecationWarning) 656 obj = func.__basicnew__() 657 else: 658 obj = func(*args) 659 else: 660 cls = self.load('%s/__/cls' % path) 661 obj = cls.__new__(cls, *args) 662 663 self.memo[path] = obj 664 665 if self.file.has_path('%s/__/listitems' % path): 666 data = self.load('%s/__/listitems' % path) 667 obj.extend(data) 668 669 if self.file.has_path('%s/__/dictitems' % path): 670 data = self.load('%s/__/dictitems' % path) 671 for key, value in data.iteritems(): 672 obj[key] = value 673 674 if self.file.has_path('%s/__/content' % path): 675 state = self.load('%s/__/content' % path) 676 if state is not None: 677 self._setstate(obj, state) 678 elif self.file.has_attr(node, 'has_reduce_content'): 679 state = {} 680 state = self._load_dict_content(node, state) 681 self._setstate(obj, state) 682 return obj
683 _dispatch[REDUCE] = _load_reduce 684
685 - def _load_none(self, node):
686 return None
687 _dispatch[NONE] = _load_none 688
689 - def _load_bool(self, node):
690 return self.file.load_array(node, bool)
691 _dispatch[BOOL] = _load_bool 692
693 - def _load_int(self, node):
694 return self.file.load_array(node, int)
695 _dispatch[INT] = _load_int 696
697 - def _load_long(self, node):
698 data = self.file.load_array(node, str) 699 return decode_long(data)
700 _dispatch[LONG] = _load_long 701
702 - def _load_float(self, node):
703 return self.file.load_array(node, float)
704 _dispatch[FLOAT] = _load_float 705
706 - def _load_complex(self, node):
707 return self.file.load_array(node, complex)
708 _dispatch[COMPLEX] = _load_complex 709
710 - def _load_string(self, node):
711 return self.file.load_array(node, str)
712 _dispatch[STRING] = _load_string 713
714 - def _load_unicode(self, node):
715 data = self.file.load_array(node, str) 716 return data.decode('utf-8')
717 _dispatch[UNICODE] = _load_unicode 718
719 - def _load_list_content(self, node):
720 if isinstance(node, tables.Array): 721 return self.file.load_array(node, list) 722 723 items = [] 724 self.memo[node._v_pathname] = items # avoid infinite loop 725 726 def cmpfunc(a, b): 727 c = len(a) - len(b) 728 if c == 0: 729 c = cmp(a, b) 730 return c
731 732 names = list(node._v_children) 733 names.sort(cmpfunc) 734 735 for name in names: 736 items.append(self.load('%s/%s' % (node._v_pathname, name))) 737 738 return items
739
740 - def _load_tuple(self, node):
741 return tuple(self._load_list_content(node))
742 _dispatch[TUPLE] = _load_tuple 743
744 - def _load_list(self, node):
745 return self._load_list_content(node)
746 _dispatch[LIST] = _load_list 747
748 - def _load_dict(self, node):
749 path = node._v_pathname 750 data = {} 751 self.memo[path] = data 752 return self._load_dict_content(node, data)
753
754 - def _load_dict_content(self, node, data):
755 path = node._v_pathname 756 strkeys = {} 757 758 if '__' in node._v_children: 759 n2 = node._v_children['__'] 760 for name in n2._v_children: 761 if name.startswith('_'): 762 strkeys[name] = self.load('%s/__/%s' % (path, name)) 763 764 for key in node._v_children: 765 if key == '__': continue 766 767 if key in strkeys: 768 realkey = strkeys[key] 769 else: 770 realkey = key 771 772 data[realkey] = self.load('%s/%s' % (path, key)) 773 774 return data
775 _dispatch[DICT] = _load_dict 776 777 # INST and OBJ differ only in how they get a class object. It's not 778 # only sensible to do the rest in a common routine, the two routines 779 # previously diverged and grew different bugs. 780 # klass is the class to instantiate, and k points to the topmost mark 781 # object, following which are the arguments for klass.__init__.
782 - def _instantiate(self, klass, args):
783 instantiated = 0 784 if (not args and type(klass) is ClassType and 785 not hasattr(klass, "__getinitargs__")): 786 try: 787 value = _EmptyClass() 788 value.__class__ = klass 789 instantiated = 1 790 except RuntimeError: 791 # In restricted execution, assignment to inst.__class__ is 792 # prohibited 793 pass 794 if not instantiated: 795 try: 796 value = klass(*args) 797 except TypeError, err: 798 raise TypeError, "in constructor for %s: %s" % ( 799 klass.__name__, str(err)), sys.exc_info()[2] 800 return value
801
802 - def _load_inst(self, node):
803 path = node._v_pathname 804 805 cls = self.load('%s/__/cls' % path) 806 args = self.load('%s/__/args' % path) 807 808 inst = self._instantiate(cls, args) 809 810 self.memo[path] = inst 811 812 if self.file.has_path('%s/__/content' % path): 813 state = self.load('%s/__/content' % path) 814 else: 815 state = {} 816 state = self._load_dict_content(node, state) 817 self._setstate(inst, state) 818 819 return inst
820 _dispatch[INST] = _load_inst 821
822 - def _setstate(self, inst, state):
823 setstate = getattr(inst, "__setstate__", None) 824 if setstate: 825 setstate(state) 826 return 827 828 slotstate = None 829 if isinstance(state, tuple) and len(state) == 2: 830 state, slotstate = state 831 832 if state: 833 try: 834 inst.__dict__.update(state) 835 except RuntimeError: 836 # XXX In restricted execution, the instance's __dict__ 837 # is not accessible. Use the old way of unpickling 838 # the instance variables. This is a semantic 839 # difference when unpickling in restricted 840 # vs. unrestricted modes. 841 # Note, however, that cPickle has never tried to do the 842 # .update() business, and always uses 843 # PyObject_SetItem(inst.__dict__, key, value) in a 844 # loop over state.items(). 845 for k, v in state.items(): 846 setattr(inst, k, v) 847 if slotstate: 848 for k, v in slotstate.items(): 849 setattr(inst, k, v)
850
851 - def _load_global(self, node):
852 data = self.file.load_array(node, str) 853 module, name = data.split('\n') 854 return self._find_class(module, name)
855 _dispatch[GLOBAL] = _load_global 856
857 - def _load_ext(self, node):
858 data = self.file.load_array(node, str) 859 code = marshal.loads('i' + data) 860 return self._get_extension(code)
861 _dispatch[EXT4] = _load_ext 862
863 - def _load_numeric_array(self, node):
864 import Numeric 865 return Numeric.asarray(node.read())
866 _dispatch[NUMERIC] = _load_numeric_array 867
868 - def _load_numpy_array(self, node):
869 import numpy 870 return numpy.asarray(node.read())
871 _dispatch[NUMPY] = _load_numpy_array 872
873 - def _load_numarray_array(self, node):
874 import numarray 875 return numarray.asarray(node.read())
876 _dispatch[NUMARRAY] = _load_numarray_array 877
878 - def _get_extension(self, code):
879 nil = [] 880 obj = _extension_cache.get(code, nil) 881 if obj is not nil: 882 self.append(obj) 883 return 884 key = _inverted_registry.get(code) 885 if not key: 886 raise ValueError("unregistered extension code %d" % code) 887 obj = self._find_class(*key) 888 _extension_cache[code] = obj 889 return obj
890
891 - def _find_class(self, module, name):
892 # Subclasses may override this 893 __import__(module) 894 mod = sys.modules[module] 895 klass = getattr(mod, name) 896 return klass
897 898 899 ############################################################################# 900 901
class _EmptyClass:
    # Placeholder old-style class: _instantiate() creates one and then
    # reassigns inst.__class__, mirroring pickle.py's trick for building
    # instances without calling __init__.
    pass
# Node names must be plain Python identifiers; names matching _c_/_f_/
# _g_/_v_ prefixes are reserved by PyTables itself.
pythonIdRE = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
reservedIdRE = re.compile('^_[cfgv]_')

def _checkNameValidity(name):
    """
    Check the validity of the `name` of a PyTables object,
    so that PyTables won't spew warnings or exceptions...

    Raises TypeError for non-strings and ValueError for unusable names.
    """
    if not isinstance(name, basestring): # Python >= 2.3
        raise TypeError()
    if name in ('', '.'):
        raise ValueError()
    if '/' in name:
        raise ValueError()
    if pythonIdRE.match(name) is None:
        raise ValueError()
    if keyword.iskeyword(name):
        raise ValueError()
    if reservedIdRE.match(name) is not None:
        raise ValueError()
926
def _check_pytables_name(key):
    """Return True if `key` is usable as a PyTables node name."""
    try:
        _checkNameValidity(key)
        return True
    except (TypeError, ValueError):
        # BUGFIX: was a bare 'except:', which also swallowed
        # KeyboardInterrupt/SystemExit; _checkNameValidity raises only
        # TypeError and ValueError.
        return False
933 934 935 ############################################################################# 936 937
def dump(obj, file, path, type_map=None):
    """
    Dump a Python object to an open PyTables HDF5 file.

    :param obj: the object to dump
    :param file: where to dump
    :type file: tables.File
    :param path: path where to dump in the file
    :param type_map:
        mapping of Python basic types (str, int, ...) to numpy types.
        If ``None``, numpy's default mapping is used.
    """
    pickler = Pickler(file, type_map=type_map)
    pickler.dump(path, obj)
951
def load(file, path):
    """
    Load a Python object from an open PyTables HDF5 file.

    :param file: where to load from
    :type file: tables.File
    :param path: path to the object in the file

    :return: loaded object
    """
    unpickler = Unpickler(file)
    return unpickler.load(path)
963
def dump_many(file, desc, type_map=None):
    """
    Dump multiple Python objects to an open PyTables HDF5 file,
    preserving any references between the objects.

    Calling `dump(file, path)` many times for objects keeping references
    to each other would result in duplicated data.

    :param file: where to dump
    :type file: tables.File
    :param desc: a list of (path, obj)
    :param type_map:
        mapping of Python basic types (str, int, ...) to numpy types.
        If ``None``, numpy's default mapping is used.
    """
    # One shared Pickler keeps the path memo across all objects.
    pickler = Pickler(file, type_map=type_map)
    for path, obj in desc:
        pickler.dump(path, obj)
982
def load_many(file, paths):
    """
    Load multiple Python objects from the file, preserving any
    references between them.

    Calling `load(file, path)` many times for objects keeping references
    to each other would result to duplicated data.

    :param file: where to dump
    :type file: tables.File
    :param paths: a list of paths where to load from

    :return: list of (path, object)
    """
    # One shared Unpickler keeps the memo across all loads.
    unpickler = Unpickler(file)
    return [(path, unpickler.load(path)) for path in paths]
1003