Source code for slurmjobs.receipt

import os
import time
import hashlib
import functools
import pprint
import json


class Receipt:
    '''Make a receipt for a function call.

    This allows you to skip over a function if it already ran successfully.
    This is useful if a long script fails in the middle and you want to
    re-run it, but you don't need to re-run the first part.

    The receipt is a file whose name is derived from ``name`` plus a hash of
    the string representation of the call arguments. This works well as most
    things have some sort of string representation. However, if an argument's
    string representation doesn't stay consistent between runs (e.g. an
    object that prints out its ID), then the receipt won't be found again.
    To resolve this, you can either:

    * subclass this class and override ``Receipt.hash_args(*a, **kw)`` to
      return something invariant for your arguments,
    * provide your own ``receipt_id`` to the constructor, or
    * submit a PR for a better hash function!

    .. code-block:: python

        receipt = Receipt(func.__name__, *a, **kw)
        if not receipt.exists:
            try:
                do_something()
                receipt.make()  # yay we can skip it next time!
            except Exception:
                """Oh well. It failed, but we'll just try again next time."""
        else:
            """Oh good it ran completely last time so we can skip it and move to the next one."""

    Arguments:
        name (str or callable): a label for the receipt. If a callable is
            given, its ``__qualname__``/``__name__`` is used.
        *a: positional arguments that identify the call (hashed).
        receipt_id (str, optional): use this instead of the argument hash.
        __dir__ (str, optional): directory to store the receipt in.
            Defaults to ``Receipt.ROOT_DIR``.
        **kw: keyword arguments that identify the call (hashed).
    '''
    # default directory where receipt files are written
    ROOT_DIR = './receipts'
    # default for the "test run" switch read by ``use_receipt``
    TEST = False

    def __init__(self, name='', *a, receipt_id=None, __dir__=None, **kw):
        if callable(name):
            # Not every callable carries __qualname__/__name__ (for example
            # functools.partial objects or instances that define __call__),
            # so fall back to the type name instead of raising AttributeError.
            name = (
                getattr(name, '__qualname__', None)
                or getattr(name, '__name__', None)
                or type(name).__name__)
        assert name or a or kw, 'you must pass some identifiable information to be used for a hash.'
        self.name = name
        self.id = '{}{}'.format(name or '', receipt_id or self.hash_args(*a, **kw))
        self.ROOT_DIR = __dir__ or self.ROOT_DIR
        self.fname = os.path.join(self.ROOT_DIR, self.id)

    def __str__(self):
        return '<Receipt exists={} file={}>'.format(
            self.exists, self.fname)

    def hash_args(self, *a, **kw):
        '''Take the function arguments and return a hash string for them.

        The hash is the MD5 hex digest of ``str(a)`` followed by the string
        form of the keyword items sorted by key, so keyword order does not
        affect the result.
        '''
        return hashlib.md5((
            str(a) + str(sorted(kw.items()))
        ).encode()).hexdigest()

    @property
    def exists(self):
        '''Whether the receipt file is present on disk.'''
        return os.path.isfile(self.fname)

    def make(self, **meta):
        '''Write the receipt file, storing ``meta`` in it as JSON.

        If ``meta`` cannot be serialized, a JSON record describing the
        serialization error (plus ``str(meta)``) is stored instead, so the
        receipt is still written.
        '''
        # Serialize to a string *before* touching the file. Streaming with
        # json.dump could fail half-way and leave a partial document in the
        # file, to which the fallback record would then be appended.
        try:
            payload = json.dumps(meta)
        except Exception as e:  # deliberately best-effort: always leave a receipt
            payload = json.dumps({
                'error': type(e).__name__,
                'description': str(e),
                'data': str(meta)
            })
        os.makedirs(self.ROOT_DIR, exist_ok=True)
        with open(self.fname, 'w') as f:
            f.write(payload)

    def clear(self):
        '''Delete the receipt file if it exists.'''
        if self.exists:
            os.remove(self.fname)

    @property
    def meta(self):
        '''The metadata stored in the receipt.

        Returns:
            ``None`` if there is no receipt file, ``{}`` if the file is
            empty, the decoded JSON value if it parses, and otherwise the raw
            file contents as a string (after printing the decode error).
        '''
        if not os.path.isfile(self.fname):
            return None
        with open(self.fname, 'r') as f:
            s = f.read()
        if not s:
            return {}
        try:
            return json.loads(s)
        except json.decoder.JSONDecodeError as e:
            print('error:', e, s)
            return s
def _fallbacks(*xs): for x in xs: if x is not None: return x
def use_receipt(func, receipt_dir=None, test=None):
    '''Use a receipt for a function call, which lets us skip the call if the
    function completed successfully the last run.

    This is just a wrapper around ``Receipt`` that handles the receipt
    checking/making logic for you.

    .. code-block:: python

        # do step 1. If it already ran successfully it will skip and do nothing.
        use_receipt(my_step1_function)(**step1_kwargs)

        # do step 2. here we're passing a custom receipt ID
        custom_receipt_id = ...
        use_receipt(my_step2_function)(**step2_kwargs, receipt_id=custom_receipt_id)

        # do step 3
        use_receipt(my_step3_function)(**step3_kwargs)

    Arguments:
        func (callable): the function to wrap.
        receipt_dir (str, optional): default receipt directory for this
            wrapper. Falls back to ``Receipt.ROOT_DIR``.
        test (bool, optional): default "test run" flag for this wrapper.
            Falls back to ``Receipt.TEST``.

    Returns:
        A wrapper around ``func``. The wrapper consumes the keyword
        arguments ``overwrite_`` (run even if a receipt exists), ``test``,
        ``receipt_dir_`` and ``receipt_id``; none of them are passed on to
        ``func``. It returns ``func``'s result when the function is run, and
        ``None`` in test mode or when the call is skipped. The defaults are
        exposed as ``wrapper.TEST`` and ``wrapper.ROOT_DIR``.
    '''
    @functools.wraps(func)
    def inner(*a, overwrite_=False, test=None, receipt_dir_=None,
              receipt_id=None, **kw):
        test = _fallbacks(test, inner.TEST, Receipt.TEST)
        receipt_dir_ = _fallbacks(receipt_dir_, inner.ROOT_DIR, Receipt.ROOT_DIR)
        # receipt_id only identifies the receipt; it is intentionally kept
        # out of ``kw`` so that it is not forwarded to ``func``.
        r = Receipt(func, *a, receipt_id=receipt_id, __dir__=receipt_dir_, **kw)
        name = r.name

        if test:
            print((
                '\n'
                '------------------------\n'
                '-- Test Run --\n'
                'Function: {}\n'
                'Receipt: {}\n'
                '*args: {}\n'
                '**kwargs: {}\n'
                '------------------------\n'
            ).format(name, r, pprint.pformat(a), pprint.pformat(kw)))
            return

        if not (overwrite_ or not r.exists):
            # No metadata lookup here: the message does not use it, and a
            # corrupt receipt file must not turn a skip into a crash.
            print((
                '\n'
                '------------------------\n'
                '-- Receipt exists for {} --\n'
                'Receipt: {}\n'
                'Skipping.\n'
                '------------------------\n'
            ).format(name, r))
            return

        start_time = time.time()
        try:
            result = func(*a, **kw)
        except BaseException as e:
            # drop any previous receipt so the next run tries again
            r.clear()
            print((
                '\n'
                '------------------------\n'
                '-- Error during receipted function {}. --\n'
                'Receipt: {}\n'
                'Error: ({}) {}\n'
                'No receipt is written.\n'
                '------------------------\n'
            ).format(name, r, type(e).__name__, e))
            raise

        end_time = time.time()
        duration = end_time - start_time
        r.make(duration_secs=duration, time=end_time)
        print((
            '\n'
            '------------------------\n'
            '-- Receipt written for {} --\n'
            'Receipt: {}\n'
            'Took: {} seconds.\n'
            '------------------------\n'
        ).format(name, r, duration))
        return result

    # per-wrapper defaults; may be reassigned later on the returned function
    inner.TEST, inner.ROOT_DIR = test, receipt_dir
    return inner
# so that setting attributes will set receipt instead class _DeprecatedSetAttr: func = None def __init__(self, func): self.func = func functools.update_wrapper(self, func) def __call__(self, *a, **kw): return self.func(*a, **kw) def __setattr__(self, k, v): print(k, v, k not in self.__dict__) if k not in self.__dict__: setattr(Receipt, k, v) return super().__setattr__(k, v)