chainsyncer

Blockchain syncer driver
Log | Files | Refs | LICENSE

commit 059f585efe109d363aed3e2669e7cb4a4fc3ce0e
parent 07685134c16e2772be38df30fce6ecb5056e2e7f
Author: nolash <dev@holbrook.no>
Date:   Fri, 27 Aug 2021 12:23:11 +0200

Complete docstrings and cleanup of chainsyncer backends

Diffstat:
Mchainsyncer/backend/base.py | 25++++++++++++++++++++-----
Mchainsyncer/backend/file.py | 170+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
Mchainsyncer/backend/memory.py | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
Mchainsyncer/backend/sql.py | 92+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
4 files changed, 300 insertions(+), 68 deletions(-)

diff --git a/chainsyncer/backend/base.py b/chainsyncer/backend/base.py @@ -1,12 +1,18 @@ # standard imports import logging -logg = logging.getLogger().getChild(__name__) +logg = logging.getLogger(__name__) class Backend: + """Base class for syncer state backend. - def __init__(self, flags_reversed=False): + :param flags_reversed: If set, filter flags are interpreted from left to right + :type flags_reversed: bool + """ + + def __init__(self, object_id, flags_reversed=False): + self.object_id = object_id self.filter_count = 0 self.flags_reversed = flags_reversed @@ -19,9 +25,17 @@ class Backend: self.block_height_target = 0 self.tx_index_target = 0 - def check_filter(self, n, flags): + """Check whether an individual filter flag is set. + + :param n: Bit index + :type n: int + :param flags: Bit field to check against + :type flags: int + :rtype: bool + :returns: True if set + """ if self.flags_reversed: try: v = 1 << flags.bit_length() - 1 @@ -34,12 +48,13 @@ class Backend: def chain(self): - """Returns chain spec for syncer + """Returns chain spec for syncer. :returns: Chain spec :rtype chain_spec: cic_registry.chain.ChainSpec """ return self.chain_spec + def __str__(self): - return "syncerbackend chain {} start {} target {}".format(self.chain(), self.start(), self.target()) + return "syncerbackend {} chain {} start {} target {}".format(self.object_id, self.chain(), self.start(), self.target()) diff --git a/chainsyncer/backend/file.py b/chainsyncer/backend/file.py @@ -9,23 +9,54 @@ from .base import Backend logg = logging.getLogger().getChild(__name__) -base_dir = '/var/lib' +BACKEND_BASE_DIR = '/var/lib' -def chain_dir_for(chain_spec, base_dir=base_dir): +def chain_dir_for(chain_spec, base_dir=BACKEND_BASE_DIR): + """Retrieve file backend directory for the given chain spec. + + :param chain_spec: Chain spec context of backend + :type chain_spec: chainlib.chain.ChainSpec + :param base_dir: Base directory to use for generation. Default is value of BACKEND_BASE_DIR + :type base_dir: str + :rtype: str + :returns: Absolute path of chain backend directory + """ base_data_dir = os.path.join(base_dir, 'chainsyncer') return os.path.join(base_data_dir, str(chain_spec).replace(':', '/')) -def data_dir_for(chain_spec, object_id, base_dir=base_dir): +def data_dir_for(chain_spec, object_id, base_dir=BACKEND_BASE_DIR): + """Retrieve file backend directory for the given syncer. + + :param chain_spec: Chain spec context of backend + :type chain_spec: chainlib.chain.ChainSpec + :param object_id: Syncer id + :type object_id: str + :param base_dir: Base directory to use for generation. Default is value of BACKEND_BASE_DIR + :type base_dir: str + :rtype: str + :returns: Absolute path of chain backend directory + """ chain_dir = chain_dir_for(chain_spec, base_dir=base_dir) return os.path.join(chain_dir, object_id) class FileBackend(Backend): + """Filesystem backend implementation for syncer state. + + FileBackend uses reverse order of filter flags. + + :param chain_spec: Chain spec for the chain that syncer is running for. + :type chain_spec: cic_registry.chain.ChainSpec + :param object_id: Unique id for the syncer session. + :type object_id: str + :param base_dir: Base directory to use for generation. Default is value of BACKEND_BASE_DIR + :type base_dir: str + """ - def __init__(self, chain_spec, object_id=None, base_dir=base_dir): - super(FileBackend, self).__init__(flags_reversed=True) + def __init__(self, chain_spec, object_id, base_dir=BACKEND_BASE_DIR): + super(FileBackend, self).__init__(object_id, flags_reversed=True) self.object_data_dir = data_dir_for(chain_spec, object_id, base_dir=base_dir) self.object_id = object_id @@ -42,7 +73,16 @@ class FileBackend(Backend): @staticmethod - def create_object(chain_spec, object_id=None, base_dir=base_dir): + def create_object(chain_spec, object_id=None, base_dir=BACKEND_BASE_DIR): + """Creates a new syncer session at the given backend destination. + + :param chain_spec: Chain spec for the chain that syncer is running for. + :type chain_spec: cic_registry.chain.ChainSpec + :param object_id: Unique id for the syncer session. + :type object_id: str + :param base_dir: Base directory to use for generation. Default is value of BACKEND_BASE_DIR + :type base_dir: str + """ if object_id == None: object_id = str(uuid.uuid4()) @@ -89,6 +129,11 @@ class FileBackend(Backend): def load(self): + """Loads the state of the syncer at the given location of the instance. + + :raises FileNotFoundError: Invalid data directory + :raises IsADirectoryError: Invalid data directory + """ offset_path = os.path.join(self.object_data_dir, 'offset') f = open(offset_path, 'rb') b = f.read(16) @@ -130,6 +175,10 @@ class FileBackend(Backend): def connect(self): + """Proxy for chainsyncer.backend.file.FileBackend.load that performs a basic sanity check for instance's backend location. + + :raises ValueError: Sanity check failed + """ object_path = os.path.join(self.object_data_dir, 'object_id') f = open(object_path, 'r') object_id = f.read() @@ -141,23 +190,46 @@ class FileBackend(Backend): def disconnect(self): + """FileBackend applies no actual connection, so this is noop + """ pass def purge(self): + """Remove syncer state from backend. + """ shutil.rmtree(self.object_data_dir) def get(self): + """Get the current state of the syncer cursor. + + :rtype: tuple + :returns: Block height / tx index tuple, and filter flags value + """ logg.debug('filter {}'.format(self.filter.hex())) return ((self.block_height_cursor, self.tx_index_cursor), self.get_flags()) def get_flags(self): + """Get canonical representation format of flags. + + :rtype: int + :returns: Filter flag bitfield value + """ return int.from_bytes(self.filter, 'little') def set(self, block_height, tx_index): + """Update the state of the syncer cursor. + + :param block_height: New block height + :type block_height: int + :param tx_height: New transaction height in block + :type tx_height: int + :returns: Block height / tx index tuple, and filter flags value + :rtype: tuple + """ self.__set(block_height, tx_index, 'cursor') # cursor_path = os.path.join(self.object_data_dir, 'filter') @@ -188,7 +260,21 @@ class FileBackend(Backend): @staticmethod - def initial(chain_spec, target_block_height, start_block_height=0, base_dir=base_dir): + def initial(chain_spec, target_block_height, start_block_height=0, base_dir=BACKEND_BASE_DIR): + """Creates a new syncer session and commit its initial state to backend. + + :param chain_spec: Chain spec of chain that syncer is running for. + :type chain_spec: cic_registry.chain.ChainSpec + :param target_block_height: Target block height + :type target_block_height: int + :param start_block_height: Start block height + :type start_block_height: int + :param base_dir: Base directory to use for generation. Default is value of BACKEND_BASE_DIR + :type base_dir: str + :raises ValueError: Invalid start/target specification + :returns: New syncer object + :rtype: cic_eth.db.models.BlockchainSync + """ if start_block_height >= target_block_height: raise ValueError('start block height must be lower than target block height') @@ -203,7 +289,18 @@ class FileBackend(Backend): @staticmethod - def live(chain_spec, block_height, base_dir=base_dir): + def live(chain_spec, block_height, base_dir=BACKEND_BASE_DIR): + """Creates a new open-ended syncer session starting at the given block height. + + :param chain: Chain spec of chain that syncer is running for. + :type chain: cic_registry.chain.ChainSpec + :param block_height: Start block height + :type block_height: int + :param base_dir: Base directory to use for generation. Default is value of BACKEND_BASE_DIR + :type base_dir: str + :returns: "Live" syncer object + :rtype: cic_eth.db.models.BlockchainSync + """ uu = FileBackend.create_object(chain_spec, base_dir=base_dir) o = FileBackend(chain_spec, uu, base_dir=base_dir) o.__set(block_height, 0, 'offset') @@ -213,15 +310,26 @@ class FileBackend(Backend): def target(self): + """Get the target state (upper bound of sync) of the syncer cursor. + + :returns: Block height and filter flags value + :rtype: tuple + """ + return (self.block_height_target, 0,) def start(self): + """Get the initial state of the syncer cursor. + + :returns: Block height / tx index tuple, and filter flags value + :rtype: tuple + """ return ((self.block_height_offset, self.tx_index_offset), 0,) @staticmethod - def __sorted_entries(chain_spec, base_dir=base_dir): + def __sorted_entries(chain_spec, base_dir=BACKEND_BASE_DIR): chain_dir = chain_dir_for(chain_spec, base_dir=base_dir) entries = {} @@ -246,7 +354,21 @@ class FileBackend(Backend): @staticmethod - def resume(chain_spec, block_height, base_dir=base_dir): + def resume(chain_spec, block_height, base_dir=BACKEND_BASE_DIR): + """Retrieves and returns all previously unfinished syncer sessions. + + If a previous open-ended syncer is found, a new syncer will be generated to sync from where that syncer left off until the block_height given as argument. + + :param chain_spec: Chain spec of chain that syncer is running for + :type chain_spec: cic_registry.chain.ChainSpec + :param block_height: Target block height for previous live syncer + :type block_height: int + :param base_dir: Base directory to use for generation. Default is value of BACKEND_BASE_DIR + :type base_dir: str + :raises FileNotFoundError: Invalid backend location + :returns: Syncer objects of unfinished syncs + :rtype: list of cic_eth.db.models.BlockchainSync + """ try: return FileBackend.__sorted_entries(chain_spec, base_dir=base_dir) except FileNotFoundError: @@ -254,7 +376,16 @@ class FileBackend(Backend): @staticmethod - def first(chain_spec, base_dir=base_dir): + def first(chain_spec, base_dir=BACKEND_BASE_DIR): + """Returns the model object of the most recent syncer in backend. + + :param chain_spec: Chain spec of chain that syncer is running for. + :type chain_spec: cic_registry.chain.ChainSpec + :param base_dir: Base directory to use for generation. Default is value of BACKEND_BASE_DIR + :type base_dir: str + :returns: Last syncer object + :rtype: cic_eth.db.models.BlockchainSync + """ entries = [] try: entries = FileBackend.__sorted_entries(chain_spec, base_dir=base_dir) @@ -264,8 +395,13 @@ class FileBackend(Backend): # n is zero-index of bit field - def complete_filter(self, n, base_dir=base_dir): + def complete_filter(self, n, base_dir=BACKEND_BASE_DIR): + """Sets the filter at the given index as completed. + :param n: Filter index, starting at zero + :type n: int + :raises IndexError: Index is outside filter count range + """ if self.filter_count <= n: raise IndexError('index {} out of ranger for filter size {}'.format(n, self.filter_count)) @@ -286,8 +422,14 @@ class FileBackend(Backend): f.close() - # overwrites disk if manual changed members in struct def register_filter(self, name): + """Add filter to backend. + + Overwrites record on disk if manual changed members in struct + + :param name: Name of filter + :type name: str + """ filter_path = os.path.join(self.object_data_dir, 'filter') if (self.filter_count + 1) % 8 == 0: self.filter += b'\x00' @@ -308,6 +450,8 @@ class FileBackend(Backend): def reset_filter(self): + """Reset all filter states. + """ self.filter = b'\x00' * len(self.filter) cursor_path = os.path.join(self.object_data_dir, 'filter') f = open(cursor_path, 'r+b') diff --git a/chainsyncer/backend/memory.py b/chainsyncer/backend/memory.py @@ -4,66 +4,111 @@ import logging # local imports from .base import Backend -logg = logging.getLogger().getChild(__name__) +logg = logging.getLogger(__name__) class MemBackend(Backend): + """Disposable syncer backend. Keeps syncer state in memory. - def __init__(self, chain_spec, object_id, target_block=None): - super(MemBackend, self).__init__() - self.object_id = object_id + Filter bitfield is interpreted right to left. + + :param chain_spec: Chain spec context of syncer + :type chain_spec: chainlib.chain.ChainSpec + :param object_id: Unique id for the syncer session. + :type object_id: str + :param target_block: Block height to terminate sync at + :type target_block: int + """ + + def __init__(self, chain_spec, object_id, target_block=None, block_height=0, tx_height=0, flags=0): + super(MemBackend, self).__init__(object_id) self.chain_spec = chain_spec - self.block_height = 0 - self.tx_height = 0 - self.flags = 0 - self.target_block = target_block + self.block_height_offset = block_height + self.block_height_cursor = block_height + self.tx_height_offset = tx_height + self.tx_height_cursor = tx_height + self.block_height_target = target_block self.db_session = None + self.flags = flags self.filter_names = [] - self.filter_states = {} def connect(self): + """NOOP as memory backend implements no connection. + """ pass def disconnect(self): + """NOOP as memory backend implements no connection. + """ pass def set(self, block_height, tx_height): - logg.debug('stateless backend received {} {}'.format(block_height, tx_height)) - self.block_height = block_height - self.tx_height = tx_height + """Set the syncer state. + + :param block_height: New block height + :type block_height: int + :param tx_height: New transaction height in block + :type tx_height: int + """ + logg.debug('memory backend received {} {}'.format(block_height, tx_height)) + self.block_height_cursor = block_height + self.tx_height_cursor = tx_height def get(self): - return ((self.block_height, self.tx_height), self.flags) + """Get the current syncer state + + :rtype: tuple + :returns: block height / tx index tuple, and filter flags value + """ + return ((self.block_height_cursor, self.tx_height_cursor), self.flags) def target(self): - return (self.target_block, self.flags) + """Returns the syncer target. + + :rtype: tuple + :returns: block height / tx index tuple + """ + return (self.block_height_target, self.flags) def register_filter(self, name): + """Adds a filter identifier to the syncer. + + :param name: Filter name + :type name: str + """ self.filter_names.append(name) self.filter_count += 1 def complete_filter(self, n): + """Set filter at index as completed for the current block / tx state. + + :param n: Filter index + :type n: int + """ v = 1 << n self.flags |= v logg.debug('set filter {} {}'.format(self.filter_names[n], v)) def reset_filter(self): + """Set all filters to unprocessed for the current block / tx state. + """ logg.debug('reset filters') self.flags = 0 - def get_flags(self): - return flags +# def get_flags(self): +# """Returns flags +# """ +# return self.flags def __str__(self): - return "syncer membackend chain {} cursor".format(self.get()) - + return "syncer membackend {} chain {} cursor {}".format(self.object_id, self.chain(), self.get()) diff --git a/chainsyncer/backend/sql.py b/chainsyncer/backend/sql.py @@ -19,32 +19,44 @@ class SQLBackend(Backend): :param chain_spec: Chain spec for the chain that syncer is running for. :type chain_spec: cic_registry.chain.ChainSpec - :param object_id: Unique id for the syncer session. - :type object_id: number + :param object_id: Unique database record id for the syncer session. + :type object_id: int """ base = None def __init__(self, chain_spec, object_id): - super(SQLBackend, self).__init__() + super(SQLBackend, self).__init__(int(object_id)) self.db_session = None self.db_object = None self.db_object_filter = None self.chain_spec = chain_spec - self.object_id = object_id self.connect() self.disconnect() @classmethod - def setup(cls, dsn, debug=False, *args, **kwargs): + def setup(cls, dsn, debug=False, pool_size=0, *args, **kwargs): + """Set up database connection backend. + + :param dsn: Database connection string + :type dsn: str + :param debug: Activate debug output in sql engine + :type debug: bool + :param pool_size: Size of transaction pool + :type pool_size: int + """ if cls.base == None: cls.base = SessionBase - cls.base.connect(dsn, debug=debug, pool_size=kwargs.get('pool_size', 0)) + cls.base.connect(dsn, debug=debug, pool_size=pool_size) def connect(self): - """Loads the state of the syncer session with the given id. + """Loads the state of the syncer session by the given database record id. + + :raises ValueError: Database syncer object with given id does not exist + :rtype: sqlalchemy.orm.session.Session + :returns: Database session object """ if self.db_session == None: self.db_session = SessionBase.create_session() @@ -66,7 +78,7 @@ class SQLBackend(Backend): def disconnect(self): - """Commits state of sync to backend. + """Commits state of sync to backend and frees connection resources. """ if self.db_session == None: return @@ -83,8 +95,8 @@ class SQLBackend(Backend): def get(self): """Get the current state of the syncer cursor. - :returns: Block and block transaction height, respectively :rtype: tuple + :returns: Block height / tx index tuple, and filter flags value """ self.connect() pair = self.db_object.cursor() @@ -94,12 +106,13 @@ class SQLBackend(Backend): def set(self, block_height, tx_height): - """Update the state of the syncer cursor - :param block_height: Block height of cursor - :type block_height: number - :param tx_height: Block transaction height of cursor - :type tx_height: number - :returns: Block and block transaction height, respectively + """Update the state of the syncer cursor. + + :param block_height: New block height + :type block_height: int + :param tx_height: New transaction height in block + :type tx_height: int + :returns: Block height / tx index tuple, and filter flags value :rtype: tuple """ self.connect() @@ -112,7 +125,7 @@ class SQLBackend(Backend): def start(self): """Get the initial state of the syncer cursor. - :returns: Initial block and block transaction height, respectively + :returns: Block height / tx index tuple, and filter flags value :rtype: tuple """ self.connect() @@ -125,8 +138,8 @@ class SQLBackend(Backend): def target(self): """Get the target state (upper bound of sync) of the syncer cursor. - :returns: Target block height - :rtype: number + :returns: Block height and filter flags value + :rtype: tuple """ self.connect() target = self.db_object.target() @@ -139,12 +152,11 @@ class SQLBackend(Backend): def first(chain_spec): """Returns the model object of the most recent syncer in backend. - :param chain: Chain spec of chain that syncer is running for. - :type chain: cic_registry.chain.ChainSpec + :param chain_spec: Chain spec of chain that syncer is running for. + :type chain_spec: cic_registry.chain.ChainSpec :returns: Last syncer object :rtype: cic_eth.db.models.BlockchainSync """ - #return BlockchainSync.first(str(chain_spec)) object_id = BlockchainSync.first(str(chain_spec)) if object_id == None: return None @@ -156,10 +168,13 @@ class SQLBackend(Backend): def initial(chain_spec, target_block_height, start_block_height=0): """Creates a new syncer session and commit its initial state to backend. - :param chain: Chain spec of chain that syncer is running for. - :type chain: cic_registry.chain.ChainSpec - :param block_height: Target block height - :type block_height: number + :param chain_spec: Chain spec of chain that syncer is running for + :type chain_spec: cic_registry.chain.ChainSpec + :param target_block_height: Target block height + :type target_block_height: int + :param start_block_height: Start block height + :type start_block_height: int + :raises ValueError: Invalid start/target specification :returns: New syncer object :rtype: cic_eth.db.models.BlockchainSync """ @@ -185,11 +200,12 @@ class SQLBackend(Backend): def resume(chain_spec, block_height): """Retrieves and returns all previously unfinished syncer sessions. + If a previous open-ended syncer is found, a new syncer will be generated to sync from where that syncer left off until the block_height given as argument. - :param chain_spec: Chain spec of chain that syncer is running for. + :param chain_spec: Chain spec of chain that syncer is running for :type chain_spec: cic_registry.chain.ChainSpec - :param block_height: Target block height - :type block_height: number + :param block_height: Target block height for previous live syncer + :type block_height: int :returns: Syncer objects of unfinished syncs :rtype: list of cic_eth.db.models.BlockchainSync """ @@ -261,8 +277,8 @@ class SQLBackend(Backend): :param chain: Chain spec of chain that syncer is running for. :type chain: cic_registry.chain.ChainSpec - :param block_height: Target block height - :type block_height: number + :param block_height: Start block height + :type block_height: int :returns: "Live" syncer object :rtype: cic_eth.db.models.BlockchainSync """ @@ -283,6 +299,13 @@ class SQLBackend(Backend): def register_filter(self, name): + """Add filter to backend. + + No check is currently implemented to enforce that filters are the same for existing syncers. Care must be taken by the caller to avoid inconsistencies. + + :param name: Name of filter + :type name: str + """ self.connect() if self.db_object_filter == None: self.db_object_filter = BlockchainSyncFilter(self.db_object) @@ -292,6 +315,11 @@ class SQLBackend(Backend): def complete_filter(self, n): + """Sets the filter at the given index as completed. + + :param n: Filter index + :type n: int + """ self.connect() self.db_object_filter.set(n) self.db_session.add(self.db_object_filter) @@ -300,8 +328,8 @@ class SQLBackend(Backend): def reset_filter(self): + """Reset all filter states. + """ self.connect() self.db_object_filter.clear() self.disconnect() - -