# Source code for kuha_client.kuha_upsert

#!/usr/bin/env python3
# Author(s): Toni Sissala
# Copyright 2020 Finnish Social Science Data Archive FSD / University of Tampere
# Licensed under the EUPL. See LICENSE.txt for full license.
"""Callable module serves as entry point to upsert (insert or update) records from DocumentStore.

Use Document Store's Query API to see if document exists. If it exists, fetch it,
update it, submit it back to Document Store via REST API.

Example run from command line. Upsert records from /some/path::

    python -m kuha_client.kuha_upsert --document-store-url=http://localhost:6001/v0 /some/path

Print help::

    python -m kuha_client.kuha_upsert -h

"""
import sys
import os

from kuha_common import cli_setup
from kuha_common.document_store.records import COLLECTIONS

from kuha_client.kuha_client import BatchProcessor


def upsert_run(paths, collections=None, file_log_path=None, remove_absent=False, sourcefiletype=None):
    """Run an upsert over the source files found from ``paths``.

    When ``file_log_path`` is truthy, the processor is obtained from
    ``BatchProcessor.with_file_log(...)`` and used as a context manager.
    Otherwise a ``BatchProcessor`` is constructed directly. In both cases
    the processor's ``upsert_run`` is called with ``paths`` and
    ``remove_absent``.

    :param paths: Paths to look up source files from.
    :param collections: Limit the run to these collections.
    :param file_log_path: Path to the file log.
    :param remove_absent: Whether the upsert run should remove records
                          that are found from Document Store but not
                          from the source files of the current run.
    :param sourcefiletype: File type of the source files.
    :returns: 0 on success.
    :rtype: int
    """
    # No file log requested: use a plain processor and finish early.
    if not file_log_path:
        batch = BatchProcessor(collections=collections, sourcefiletype=sourcefiletype)
        batch.upsert_run(paths, remove_absent)
        return 0

    # File log requested: the processor is managed by a context manager.
    file_logged = BatchProcessor.with_file_log(file_log_path, collections, sourcefiletype)
    with file_logged as batch:
        batch.upsert_run(paths, remove_absent)
    return 0
def cli():
    """Declare and parse command line arguments, then run the upsert.

    Loads the CLI setup relative to this module's directory, registers
    the Document Store URL and the upsert-specific arguments, sets up the
    Document Store client and logging modules, and finally hands the
    parsed values over to :func:`upsert_run`.

    :returns: Return value of :func:`upsert_run`
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    cli_setup.load(module_dir,
                   prog='python -m kuha_client.kuha_upsert',
                   description='Insert or update records parsed from DDI-C files to document store.',
                   config_file='kuha_client.ini')
    cli_setup.add_document_store_url(cli_setup.settings.parser, required=True)

    collection_help = ('Specific collection to process. If not given, will process all collections. '
                       'Note that if used with --remove-absent, will remove all records from collections not '
                       'given here. Repeatable argument.')
    cli_setup.add('--collection',
                  type=str,
                  action='append',
                  help=collection_help,
                  choices=COLLECTIONS)

    remove_absent_help = 'Should upsert remove records stored in document store but not present in current run.'
    cli_setup.add('--remove-absent',
                  action='store_true',
                  help=remove_absent_help)

    file_log_help = ('Path where to load and store file timestamps for comparing '
                     'on subsequent runs. Leave unset to submit all found records.')
    cli_setup.add('--file-log-path',
                  type=str,
                  default=None,
                  help=file_log_help)

    source_type_help = ('Source file types used for import. Note that all files found from paths '
                        'must have same type.')
    cli_setup.add('--source-file-type',
                  type=str,
                  default=BatchProcessor.SOURCEFILETYPE_DDIC,
                  choices=BatchProcessor.get_supported_sourcefiletypes(),
                  help=source_type_help)

    paths_help = ('Paths to look for files. May be a directory or a file. '
                  'Repeatable argument.')
    cli_setup.add('paths', nargs='+', help=paths_help)

    parsed = cli_setup.setup(cli_setup.MOD_DS_CLIENT, cli_setup.MOD_LOGGING)
    cli_setup.settings.setup_document_store_query()

    return upsert_run(parsed.paths,
                      collections=parsed.collection,
                      file_log_path=parsed.file_log_path,
                      remove_absent=parsed.remove_absent,
                      sourcefiletype=parsed.source_file_type)
if __name__ == '__main__':
    # Executed as a script: use the return value of cli() as the
    # process exit status.
    exit_status = cli()
    sys.exit(exit_status)