Source code for kuha_osmh_repo_handler.osmh.records

#!/usr/bin/env python3
# Author(s): Toni Sissala
# Copyright 2020 Finnish Social Science Data Archive FSD / University of Tampere
# Licensed under the EUPL. See LICENSE.txt for full license.
"""Build OSMH payload from Document Store record objects.
Provide mapping between these two record formats.
Provide Document Store fields for querying.

:note: This module has a strict dependency on
       :mod:`kuha_common.document_store.records`
"""

from abc import ABC, abstractmethod

from kuha_common.document_store.records import (
    Study,
    Variable,
    Question,
    StudyGroup
)

# Import OSMH record types and payload keys
from kuha_osmh_repo_handler.osmh.constants import (
    OSMH_RECORD_TYPE_STUDY,
    OSMH_RECORD_TYPE_VARIABLE,
    OSMH_RECORD_TYPE_QUESTION,
    OSMH_RECORD_TYPE_STUDY_GROUP,
    OSMH_RECORD_HEADER,
    OSMH_PK_IDENTIFIER,
    OSMH_PK_LAST_MODIFIED,
    OSMH_PK_RECORD_TYPE,
    OSMH_PK_TYPE,
    OSMH_PK_STUDY_TITLE,
    OSMH_PK_STUDY_PREFLABEL,
    OSMH_PK_STUDY_ABSTRACT,
    OSMH_PK_STUDY_SUBJECT,
    OSMH_PK_STUDY_INSTRUMENT,
    OSMH_PK_STUDY_INGROUP,
    OSMH_PK_STUDY_VARIABLE,
    OSMH_PK_STUDY_SPATIAL,
    OSMH_PK_STUDY_UNIVERSE,
    OSMH_PK_STUDY_ANALYSISUNIT,
    OSMH_PK_VARIABLE_INSTUDY,
    OSMH_PK_VARIABLE_NOTATION,
    OSMH_PK_VARIABLE_PREFLABEL,
    OSMH_PK_VARIABLE_CODELIST,
    OSMH_PK_QUESTION_QUESTIONTEXT,
    OSMH_PK_QUESTION_CODELIST,
    OSMH_PK_QUESTION_NOTATION,
    OSMH_PK_STUDYGROUP_TITLE
)

# OSMH record types handled by this module; each one is mapped to a
# record class by get_osmh_record_for_type().
OSMH_RECORD_TYPES = [
    OSMH_RECORD_TYPE_STUDY,
    OSMH_RECORD_TYPE_VARIABLE,
    OSMH_RECORD_TYPE_QUESTION,
    OSMH_RECORD_TYPE_STUDY_GROUP
]


class Payload:
    """Container for the payload of a single OSMH record.

    The payload is kept in a plain dictionary, which can easily be
    encoded to JSON. Methods are provided for inserting plain values,
    localized values and list items into that dictionary.

    Example::

        >>> from kuha_osmh_repo_handler.osmh.records import Payload
        >>> payload = Payload('1', '2017-01-01')
        >>> payload.insert_localized_value('study_title', 'en', 'Household Survey')
        >>> payload.insert_localized_value('study_title', 'fi', 'Kotitalouskysely')
        >>> payload.get() # Indent for better readability
        {'identifier': '1',
         'lastModified': '2017-01-01',
         'study_title': {'fi': 'Kotitalouskysely',
                         'en': 'Household Survey'}
        }

    :param identifier: Record's OSMH-identifier. Must uniquely identify
                       the record within other records of the same
                       OSMH record type in the repository.
    :type identifier: str
    :param last_modified: timestamp of the last modification
                          made to the record.
    :type last_modified: str
    :raises: :exc:`ValueError` if either parameter is falsy.
    :returns: :obj:`Payload`
    """

    # Payload keys shared by every record type.
    k_identifier = OSMH_PK_IDENTIFIER
    k_last_modified = OSMH_PK_LAST_MODIFIED
    k_record_type = OSMH_PK_RECORD_TYPE
    k_type = OSMH_PK_TYPE
    # Separator used by join_values() / split_value() for compound values.
    _join_character = ':'

    def __init__(self, identifier, last_modified):
        if not (identifier and last_modified):
            raise ValueError(
                "Identifier (%s) and last_modified (%s) must not be None"
                % (identifier, last_modified))
        self.payload = {
            self.k_identifier: identifier,
            self.k_last_modified: last_modified,
        }

    @classmethod
    def join_values(cls, *args):
        r"""Join values into one string separated by :attr:`_join_character`.

        Every value is converted with :func:`str` before joining.

        :param \*args: values to join
        :type \*args: str
        :raises: :exc:`ValueError` if no values are given, if any value is
                 falsy, or if any value contains the join character.
        :returns: joined values
        :rtype: str
        """
        if not (args and all(args)):
            raise ValueError("Values to join may not be None")
        as_strings = [str(arg) for arg in args]
        offending = [item for item in as_strings
                     if cls._join_character in item]
        if offending:
            raise ValueError("Values may not contain join_character: (%s)"
                             % offending)
        return cls._join_character.join(as_strings)

    @classmethod
    def split_value(cls, value):
        """Split a value on :attr:`_join_character`.

        :param value: value to split
        :type value: str
        :returns: the separated parts
        :rtype: list
        """
        separator = cls._join_character
        return value.split(separator)

    def insert(self, key, value):
        """Store a value under a payload key.

        Creates the key when it is missing and replaces the stored
        value when the key is already present.

        :param key: payload key for the value.
        :type key: str
        :param value: value to be inserted.
        :type value: str
        """
        self.payload[key] = value

    def insert_localized_value(self, key, locale, value):
        """Store a value for one locale under a payload key.

        The payload key holds a dictionary of locale -> value. The
        dictionary is created when the key is not yet in the payload.

        :param key: payload key
        :type key: str
        :param locale: locale of the value
        :type locale: str
        :param value: payload value
        :type value: str
        """
        localized = self.payload.setdefault(key, {})
        localized.update({locale: value})

    def append(self, key, value, unique=False):
        """Add a list item under a payload key.

        The payload key holds a list. The list is created when the key
        is not yet in the payload. When ``unique`` is True, a value that
        is already in the list is not added again.

        :param key: payload key
        :type key: str
        :param value: value to insert as list item
        :type value: str
        :param unique: whether to keep the list of
                       values unique (no duplicates)
        :type unique: bool
        """
        items = self.payload.setdefault(key, [])
        if unique and value in items:
            return
        items.append(value)

    def header(self, osmh_type):
        """Write the record header entries to the payload.

        :note: Header is common for all record types. The only
               changing value is the record type.

        :param osmh_type: OSMH record type
        :type osmh_type: str
        """
        header_entries = (
            (self.k_record_type, OSMH_RECORD_HEADER),
            (self.k_type, osmh_type),
        )
        for key, value in header_entries:
            self.insert(key, value)

    def get(self):
        """Return the payload built so far.

        :returns: OSMH payload
        :rtype: dict
        """
        return self.payload
class OSMHRecord(ABC):
    """Abstract base class shared by all OSMH records.

    Not meant to be instantiated directly; use a subclass. Supplies the
    properties and methods that every OSMH record has in common.

    :param payload: payload of the record.
    :type payload: :obj:`Payload`
    :raises: :exc:`TypeError` if subclass does not
             define class attributes.
    """

    def __init__(self, payload):
        self.payload = payload

    @property
    @abstractmethod
    def osmh_type(self):
        """OSMH type. Declare in subclass."""

    @property
    @abstractmethod
    def query_document(self):
        """Document Store record to query. Declare in subclass."""

    @property
    @abstractmethod
    def relative_queries_for_record(self):
        """Whether the record response needs relative records queried
        from Document Store. Declare in subclass.
        """

    @staticmethod
    @abstractmethod
    def fields_for_header():
        """Get fields to query that are required to build the
        record header. Override in subclass.
        """

    @staticmethod
    @abstractmethod
    def fields_for_record():
        """Get fields to query that are required to build the
        record. Override in subclass.
        """

    @staticmethod
    @abstractmethod
    def query_filter_for_record(identifier):
        """Get filter which queries the correct record from
        Document Store. Override in subclass.
        """

    @classmethod
    def for_header_response(cls, ds_record):
        """Create a record whose payload contains only the record header.

        :param ds_record: Document Store record.
        :type ds_record: Record defined in
                         :mod:`kuha_common.document_store.records`
        :returns: Instantiated OSMH record object.
        """
        instance = cls(ds_record)
        instance.build_header_payload()
        return instance

    @classmethod
    def for_record_response(cls, ds_record):
        """Create a record whose payload contains the actual record.

        :param ds_record: Document Store record.
        :type ds_record: Record defined in
                         :mod:`kuha_common.document_store.records`
        :returns: Instantiated OSMH record object.
        """
        instance = cls(ds_record)
        instance.build_record_payload()
        return instance

    @classmethod
    def get_query_document(cls):
        """Return the Document Store record used for querying.

        :returns: Document Store record used for querying.
        """
        return cls.query_document

    @classmethod
    def requires_relative_queries_for_record(cls):
        """Tell whether relative records must be queried from Document
        Store to construct the full record response.

        :returns: True or False.
        :rtype: bool
        """
        return cls.relative_queries_for_record

    def build_header_payload(self):
        """Build the common header payload."""
        record_type = self.osmh_type
        self.payload.header(record_type)

    @abstractmethod
    def build_record_payload(self):
        """Build the common part of the record payload.

        Stores the OSMH type under the payload's record type key.
        Subclasses must override this method and may call it via
        ``super()``.
        """
        record_type_key = self.payload.k_record_type
        self.payload.insert(record_type_key, self.osmh_type)

    def get_payload(self):
        """Get the built payload.

        :returns: record payload.
        :rtype: dict
        """
        return self.payload.get()
class StudyRecord(OSMHRecord):
    """OSMH Study record.

    Derived from :class:`OSMHRecord`. The payload identifier is the
    study number of the given Document Store study.

    :param study: Study from Document Store.
    :type study: :obj:`kuha_common.document_store.records.Study`
    :returns: Instantiated OSMH Study record
    :rtype: :obj:`StudyRecord`
    """

    osmh_type = OSMH_RECORD_TYPE_STUDY
    query_document = Study
    # Variables of the study are queried separately as relative records.
    relative_queries_for_record = True

    def __init__(self, study):
        identifier = study.study_number.get_value()
        super().__init__(Payload(identifier, study.get_updated()))
        self.study = study

    @staticmethod
    def fields_for_header():
        """Get fields to query that are required to build the
        record header.

        :returns: Study fields required to build record header.
        :rtype: list
        """
        return [Study._metadata, Study.study_number]

    @staticmethod
    def fields_for_record():
        """Get fields to query that are required to build the record.

        :returns: Study fields required to build the record.
        :rtype: list
        """
        return [
            Study._metadata,
            Study.study_number,
            Study.study_titles,
            Study.universes,
            Study.abstract,
            Study.keywords,
            Study.study_area_countries,
            Study.instruments,
            Study.analysis_units,
            Study.study_groups,
        ]

    @staticmethod
    def query_filter_for_record(identifier):
        """Get filter which queries the correct record from
        Document Store.

        :param identifier: study identifier (study number).
        :type identifier: str
        :returns: filter to use for query.
        :rtype: dict
        """
        return {Study.study_number: identifier}

    @staticmethod
    def get_secondary_query_fields_for_record():
        """Get fields to query that are required to build the
        relative record (Variable).

        :returns: Variable fields.
        :rtype: list
        """
        return [Variable.variable_name, Variable._metadata]

    @staticmethod
    def get_secondary_query_document():
        """Get secondary query document (Document Store record).

        :returns: Document Store variable record.
        :rtype: :class:`kuha_common.document_store.records.Variable`
        """
        return Variable

    def get_secondary_query_filter_for_record(self):
        """Get filter which queries the relative records (variables of
        this study) from Document Store.

        :returns: filter to use for query.
        :rtype: dict
        """
        study_number = self.study.study_number.get_value()
        return {Variable.study_number: study_number}

    def build_relative_record_payload(self, relative_record):
        """Add a reference to a relative record into the payload.

        The reference is the study number joined with the variable
        name using :meth:`Payload.join_values`.

        :param relative_record: Relative record instance.
        :type relative_record: :obj:`kuha_common.document_store.records.Variable`
        """
        variable_reference = Payload.join_values(
            self.study.study_number.get_value(),
            relative_record.variable_name.get_value())
        self.payload.append(OSMH_PK_STUDY_VARIABLE, variable_reference)

    def build_record_payload(self):
        """Build payload for record."""
        super().build_record_payload()
        study = self.study
        payload = self.payload

        # Each title is published both as the title and as the prefLabel.
        for title in study.study_titles:
            for key in (OSMH_PK_STUDY_TITLE, OSMH_PK_STUDY_PREFLABEL):
                payload.insert_localized_value(
                    key, title.get_language(), title.get_value())

        for abstract in study.abstract:
            payload.insert_localized_value(
                OSMH_PK_STUDY_ABSTRACT,
                abstract.get_language(),
                abstract.get_value())

        for keyword in study.keywords:
            subject = {
                keyword.get_language(): keyword.attr_description.get_value()
            }
            payload.append(OSMH_PK_STUDY_SUBJECT, subject)

        for country in study.study_area_countries:
            spatial = {country.get_language(): country.get_value()}
            payload.append(OSMH_PK_STUDY_SPATIAL, spatial)

        for instrument in study.instruments:
            payload.append(OSMH_PK_STUDY_INSTRUMENT,
                           instrument.attr_instrument_name.get_value())

        for universe in study.universes:
            # Universes whose "included" attribute is falsy are left out.
            if not universe.attr_included.get_value():
                continue
            payload.insert_localized_value(
                OSMH_PK_STUDY_UNIVERSE,
                universe.get_language(),
                universe.get_value())

        for unit in study.analysis_units:
            description = unit.attr_description.get_value()
            # Analysis units without a description are left out.
            if not description:
                continue
            payload.insert_localized_value(
                OSMH_PK_STUDY_ANALYSISUNIT,
                unit.get_language(),
                description)

        for group in study.study_groups:
            payload.append(OSMH_PK_STUDY_INGROUP,
                           group.get_value(),
                           unique=True)
class VariableRecord(OSMHRecord):
    """OSMH Variable record.

    Derived from :class:`OSMHRecord`. The payload identifier is the
    study number joined with the variable name using
    :meth:`Payload.join_values`.

    :param variable: Variable from Document Store.
    :type variable: :obj:`kuha_common.document_store.records.Variable`
    :returns: Instantiated OSMH Variable record
    :rtype: :obj:`VariableRecord`
    """

    osmh_type = OSMH_RECORD_TYPE_VARIABLE
    query_document = Variable
    relative_queries_for_record = False

    def __init__(self, variable):
        identifier = Payload.join_values(
            variable.study_number.get_value(),
            variable.variable_name.get_value())
        super().__init__(Payload(identifier, variable.get_updated()))
        self.variable = variable

    @staticmethod
    def fields_for_header():
        """Get fields to query that are required to build the
        record header.

        :returns: Variable fields required to build record header.
        :rtype: list
        """
        return [
            Variable._metadata,
            Variable.study_number,
            Variable.variable_name,
        ]

    @staticmethod
    def fields_for_record():
        """Get fields to query that are required to build the record.

        :returns: Variable fields required to build the record.
        :rtype: list
        """
        return [
            Variable._metadata,
            Variable.study_number,
            Variable.variable_name,
            Variable.variable_labels,
            Variable.codelist_codes,
        ]

    @staticmethod
    def query_filter_for_record(identifier):
        """Get filter which queries the correct record from
        Document Store.

        The identifier is split with :meth:`Payload.split_value` and
        must yield exactly a study number and a variable name.

        :param identifier: variable identifier.
        :type identifier: str
        :returns: filter to use for query.
        :rtype: dict
        """
        study_number, variable_name = Payload.split_value(identifier)
        return {
            Variable.study_number: study_number,
            Variable.variable_name: variable_name,
        }

    def build_record_payload(self):
        """Build payload for record."""
        super().build_record_payload()
        variable = self.variable
        payload = self.payload

        payload.append(OSMH_PK_VARIABLE_INSTUDY,
                       variable.study_number.get_value())
        payload.insert(OSMH_PK_VARIABLE_NOTATION,
                       variable.variable_name.get_value())

        for label in variable.variable_labels:
            payload.insert_localized_value(
                OSMH_PK_VARIABLE_PREFLABEL,
                label.get_language(),
                label.get_value())

        # Group the code labels by code value: {code: {language: label}}.
        # A code without any label still gets an (empty) entry.
        labels_by_notation = {}
        for code in variable.codelist_codes:
            labels = labels_by_notation.setdefault(code.get_value(), {})
            label_text = code.attr_label.get_value()
            if label_text:
                labels[code.get_language()] = label_text

        # Sorting for better readability. May be removed.
        for notation in sorted(labels_by_notation):
            entry = {OSMH_PK_VARIABLE_NOTATION: notation}
            labels = labels_by_notation[notation]
            if labels:
                entry[OSMH_PK_VARIABLE_PREFLABEL] = labels
            payload.append(OSMH_PK_VARIABLE_CODELIST, entry)
class QuestionRecord(OSMHRecord):
    """OSMH Question record.

    Derived from :class:`OSMHRecord`. The payload identifier is the
    study number joined with the question identifier using
    :meth:`Payload.join_values`.

    :param question: Question from Document Store.
    :type question: :obj:`kuha_common.document_store.records.Question`
    :returns: Instantiated OSMH Question record
    :rtype: :obj:`QuestionRecord`
    """

    osmh_type = OSMH_RECORD_TYPE_QUESTION
    query_document = Question
    relative_queries_for_record = False

    def __init__(self, question):
        identifier = Payload.join_values(
            question.study_number.get_value(),
            question.question_identifier.get_value())
        super().__init__(Payload(identifier, question.get_updated()))
        self.question = question

    @staticmethod
    def fields_for_header():
        """Get fields to query that are required to build the
        record header.

        :returns: Question fields required to build record header.
        :rtype: list
        """
        return [
            Question._metadata,
            Question.study_number,
            Question.question_identifier,
        ]

    @staticmethod
    def fields_for_record():
        """Get fields to query that are required to build the record.

        :returns: Question fields required to build the record.
        :rtype: list
        """
        return [
            Question._metadata,
            Question.study_number,
            Question.question_identifier,
            Question.question_texts,
            Question.codelist_references,
        ]

    @staticmethod
    def query_filter_for_record(identifier):
        """Get filter which queries the correct record from
        Document Store.

        The identifier is split with :meth:`Payload.split_value` and
        must yield exactly a study number and a question identifier.

        :param identifier: question identifier.
        :type identifier: str
        :returns: filter to use for query.
        :rtype: dict
        """
        study_number, question_identifier = Payload.split_value(identifier)
        return {
            Question.study_number: study_number,
            Question.question_identifier: question_identifier,
        }

    def build_record_payload(self):
        """Build record payload."""
        super().build_record_payload()
        question = self.question
        payload = self.payload

        for text in question.question_texts:
            payload.insert_localized_value(
                OSMH_PK_QUESTION_QUESTIONTEXT,
                text.get_language(),
                text.get_value())

        for reference in question.codelist_references:
            # In CMM these should only be references,
            # so no support for labels here.
            entry = {OSMH_PK_QUESTION_NOTATION: reference.get_value()}
            payload.append(OSMH_PK_QUESTION_CODELIST, entry, unique=True)
class StudyGroupRecord(OSMHRecord):
    """OSMH StudyGroup record.

    Derived from :class:`OSMHRecord`. The payload identifier is the
    study group identifier of the given Document Store study group.

    :param study_group: StudyGroup from Document Store.
    :type study_group: :obj:`kuha_common.document_store.records.StudyGroup`
    :returns: Instantiated OSMH StudyGroup record
    :rtype: :obj:`StudyGroupRecord`
    """

    osmh_type = OSMH_RECORD_TYPE_STUDY_GROUP
    query_document = StudyGroup
    relative_queries_for_record = False

    def __init__(self, study_group):
        identifier = study_group.study_group_identifier.get_value()
        super().__init__(Payload(identifier, study_group.get_updated()))
        self.study_group = study_group

    @staticmethod
    def fields_for_header():
        """Get fields to query that are required to build the
        record header.

        :returns: StudyGroup fields required to build record header.
        :rtype: list
        """
        return [StudyGroup._metadata, StudyGroup.study_group_identifier]

    @staticmethod
    def fields_for_record():
        """Get fields to query that are required to build the record.

        :returns: StudyGroup fields required to build the record.
        :rtype: list
        """
        return [
            StudyGroup._metadata,
            StudyGroup.study_group_identifier,
            StudyGroup.study_group_names,
            StudyGroup.study_numbers,
        ]

    @staticmethod
    def query_filter_for_record(identifier):
        """Get filter which queries the correct record from
        Document Store.

        :param identifier: Study group identifier.
        :type identifier: str
        :returns: filter to use for query.
        :rtype: dict
        """
        return {StudyGroup.study_group_identifier: identifier}

    def build_record_payload(self):
        """Build record payload."""
        super().build_record_payload()
        payload = self.payload
        for group_name in self.study_group.study_group_names:
            payload.insert_localized_value(
                OSMH_PK_STUDYGROUP_TITLE,
                group_name.get_language(),
                group_name.get_value())
def get_osmh_record_for_type(osmh_record_type):
    """Return the OSMH record class representing `osmh_record_type`.

    :param osmh_record_type: Supported OSMH record type.
    :type osmh_record_type: str
    :raises: :exc:`KeyError` if no record class has the given type.
    :returns: One of the OSMH records defined in this module.
    :rtype: :class:`StudyRecord` or :class:`VariableRecord` or
            :class:`QuestionRecord` or :class:`StudyGroupRecord`
    """
    record_classes = (
        StudyRecord,
        VariableRecord,
        QuestionRecord,
        StudyGroupRecord,
    )
    # Dispatch table keyed by each class's own osmh_type attribute.
    classes_by_type = {
        record_class.osmh_type: record_class
        for record_class in record_classes
    }
    return classes_by_type[osmh_record_type]