#!/usr/bin/env python3
# Author(s): Toni Sissala
# Copyright 2020 Finnish Social Science Data Archive FSD / University of Tampere
# Licensed under the EUPL. See LICENSE.txt for full license.
"""Build OSMH payload from Document Store record objects.
Provide mapping between these two record formats.
Provide Document Store fields for querying.
:note: This module has a strict dependency on
:mod:`kuha_common.document_store.records`
"""
from abc import ABC, abstractmethod
from kuha_common.document_store.records import (
Study,
Variable,
Question,
StudyGroup
)
# Import OSMH record types and payload keys
from kuha_osmh_repo_handler.osmh.constants import (
OSMH_RECORD_TYPE_STUDY,
OSMH_RECORD_TYPE_VARIABLE,
OSMH_RECORD_TYPE_QUESTION,
OSMH_RECORD_TYPE_STUDY_GROUP,
OSMH_RECORD_HEADER,
OSMH_PK_IDENTIFIER,
OSMH_PK_LAST_MODIFIED,
OSMH_PK_RECORD_TYPE,
OSMH_PK_TYPE,
OSMH_PK_STUDY_TITLE,
OSMH_PK_STUDY_PREFLABEL,
OSMH_PK_STUDY_ABSTRACT,
OSMH_PK_STUDY_SUBJECT,
OSMH_PK_STUDY_INSTRUMENT,
OSMH_PK_STUDY_INGROUP,
OSMH_PK_STUDY_VARIABLE,
OSMH_PK_STUDY_SPATIAL,
OSMH_PK_STUDY_UNIVERSE,
OSMH_PK_STUDY_ANALYSISUNIT,
OSMH_PK_VARIABLE_INSTUDY,
OSMH_PK_VARIABLE_NOTATION,
OSMH_PK_VARIABLE_PREFLABEL,
OSMH_PK_VARIABLE_CODELIST,
OSMH_PK_QUESTION_QUESTIONTEXT,
OSMH_PK_QUESTION_CODELIST,
OSMH_PK_QUESTION_NOTATION,
OSMH_PK_STUDYGROUP_TITLE
)
# OSMH record types listed by this module, one per record class defined below.
OSMH_RECORD_TYPES = [OSMH_RECORD_TYPE_STUDY,
                     OSMH_RECORD_TYPE_VARIABLE,
                     OSMH_RECORD_TYPE_QUESTION,
                     OSMH_RECORD_TYPE_STUDY_GROUP]
class Payload:
    """Container for the payload of a single OSMH record.

    The payload is kept in a plain dictionary, which makes it
    straightforward to encode to JSON. Methods are provided for
    inserting plain values, localized values and list items.

    Example::

        >>> from kuha_osmh_repo_handler.osmh.records import Payload
        >>> payload = Payload('1', '2017-01-01')
        >>> payload.insert_localized_value('study_title', 'en', 'Household Survey')
        >>> payload.insert_localized_value('study_title', 'fi', 'Kotitalouskysely')
        >>> payload.get() # Indent for better readability
        {'identifier': '1',
         'lastModified': '2017-01-01',
         'study_title':
            {'fi': 'Kotitalouskysely',
             'en': 'Household Survey'}
        }

    :param identifier: Record's OSMH-identifier. Must uniquely identify the
                       record among other records of the same OSMH record
                       type in the repository.
    :type identifier: str
    :param last_modified: timestamp of the last modification made to the record.
    :type last_modified: str
    :raises: :exc:`ValueError` if either argument is falsy.
    :returns: :obj:`Payload`
    """

    # Payload keys shared by every record type.
    k_identifier = OSMH_PK_IDENTIFIER
    k_last_modified = OSMH_PK_LAST_MODIFIED
    k_record_type = OSMH_PK_RECORD_TYPE
    k_type = OSMH_PK_TYPE
    # Separator used by join_values() and split_value().
    _join_character = ':'

    def __init__(self, identifier, last_modified):
        if not (identifier and last_modified):
            raise ValueError("Identifier (%s) and last_modified (%s) must not be None"%
                             (identifier, last_modified))
        self.payload = {
            self.k_identifier: identifier,
            self.k_last_modified: last_modified,
        }

    @classmethod
    def join_values(cls, *args):
        r"""Combine values into a single string separated by :attr:`_join_character`.

        Every value is converted with :func:`str` before joining.

        :param \*args: values to join
        :type \*args: str
        :raises: :exc:`ValueError` if no values are given, if any value is
                 falsy, or if any converted value already contains
                 :attr:`_join_character`.
        :returns: joined values
        :rtype: str
        """
        if not (args and all(args)):
            raise ValueError("Values to join may not be None")
        parts = [str(arg) for arg in args]
        # A value containing the separator could not be split back unambiguously.
        offending = [part for part in parts if cls._join_character in part]
        if offending:
            raise ValueError("Values may not contain join_character: (%s)"%offending)
        return cls._join_character.join(parts)

    @classmethod
    def split_value(cls, value):
        """Break a value into parts at every :attr:`_join_character`.

        :param value: value to split
        :type value: str
        :returns: parts of the value
        :rtype: list
        """
        separator = cls._join_character
        return value.split(separator)

    def insert(self, key, value):
        """Store a value under the given payload key.

        An existing value for the key is replaced; a missing key is created.

        :param key: payload key for the value.
        :type key: str
        :param value: value to be inserted.
        :type value: str
        """
        self.payload[key] = value

    def insert_localized_value(self, key, locale, value):
        """Store a value for a locale under the given payload key.

        The payload key holds a dictionary that maps locales to values.
        If the key is not yet present in the payload, it is created.

        :param key: payload key
        :type key: str
        :param locale: locale of the value
        :type locale: str
        :param value: payload value
        :type value: str
        """
        localized = self.payload.setdefault(key, {})
        localized.update({locale: value})

    def append(self, key, value, unique=False):
        """Add a list item under the given payload key.

        If the key is not in the payload, it is created with a
        single-item list containing the value. If ``unique`` is true,
        a value that is already in the list is not added again.

        :param key: payload key
        :type key: str
        :param value: value to insert as list item
        :type value: str
        :param unique: whether to keep the list of values unique (no duplicates)
        :type unique: bool
        """
        if key not in self.payload:
            self.payload[key] = [value]
            return
        items = self.payload[key]
        if unique and value in items:
            return
        items.append(value)

    def get(self):
        """Return the payload built so far.

        The returned dictionary is the internal one, not a copy.

        :returns: OSMH payload
        :rtype: dict
        """
        return self.payload
class OSMHRecord(ABC):
    """Abstract base class shared by all OSMH records.

    Not meant to be instantiated directly; use one of the subclasses.
    Holds the record payload and declares the attributes and methods
    every OSMH record must provide.

    :param payload: payload of the record.
    :type payload: :obj:`Payload`
    :raises: :exc:`TypeError` on instantiation if the subclass does not
             define the abstract attributes and methods.
    """

    def __init__(self, payload):
        self.payload = payload

    @property
    @abstractmethod
    def osmh_type(self):
        """OSMH record type. Declare in subclass."""

    @property
    @abstractmethod
    def query_document(self):
        """Document Store record to query. Declare in subclass."""

    @property
    @abstractmethod
    def relative_queries_for_record(self):
        """Whether the record response requires relative
        records to be queried from Document Store.
        Declare in subclass.
        """

    @staticmethod
    @abstractmethod
    def fields_for_record():
        """Return the fields to query in order to build the record.

        Override in subclass.
        """

    @staticmethod
    @abstractmethod
    def query_filter_for_record(identifier):
        """Return the filter that selects the correct record from Document Store.

        Override in subclass.
        """

    @classmethod
    def for_record_response(cls, ds_record):
        """Instantiate a record and build its payload for a record response.

        :param ds_record: Document Store record.
        :type ds_record: Record defined in :mod:`kuha_common.document_store.records`
        :returns: Instantiated OSMH record object with its payload built.
        """
        instance = cls(ds_record)
        instance.build_record_payload()
        return instance

    @classmethod
    def get_query_document(cls):
        """Return the Document Store record class used for querying.

        :returns: value of the class attribute :attr:`query_document`.
        """
        return cls.query_document

    @classmethod
    def requires_relative_queries_for_record(cls):
        """Tell whether building the full record response requires
        querying relative records from Document Store.

        :returns: value of the class attribute
                  :attr:`relative_queries_for_record`.
        :rtype: bool
        """
        return cls.relative_queries_for_record

    @abstractmethod
    def build_record_payload(self):
        """Build the part of the payload common to all records.

        Inserts the OSMH record type into the payload. Subclasses must
        override this method and are expected to call it via ``super()``.
        """
        record_type_key = self.payload.k_record_type
        self.payload.insert(record_type_key, self.osmh_type)

    def get_payload(self):
        """Return the payload built for this record.

        :returns: record payload.
        :rtype: dict
        """
        return self.payload.get()
class StudyRecord(OSMHRecord):
    """OSMH Study record built from a Document Store study.

    Derived from :class:`OSMHRecord`. The study number is used as the
    record identifier.

    :param study: Study from Document Store.
    :type study: :obj:`kuha_common.document_store.records.Study`
    :returns: Instantiated OSMH Study record
    :rtype: :obj:`StudyRecord`
    """

    osmh_type = OSMH_RECORD_TYPE_STUDY
    query_document = Study
    # A study response also lists its variables, which are queried separately.
    relative_queries_for_record = True

    def __init__(self, study):
        identifier = study.study_number.get_value()
        last_modified = study.get_updated()
        super().__init__(Payload(identifier, last_modified))
        self.study = study

    @staticmethod
    def fields_for_record():
        """Return the Study fields to query in order to build the record.

        :returns: Study fields required to build the record.
        :rtype: list
        """
        fields = [Study._metadata,
                  Study.study_number,
                  Study.study_titles,
                  Study.universes,
                  Study.abstract,
                  Study.keywords,
                  Study.study_area_countries,
                  Study.instruments,
                  Study.analysis_units,
                  Study.study_groups]
        return fields

    @staticmethod
    def query_filter_for_record(identifier):
        """Return the filter that selects the study from Document Store.

        :param identifier: study identifier (study number).
        :type identifier: str
        :returns: filter to use for query.
        :rtype: dict
        """
        _filter = {Study.study_number: identifier}
        return _filter

    @staticmethod
    def get_secondary_query_fields_for_record():
        """Return the Variable fields to query in order to build
        the relative record part of the payload.

        :returns: Variable fields.
        :rtype: list
        """
        fields = [Variable.variable_name, Variable._metadata]
        return fields

    @staticmethod
    def get_secondary_query_document():
        """Return the Document Store record used for the secondary query.

        :returns: Document Store variable record.
        :rtype: :class:`kuha_common.document_store.records.Variable`
        """
        return Variable

    def get_secondary_query_filter_for_record(self):
        """Return the filter that selects the variables of this study
        from Document Store.

        :returns: filter to use for query.
        :rtype: dict
        """
        study_number = self.study.study_number.get_value()
        return {Variable.study_number: study_number}

    def build_relative_record_payload(self, relative_record):
        """Add a relative record (variable) to the payload.

        The appended value is the study number joined with the
        variable name by :meth:`Payload.join_values`.

        :param relative_record: Relative record instance.
        :type relative_record: :obj:`kuha_common.document_store.records.Variable`
        """
        variable_id = Payload.join_values(
            self.study.study_number.get_value(),
            relative_record.variable_name.get_value())
        self.payload.append(OSMH_PK_STUDY_VARIABLE, variable_id)

    def build_record_payload(self):
        """Build the payload for the study record."""
        super().build_record_payload()
        payload = self.payload
        study = self.study
        # Each title is exposed both as the study title and as its preferred label.
        for title in study.study_titles:
            for title_key in (OSMH_PK_STUDY_TITLE, OSMH_PK_STUDY_PREFLABEL):
                payload.insert_localized_value(
                    title_key, title.get_language(), title.get_value())
        for abstract in study.abstract:
            payload.insert_localized_value(
                OSMH_PK_STUDY_ABSTRACT, abstract.get_language(), abstract.get_value())
        for keyword in study.keywords:
            subject = {keyword.get_language(): keyword.attr_description.get_value()}
            payload.append(OSMH_PK_STUDY_SUBJECT, subject)
        for country in study.study_area_countries:
            spatial = {country.get_language(): country.get_value()}
            payload.append(OSMH_PK_STUDY_SPATIAL, spatial)
        for instrument in study.instruments:
            payload.append(OSMH_PK_STUDY_INSTRUMENT,
                           instrument.attr_instrument_name.get_value())
        for universe in study.universes:
            # Universes whose 'included' attribute is falsy are left out.
            if not universe.attr_included.get_value():
                continue
            payload.insert_localized_value(
                OSMH_PK_STUDY_UNIVERSE, universe.get_language(), universe.get_value())
        for unit in study.analysis_units:
            description = unit.attr_description.get_value()
            # Analysis units without a description are left out.
            if not description:
                continue
            payload.insert_localized_value(
                OSMH_PK_STUDY_ANALYSISUNIT, unit.get_language(), description)
        for group in study.study_groups:
            payload.append(OSMH_PK_STUDY_INGROUP, group.get_value(), unique=True)
class VariableRecord(OSMHRecord):
    """Represents OSMH Variable.

    Derived from :class:`OSMHRecord`. The record identifier is the
    study number and the variable name joined with
    :meth:`Payload.join_values`.

    :param variable: Variable from Document Store.
    :type variable: :obj:`kuha_common.document_store.records.Variable`
    :returns: Instantiated OSMH Variable record
    :rtype: :obj:`VariableRecord`
    """

    osmh_type = OSMH_RECORD_TYPE_VARIABLE
    query_document = Variable
    relative_queries_for_record = False

    def __init__(self, variable):
        _id = Payload.join_values(variable.study_number.get_value(),
                                  variable.variable_name.get_value())
        super().__init__(Payload(_id, variable.get_updated()))
        self.variable = variable

    @staticmethod
    def fields_for_record():
        """Get fields to query that are required to build the record.

        :returns: Variable fields required to build the record.
        :rtype: list
        """
        return [
            Variable._metadata,
            Variable.study_number,
            Variable.variable_name,
            Variable.variable_labels,
            Variable.codelist_codes
        ]

    @staticmethod
    def query_filter_for_record(identifier):
        """Get filter which queries the correct record from Document Store.

        The identifier is split with :meth:`Payload.split_value` and must
        consist of exactly two parts: study number and variable name.

        :param identifier: variable identifier.
        :type identifier: str
        :raises: :exc:`ValueError` if the identifier does not split into
                 exactly two parts.
        :returns: filter to use for query.
        :rtype: dict
        """
        parts = Payload.split_value(identifier)
        # Check explicitly so that a malformed identifier gives a clear
        # message instead of a bare tuple-unpacking error.
        if len(parts) != 2:
            raise ValueError(
                "Invalid variable identifier %r: expected "
                "'<study_number>%s<variable_name>'"
                % (identifier, Payload._join_character))
        study_number, variable_name = parts
        return {Variable.study_number: study_number,
                Variable.variable_name: variable_name}

    def build_record_payload(self):
        """Build payload for record."""
        super().build_record_payload()
        self.payload.append(OSMH_PK_VARIABLE_INSTUDY,
                            self.variable.study_number.get_value())
        self.payload.insert(OSMH_PK_VARIABLE_NOTATION,
                            self.variable.variable_name.get_value())
        for label in self.variable.variable_labels:
            self.payload.insert_localized_value(
                OSMH_PK_VARIABLE_PREFLABEL, label.get_language(), label.get_value())
        # Group the localized labels of each code under its notation.
        # A code without any label still gets an (empty) entry.
        labels_by_notation = {}
        for code in self.variable.codelist_codes:
            labels = labels_by_notation.setdefault(code.get_value(), {})
            label = code.attr_label.get_value()
            if label:
                labels[code.get_language()] = label
        # Sorting for better readability. May be removed.
        for notation in sorted(labels_by_notation):
            entry = {OSMH_PK_VARIABLE_NOTATION: notation}
            labels = labels_by_notation[notation]
            # The preferred label is included only when at least one label exists.
            if labels:
                entry[OSMH_PK_VARIABLE_PREFLABEL] = labels
            self.payload.append(OSMH_PK_VARIABLE_CODELIST, entry)
class QuestionRecord(OSMHRecord):
    """Represents OSMH Question.

    Derived from :class:`OSMHRecord`. The record identifier is the
    study number and the question identifier joined with
    :meth:`Payload.join_values`.

    :param question: Question from Document Store.
    :type question: :obj:`kuha_common.document_store.records.Question`
    :returns: Instantiated OSMH Question record
    :rtype: :obj:`QuestionRecord`
    """

    osmh_type = OSMH_RECORD_TYPE_QUESTION
    query_document = Question
    relative_queries_for_record = False

    def __init__(self, question):
        _id = Payload.join_values(question.study_number.get_value(),
                                  question.question_identifier.get_value())
        super().__init__(Payload(_id, question.get_updated()))
        self.question = question

    @staticmethod
    def fields_for_record():
        """Get fields to query that are required to build the record.

        :returns: Question fields required to build the record.
        :rtype: list
        """
        return [
            Question._metadata,
            Question.study_number,
            Question.question_identifier,
            Question.question_texts,
            Question.codelist_references,
        ]

    @staticmethod
    def query_filter_for_record(identifier):
        """Get filter which queries the correct record from Document Store.

        The identifier is split with :meth:`Payload.split_value` and must
        consist of exactly two parts: study number and question identifier.

        :param identifier: question identifier.
        :type identifier: str
        :raises: :exc:`ValueError` if the identifier does not split into
                 exactly two parts.
        :returns: filter to use for query.
        :rtype: dict
        """
        parts = Payload.split_value(identifier)
        # Check explicitly so that a malformed identifier gives a clear
        # message instead of a bare tuple-unpacking error.
        if len(parts) != 2:
            raise ValueError(
                "Invalid question identifier %r: expected "
                "'<study_number>%s<question_identifier>'"
                % (identifier, Payload._join_character))
        study_number, question_identifier = parts
        return {Question.study_number: study_number,
                Question.question_identifier: question_identifier}

    def build_record_payload(self):
        """Build record payload."""
        super().build_record_payload()
        for text in self.question.question_texts:
            self.payload.insert_localized_value(OSMH_PK_QUESTION_QUESTIONTEXT,
                                                text.get_language(),
                                                text.get_value())
        for codelist in self.question.codelist_references:
            # In CMM these should only be references, so no support for labels here.
            self.payload.append(OSMH_PK_QUESTION_CODELIST,
                                {OSMH_PK_QUESTION_NOTATION: codelist.get_value()},
                                unique=True)
class StudyGroupRecord(OSMHRecord):
    """OSMH StudyGroup record built from a Document Store study group.

    Derived from :class:`OSMHRecord`. The study group identifier is
    used as the record identifier.

    :param study_group: StudyGroup from Document Store.
    :type study_group: :obj:`kuha_common.document_store.records.StudyGroup`
    :returns: Instantiated OSMH StudyGroup record
    :rtype: :obj:`StudyGroupRecord`
    """

    osmh_type = OSMH_RECORD_TYPE_STUDY_GROUP
    query_document = StudyGroup
    relative_queries_for_record = False

    def __init__(self, study_group):
        identifier = study_group.study_group_identifier.get_value()
        last_modified = study_group.get_updated()
        super().__init__(Payload(identifier, last_modified))
        self.study_group = study_group

    @staticmethod
    def fields_for_record():
        """Return the StudyGroup fields to query in order to build the record.

        :returns: StudyGroup fields required to build the record.
        :rtype: list
        """
        # NOTE(review): study_numbers is queried although
        # build_record_payload() does not read it.
        fields = [StudyGroup._metadata,
                  StudyGroup.study_group_identifier,
                  StudyGroup.study_group_names,
                  StudyGroup.study_numbers]
        return fields

    @staticmethod
    def query_filter_for_record(identifier):
        """Return the filter that selects the study group from Document Store.

        :param identifier: Study group identifier.
        :type identifier: str
        :returns: filter to use for query.
        :rtype: dict
        """
        _filter = {StudyGroup.study_group_identifier: identifier}
        return _filter

    def build_record_payload(self):
        """Build the payload for the study group record."""
        super().build_record_payload()
        payload = self.payload
        for group_name in self.study_group.study_group_names:
            payload.insert_localized_value(
                OSMH_PK_STUDYGROUP_TITLE,
                group_name.get_language(),
                group_name.get_value())
def get_osmh_record_for_type(osmh_record_type):
    """Look up the OSMH record class for `osmh_record_type`.

    :param osmh_record_type: Supported OSMH record type.
    :type osmh_record_type: str
    :raises: :exc:`KeyError` if no record class has the given type.
    :returns: One of the OSMH records defined in this module.
    :rtype: :class:`StudyRecord` or :class:`VariableRecord` or
            :class:`QuestionRecord` or :class:`StudyGroupRecord`
    """
    record_classes = (StudyRecord,
                      VariableRecord,
                      QuestionRecord,
                      StudyGroupRecord)
    # Each record class declares the OSMH type it represents.
    classes_by_type = {record_cls.osmh_type: record_cls
                       for record_cls in record_classes}
    return classes_by_type[osmh_record_type]