#!/usr/bin/env python3
# Author(s): Toni Sissala
# Copyright 2020 Finnish Social Science Data Archive FSD / University of Tampere
# Licensed under the EUPL. See LICENSE.txt for full license.
"""Define OAI records.
:note: This module has a strict dependency to :mod:`kuha_common.document_store.records`
Contains information for querying records from document store and
appending them to responses with :class:`OAIHeaders`, :class:`OAIRecord` and
:const:`SETS`.
"""
import logging
import re
from collections import namedtuple
from kuha_common.document_store import Study
from kuha_common.document_store.query import FilterKeyConstants
from kuha_oai_pmh_repo_handler.oai.constants import (
OAI_REC_NAMESPACE_IDENTIFIER,
OAI_REC_IDENTIFIER_PREFIX,
REGEX_OAI_IDENTIFIER,
REGEX_LOCAL_IDENTIFIER,
REGEX_SETSPEC
)
#: Immutable container describing how one OAI set is configured.
SetAttribute = namedtuple(
    "Set",
    (
        # Text placed in the <setName> element.
        'setname',
        # Text placed in the <setSpec> element.
        'setspec',
        # Document store field that yields a record's setName value.
        'record_field_setname',
        # Document store field that yields a record's setSpec value.
        'record_field_setspec',
        # Document store field used when querying for records.
        'record_query_field',
        # Document store field used when querying (distinct) set values.
        'set_values_query_field',
    ),
)
#: Set configuration for the study_group set.
SET_STUDY_GROUP = SetAttribute(
    setname='Study group',
    setspec=Study.study_groups.name.name,
    # A field object rather than None: set values carry their own setName.
    record_field_setname=Study.study_groups.attr_name,
    record_field_setspec=Study.study_groups.sub_name.name,
    record_query_field=Study.study_groups.sub_name,
    # Set values are queried from the study_groups field as a whole.
    set_values_query_field=Study.study_groups,
)
#: Set configuration for the language set.
SET_LANGUAGE = SetAttribute(
    setname='Language',
    setspec=Study.study_titles.attr_language.name,
    # No document store field provides a setName for this set.
    record_field_setname=None,
    record_field_setspec=Study.study_titles.attr_language.name,
    # Records and set values are both queried via the same field.
    record_query_field=Study.study_titles.attr_language,
    set_values_query_field=Study.study_titles.attr_language,
)
#: Set configuration for the datakind set.
SET_DATAKIND = SetAttribute(
    setname='Kind of data',
    setspec=Study.data_kinds.sub_name.name,
    # No document store field provides a setName for this set.
    record_field_setname=None,
    record_field_setspec=Study.data_kinds.sub_name.name,
    # Records and set values are both queried via the same field.
    record_query_field=Study.data_kinds.sub_name,
    set_values_query_field=Study.data_kinds.sub_name,
)
#: All supported set configurations, in lookup order.
SETS = [SET_STUDY_GROUP, SET_LANGUAGE, SET_DATAKIND]
#: Compiled pattern a setSpec value has to match in full.
REGEX_VALID_SETSPEC = re.compile(REGEX_SETSPEC)


def is_valid_setspec(candidate):
    """Check whether a value is acceptable as a setSpec.

    The whole candidate must match :const:`REGEX_VALID_SETSPEC`;
    a partial match is not enough.

    :param candidate: setSpec value to validate.
    :type candidate: str
    :returns: True if valid, False if not.
    :rtype: bool
    """
    match = REGEX_VALID_SETSPEC.fullmatch(candidate)
    return match is not None
def get_record_query_field_by_setspec(setspec):
    """Look up the document store query field of a set.

    The first entry of :const:`SETS` whose ``setspec`` equals the
    requested one wins.

    :param setspec: setSpec field of the requested set.
    :type setspec: str
    :returns: document store field or None if no set matches.
    :rtype: :obj:`kuha_common.document_store.field_types.FieldAttribute` or None
    """
    matching_fields = (
        candidate.record_query_field
        for candidate in SETS
        if candidate.setspec == setspec
    )
    return next(matching_fields, None)
def get_set_specs_from_ds_record(ds_record):
    """Get set specs from document store record.

    For every configured set in :const:`SETS` the value of its
    ``record_query_field`` is read from the exported record. Values
    failing :func:`is_valid_setspec` are discarded with a logged
    warning; ``None`` values are skipped silently. Duplicates are
    removed and the remaining values keep the order in which they
    first appear in the record, so the result is stable between runs.

    :param ds_record: One of the document store records.
                      Currently only Study is supported.
    :type ds_record: Record object from :mod:`kuha_common.document_store.records`
    :returns: set specs for use in oai-headers. Keys are the setSpec
              of each set that has at least one valid value, values
              are lists of the record's values for that set.
    :rtype: dict
    """
    exported = ds_record.export_dict()
    set_specs = {}
    for set_ in SETS:
        raw = set_.record_query_field.value_from_dict(exported)
        # Treat a scalar like a one-item list so both shapes share one path.
        candidates = raw if isinstance(raw, list) else [raw]
        seen = set()
        accepted = []
        for candidate in candidates:
            if candidate is None:
                # A missing value is not an invalid one; nothing to report.
                continue
            if not is_valid_setspec(candidate):
                logging.warning(
                    "Discarding invalid setSpec value: %s", candidate
                )
                continue
            if candidate in seen:
                continue
            seen.add(candidate)
            accepted.append(candidate)
        if accepted:
            set_specs[set_.setspec] = accepted
    return set_specs
def get_sets_list_from_query_result(set_, query_result):
    """Get sets list from query results.

    The query is built on the basis of the set attributes defined
    in this module. It is a distinct type of query, so the returned
    object is not a document store record. This function accepts
    the results and builds a sets list with each cell containing
    setName and setSpec keys with their values.

    When ``set_.record_field_setname`` is set, every result item is
    read as a mapping holding both the setSpec and setName values.
    Otherwise every item is used directly as the setSpec value and
    setName is an empty string. Items without a setSpec value
    (``None``) are skipped in both cases; items producing an invalid
    setSpec are discarded with a logged warning.

    :param set_: set-attribute used for the query.
    :type set_: :obj:`SetAttribute`
    :param query_result: results from the query.
    :type query_result: dict
    :returns: list of sets to be used in list sets response.
    :rtype: list
    """
    sets = []
    for record in query_result[set_.set_values_query_field.path]:
        if set_.record_field_setname:
            spec_value = record.get(set_.record_field_setspec)
            if spec_value is None:
                continue
            set_name = record.get(set_.record_field_setname.name, '')
        else:
            # Same rule as above: no setSpec value, no set.
            if record is None:
                continue
            spec_value = record
            set_name = ''
        set_spec = set_.setspec + ':' + spec_value
        if not is_valid_setspec(set_spec):
            logging.warning(
                "Discarding invalid setSpec value: %s", set_spec
            )
            continue
        sets.append({'setName': set_name,
                     'setSpec': set_spec})
    return sets
def get_query_filter_for_set(set_request):
    """Build the document store query filter for a requested set.

    A request without a colon names a whole set and filters on the
    existence of the set's query field. A request with exactly one
    colon is read as ``<setspec>:<value>`` and filters on that value.
    Requests with more colons, or naming a set that is not
    configured, give None.

    :param str set_request: requested set
    :returns: Query filter or None
    :rtype: dict or None
    """
    key, separator, remainder = set_request.partition(':')
    if ':' in remainder:
        # More than one colon in the request.
        return None
    if separator:
        value = remainder
    else:
        value = {FilterKeyConstants.exists: True}
    query_field = get_record_query_field_by_setspec(key)
    if query_field is None:
        return None
    return {query_field: value}
class OAIRecord:
    """Class stores record and headers.

    Headers are built from the study with
    :meth:`OAIHeaders.from_ds_record`. Variables and questions start
    out empty and are added afterwards.

    :param study: Document Store study record.
    :type study: :obj:`kuha_common.document_store.records.Study`
    """

    def __init__(self, study):
        self.study = study
        self.headers = OAIHeaders.from_ds_record(study)
        self.variables = []
        # Maps variable_name -> list of questions referring to it.
        self.questions = {}

    def add_variable(self, variable):
        """Add variable to OAIRecord.

        :param variable: Document Store variable.
        :type variable: :obj:`kuha_common.document_store.records.Variable`
        """
        self.variables.append(variable)

    def add_question(self, question):
        """Add question to OAIRecord.

        Question lookup is done by variable name. Therefore
        it makes sense to use a dictionary with variable_name as key.
        The key content will be a list, since a variable may refer to
        multiple questions.

        :note: questions without variable_name will be discarded
               and a warning will be logged.

        :param question: Document Store question.
        :type question: :obj:`kuha_common.document_store.records.Question`
        """
        variable_name = question.variable_name.get_value()
        if not variable_name:
            logging.warning("Discarding question without variable_name")
            return
        self.questions.setdefault(variable_name, []).append(question)

    def get_questions_by_variable(self, variable):
        """Get questions for OAIRecord by variable.

        Lookup questions by variable's variable_name.

        :param variable: Document Store variable.
        :type variable: :obj:`kuha_common.document_store.records.Variable`
        :returns: List of :obj:`kuha_common.document_store.records.Question`,
                  empty if no question was added for the variable.
        :rtype: list
        """
        return self.questions.get(variable.variable_name.get_value(), [])

    def iter_relpubls(self):
        """Iterates related publications by distinct language and description.

        Generator yields two-tuples ('lang_desc', 'relpubls'): 'lang_desc' is
        a two-tuple with the first item being the language of the related
        publication and the second item being its description.
        'relpubls' is a list containing all related publications of the
        study that share that language and description.

        :returns: generator that yields tuples (lang_desc, relpubls)
        """
        grouped = {}
        for relpubl in self.study.related_publications:
            key = (relpubl.get_language(), relpubl.attr_description.get_value())
            grouped.setdefault(key, []).append(relpubl)
        for lang_desc, group in grouped.items():
            yield lang_desc, group