from kuha_common.query import QueryController
from .const import valid_openaire_id_types
class MDSet:
    """Abstract base for OAI-set definitions.

    A concrete set is created by subclassing and assigning the class
    attribute ``spec``. Instantiating a class whose ``spec`` is still
    ``None`` raises :exc:`NotImplementedError`.

    :param mdformat: metadata format object, stored on the instance as
                     ``_mdformat`` for use by subclasses.
    :raises NotImplementedError: if ``spec`` has not been defined.
    """

    # Top-level setSpec key. Subclasses must override this.
    spec = None

    # Filter value built from the docstore "exists" constant. Subclasses
    # use it as the filter value when no specific set value was requested.
    _exists_filter = {QueryController.fk_constants.exists: True}

    def __init__(self, mdformat):
        if self.spec is None:
            raise NotImplementedError("MDSet must be subclassed and 'spec' must be defined.")
        self._mdformat = mdformat

    @classmethod
    def add_cli_args(cls, parser):
        """Add command line arguments to parser.

        The base implementation adds nothing.

        :param :obj:`configargparse.ArgumentParser` parser: Active command line parser.
        """

    async def fields(self):
        """Return list of fields to include when querying for record headers.

        This is used when gathering all docstore fields that are needed to
        construct oai headers.

        :returns: list of fields
        :rtype: list
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    async def query(self, on_set_cb):
        """Query and add distinct values for setspecs.

        This is used when constructing ListSets OAI response.

        :param on_set_cb: Async callback with signature
                          (spec, name=None, description=None),
                          where spec is the setSpec value.
        :returns: None
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    async def get(self, study):
        """Get values from record used in setspec: '<key>:<value>'.

        A None item ([None]) will leave out the <value> part: '<key>'.
        This is used when constructing setspecs for a specific record.

        :param study: study record to get set values from
        :returns: List of values
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    async def filter(self, value):
        """Return a query filter that includes all studies matching 'value'.

        This is used when constructing docstore query that will include all
        records in this OAI-set group. In other words, in selective harvesting.

        :param str or None value: Request setspec value after colon
                                  (setspec = <self.spec>:<value>). If the
                                  requested setspec only includes the
                                  top-level setspec part
                                  (setspec = <self.spec>) the parameter
                                  is None.
        :returns: query filter
        :rtype: dict
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError
class LanguageSet(MDSet):
    """OAI-set keyed on the language attribute of study titles."""

    spec = 'language'

    async def fields(self):
        """Return the fields needed to build language set record headers.

        :returns: single-item list holding the study titles field
        :rtype: list
        """
        study_cls = self._mdformat.study_class
        return [study_cls.study_titles]

    async def query(self, on_set_cb):
        """Query distinct title languages and report each one as a setspec.

        The top-level setspec is reported first, followed by one
        '<spec>:<language>' setspec per distinct language.

        :param on_set_cb: Async callback called for each setspec
        :returns: None
        """
        mdformat = self._mdformat
        lang_field = mdformat.study_class.study_titles.attr_language
        distinct = await QueryController().query_distinct(
            mdformat.study_class,
            headers=mdformat.corr_id_header,
            fieldname=lang_field)
        await on_set_cb(self.spec, name='Language')
        # A missing result key is treated as "no languages".
        languages = distinct.get(lang_field.path, [])
        for language in languages:
            setspec = '%s:%s' % (self.spec, language)
            await on_set_cb(setspec, name='Language %s' % (language,))

    async def get(self, study):
        """Get language oai-set values from docstore record.

        :param study: Document store Study
        :returns: list of available languages of the study
        :rtype: list
        """
        titles = study.study_titles
        return titles.get_available_languages()

    async def filter(self, value):
        """Return a query filter that includes all studies matching 'value'.

        A falsy 'value' selects every study that has the language attribute.

        :param str or None value: Request setspec value after colon
        :returns: query filter
        :rtype: dict
        """
        lang_field = self._mdformat.study_class.study_titles.attr_language
        if value:
            return {lang_field: value}
        return {lang_field: self._exists_filter}
class StudyGroupsSet(MDSet):
    """OAI-Set Study groups"""

    spec = 'study_groups'

    async def fields(self):
        """Return a list of fields to include in query for header fields.

        :returns: list of fields
        :rtype: list
        """
        return [self._mdformat.study_class.study_groups]

    async def query(self, on_set_cb):
        """Query and add distinct values for study groups.

        The top-level setspec is reported first. Every distinct study group
        item that carries the sub-name key is then reported as
        '<spec>:<sub-name>'. Items without that key are skipped.

        :param on_set_cb: Async callable called for each setspec
        :returns: None
        """
        groups_field = self._mdformat.study_class.study_groups
        result = await QueryController().query_distinct(
            self._mdformat.study_class,
            headers=self._mdformat.corr_id_header,
            fieldname=groups_field)
        await on_set_cb(self.spec, name='Study group')
        value_key = groups_field.sub_name.name
        name_key = groups_field.attr_name.name
        for group in result.get(groups_field.path, []):
            if value_key not in group:
                # Without a sub-name there is nothing to build a setspec from.
                continue
            await on_set_cb(
                '%s:%s' % (self.spec, group[value_key]),
                name=group.get(name_key, ''))

    async def get(self, study):
        """Get values from study used to construct setspecs for study groups.

        None values and duplicates are left out; first-seen order is kept.

        :param study: Document store study
        :returns: list of distinct study group values.
        :rtype: list
        """
        study_groups = []
        for group in study.study_groups:
            val = group.get_value()
            if val is None or val in study_groups:
                continue
            # Append the value that was just checked instead of calling
            # get_value() a second time.
            study_groups.append(val)
        return study_groups

    async def filter(self, value):
        """Return a query filter that includes all studies matching 'value'.

        A falsy 'value' selects every study that has a study group sub-name.

        :param str or None value: Request setspec value after colon
        :returns: query filter
        :rtype: dict
        """
        return {self._mdformat.study_class.study_groups.sub_name: value or self._exists_filter}
class DataKindSet(MDSet):
    """OAI-Set Data kind"""

    spec = 'data_kind'

    async def fields(self):
        """Return list of fields to include when querying for header fields.

        :returns: list of fields
        :rtype: list
        """
        return [self._mdformat.study_class.data_kinds]

    async def query(self, on_set_cb):
        """Query and add distinct values for Data kinds.

        The top-level setspec is reported first, followed by one
        '<spec>:<data kind>' setspec per distinct value.

        :param on_set_cb: Async callable called for each setspec.
        :returns: None
        """
        kind_field = self._mdformat.study_class.data_kinds.sub_name
        result = await QueryController().query_distinct(
            self._mdformat.study_class, headers=self._mdformat.corr_id_header,
            fieldname=kind_field)
        await on_set_cb(self.spec, name='Kind of data')
        for kind in result.get(kind_field.path, []):
            await on_set_cb('%s:%s' % (self.spec, kind), name='%s data' % (kind,))

    async def get(self, study):
        """Get values from record for oai-set Data kinds.

        None values and duplicates are left out; first-seen order is kept.

        :param study: Document store study
        :returns: list of study's data kinds
        :rtype: list
        """
        data_kinds = []
        for kind in study.data_kinds:
            val = kind.get_value()
            if val is None or val in data_kinds:
                continue
            # Append the value that was just checked instead of calling
            # get_value() a second time.
            data_kinds.append(val)
        return data_kinds

    async def filter(self, value):
        """Return a query filter that includes all studies matching 'value'.

        A falsy 'value' selects every study that has a data kind sub-name.

        :param str or None value: Request setspec value after colon
        :returns: query filter
        :rtype: dict
        """
        return {self._mdformat.study_class.data_kinds.sub_name: value or self._exists_filter}
class OpenAIREDataSet(MDSet):
    """OAI-set for OpenAIRE data; a flat set with no sub-values."""

    spec = 'openaire_data'

    async def fields(self):
        """Return the fields needed to build header fields for this set.

        :returns: single-item list holding the study identifiers field
        :rtype: list
        """
        study_cls = self._mdformat.study_class
        return [study_cls.identifiers]

    async def query(self, on_set_cb):
        """Report the single setspec of the OpenAIRE data set.

        The openaire_data setspec is non-hierarchical and only contains a
        single setspec: '<setSpec>openaire_data</setSpec>'. No docstore
        query is made.

        :param on_set_cb: Async callable, called for each setspec.
        :returns: None
        """
        await on_set_cb(self.spec, name='OpenAIRE')

    async def get(self, study):
        """Tell whether the study belongs to the openaire_data set.

        The OpenAIRE set has no hierarchy of values. A study belongs to the
        set when at least one of its identifiers has an agency value found
        in ``valid_openaire_id_types``.

        :param study: Document store study
        :returns: [None] if study belongs to the set, [] if not.
        :rtype: list
        """
        belongs = any(
            ident.attr_agency.get_value() in valid_openaire_id_types
            for ident in study.identifiers)
        return [None] if belongs else []

    async def filter(self, value):
        """Return a query filter that includes all studies in set.

        The parameter 'value' is discarded.

        :param value: parameter is discarded.
        :returns: Query filter
        :rtype: dict
        """
        agency_field = self._mdformat.study_class.identifiers.attr_agency
        accepted = {QueryController.fk_constants.in_: list(valid_openaire_id_types)}
        return {agency_field: accepted}