Source code for kuha_oai_pmh_repo_handler.metadataformats._mdsets

from kuha_common.query import QueryController
from .const import valid_openaire_id_types


class MDSet:
    """Base class for OAI-set definitions.

    Concrete sets subclass this class, assign a value to :attr:`spec`
    and override the coroutines :meth:`fields`, :meth:`query`,
    :meth:`get` and :meth:`filter`.
    """

    # Top-level setSpec key. Left as None here so that instantiating a
    # class without its own spec fails in __init__.
    spec = None
    # Filter condition used by the subclasses in this module when a set is
    # requested without a value part (an "exists" condition on the field).
    _exists_filter = {QueryController.fk_constants.exists: True}

    def __init__(self, mdformat):
        """Bind the set to a metadata format.

        :param mdformat: Metadata format instance this set is attached to.
        :raises NotImplementedError: if the class does not define 'spec'.
        """
        if self.spec is None:
            message = "MDSet must be subclassed and 'spec' must be defined."
            raise NotImplementedError(message)
        self._mdformat = mdformat

    @classmethod
    def add_cli_args(cls, parser):
        """Add command line arguments to parser.

        The base implementation adds nothing.

        :param parser: Active command line parser.
        :type parser: :obj:`configargparse.ArgumentParser`
        """

    @classmethod
    def configure(cls, settings):
        """Configure set using settings.

        Consult settings and configure set. Called via MDFormat on
        server startup. Return False if this set should not be loaded.
        The base implementation does nothing and returns None.

        :param settings: Loaded settings
        :type settings: :obj:`argparse.Namespace`
        :returns: False to bypass loading of this set.
        """

    async def fields(self):
        """Return list of fields to include when querying for record headers.

        This is used when gathering all docstore fields that are needed
        to construct oai headers.

        :returns: list of fields
        :rtype: list
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError()

    async def query(self, on_set_cb):
        """Query and add distinct values for setspecs.

        This is used when constructing ListSets OAI response.

        :param on_set_cb: Async callback with signature
                          (spec, name=None, description=None), where
                          spec is the setSpec value.
        :returns: None
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError()

    async def get(self, study):
        """Get values from record used in setspec: '<key>:<value>'.

        A None item ([None]) will leave out the <value> part: '<key>'

        This is used when constructing setspecs for a specific record.

        :param study: study record to get set values from
        :returns: List of values
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError()

    async def filter(self, value):
        """Return a query filter that includes all studies matching 'value'.

        This is used when constructing docstore query that will include
        all records in this OAI-set group. In other words, in selective
        harvesting.

        :param str or None value: Request setspec value after colon
                                  (setspec = <self.spec>:<value>). If the
                                  requested setspec only includes the
                                  top-level setspec part
                                  (setspec = <self.spec>) the parameter
                                  is None.
        :returns: query filter
        :rtype: dict
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError()
class LanguageSet(MDSet):
    """OAI-set built from the languages of study titles."""

    spec = 'language'

    async def fields(self):
        """Return the fields needed to build language set headers.

        :returns: list containing the study titles field
        :rtype: list
        """
        study_cls = self._mdformat.study_class
        return [study_cls.study_titles]

    async def query(self, on_set_cb):
        """Query distinct title languages and report one setspec for each.

        The top-level 'language' setspec is reported first, followed by
        a 'language:<lang>' setspec for every distinct language found.

        :param on_set_cb: Async callback called for each setspec
        :returns: None
        """
        study_cls = self._mdformat.study_class
        language_attr = study_cls.study_titles.attr_language
        distinct = await QueryController().query_distinct(
            study_cls,
            headers=self._mdformat.corr_id_header,
            fieldname=language_attr)
        await on_set_cb(self.spec, name='Language')
        languages = distinct.get(language_attr.path, [])
        for language in languages:
            setspec = '%s:%s' % (self.spec, language)
            await on_set_cb(setspec, name='Language %s' % (language,))

    async def get(self, study):
        """Get language oai-set values from docstore record.

        :param study: Document store Study
        :returns: list of available languages of the study
        :rtype: list
        """
        titles = study.study_titles
        return titles.get_available_languages()

    async def filter(self, value):
        """Return a query filter that includes all studies matching 'value'.

        :param str or None value: Request setspec value after colon.
                                  A falsy value selects every study
                                  that has a title language.
        :returns: query filter
        :rtype: dict
        """
        language_attr = self._mdformat.study_class.study_titles.attr_language
        condition = value if value else self._exists_filter
        return {language_attr: condition}
class StudyGroupsSet(MDSet):
    """OAI-Set Study groups"""

    spec = 'study_groups'

    async def fields(self):
        """Return a list of fields to include in query for header fields.

        :returns: list containing the study groups field
        :rtype: list
        """
        return [self._mdformat.study_class.study_groups]

    async def query(self, on_set_cb):
        """Query and add distinct values for study groups.

        The top-level 'study_groups' setspec is reported first. Each
        distinct study group in the query result is then reported as
        'study_groups:<value>'. Groups lacking the value key are skipped.

        :param on_set_cb: Async callable called for each setspec
        :returns: None
        """
        groups_field = self._mdformat.study_class.study_groups
        result = await QueryController().query_distinct(
            self._mdformat.study_class,
            headers=self._mdformat.corr_id_header,
            fieldname=groups_field)
        await on_set_cb(self.spec, name='Study group')
        # Both keys are loop-invariant, so look them up once.
        value_key = groups_field.sub_name.name
        name_key = groups_field.attr_name.name
        for group in result.get(groups_field.path, []):
            if value_key not in group:
                # No value to build the '<spec>:<value>' setspec from.
                continue
            await on_set_cb(
                '%s:%s' % (self.spec, group[value_key]),
                name=group.get(name_key, ''))

    async def get(self, study):
        """Get values from study used to construct setspecs for study groups.

        :param study: Document store study
        :returns: list of distinct, non-None study group values in the
                  order they first appear in the study.
        :rtype: list
        """
        study_groups = []
        for sg in study.study_groups:
            val = sg.get_value()
            if val is not None and val not in study_groups:
                # Append the value that was just checked rather than
                # calling get_value() a second time.
                study_groups.append(val)
        return study_groups

    async def filter(self, value):
        """Return a query filter that includes all studies matching 'value'.

        :param str or None value: Request setspec value after colon.
                                  A falsy value selects every study
                                  that has a study group value.
        :returns: query filter
        :rtype: dict
        """
        return {self._mdformat.study_class.study_groups.sub_name:
                value or self._exists_filter}
class DataKindSet(MDSet):
    """OAI-Set Data kind"""

    spec = 'data_kind'

    async def fields(self):
        """Return list of fields to include when querying for header fields.

        :returns: list containing the data kinds field
        :rtype: list
        """
        return [self._mdformat.study_class.data_kinds]

    async def query(self, on_set_cb):
        """Query and add distinct values for Data kinds.

        The top-level 'data_kind' setspec is reported first, followed by
        a 'data_kind:<kind>' setspec for every distinct kind of data.

        :param on_set_cb: Async callable called for each setspec.
        :returns: None
        """
        kind_field = self._mdformat.study_class.data_kinds.sub_name
        result = await QueryController().query_distinct(
            self._mdformat.study_class,
            headers=self._mdformat.corr_id_header,
            fieldname=kind_field)
        await on_set_cb(self.spec, name='Kind of data')
        for kd in result.get(kind_field.path, []):
            await on_set_cb('%s:%s' % (self.spec, kd), name='%s data' % (kd,))

    async def get(self, study):
        """Get values from record for oai-set Data kinds.

        :param study: Document store study
        :returns: list of the study's distinct, non-None data kinds in
                  the order they first appear in the study.
        :rtype: list
        """
        data_kinds = []
        for kd in study.data_kinds:
            val = kd.get_value()
            if val is not None and val not in data_kinds:
                # Append the value that was just checked rather than
                # calling get_value() a second time.
                data_kinds.append(val)
        return data_kinds

    async def filter(self, value):
        """Return a query filter that includes all studies matching 'value'.

        :param str or None value: Request setspec value after colon.
                                  A falsy value selects every study
                                  that has a data kind value.
        :returns: query filter
        :rtype: dict
        """
        return {self._mdformat.study_class.data_kinds.sub_name:
                value or self._exists_filter}
class OpenAIREDataSet(MDSet):
    """OAI-set for studies carrying an OpenAIRE-compatible identifier."""

    spec = 'openaire_data'

    async def fields(self):
        """Return the fields needed to build headers for this set.

        :returns: list containing the identifiers field
        :rtype: list
        """
        study_cls = self._mdformat.study_class
        return [study_cls.identifiers]

    async def query(self, on_set_cb):
        """Report the single setspec of the OpenAIRE data set.

        The openaire_data set is non-hierarchical: it consists of only
        one setspec, '<setSpec>openaire_data</setSpec>'.

        :param on_set_cb: Async callable, called for each setspec.
        :returns: None
        """
        await on_set_cb(self.spec, name='OpenAIRE')

    async def get(self, study):
        """Tell whether the study belongs to the openaire_data set.

        The set has no hierarchy of values. A study belongs to it when
        at least one of its identifiers has an agency listed in
        valid_openaire_id_types.

        :param study: Document store study
        :returns: [None] if study belongs to the set, [] if not.
        :rtype: list
        """
        belongs = any(
            identifier.attr_agency.get_value() in valid_openaire_id_types
            for identifier in study.identifiers)
        return [None] if belongs else []

    async def filter(self, value):
        """Return a query filter that includes all studies in set.

        :param value: parameter is discarded.
        :returns: Query filter
        :rtype: dict
        """
        agency_attr = self._mdformat.study_class.identifiers.attr_agency
        accepted_types = list(valid_openaire_id_types)
        return {agency_attr: {QueryController.fk_constants.in_: accepted_types}}