Source code for kuha_oai_pmh_repo_handler.metadataformats

"""Define metadata formats.

Metadataformats create response contexts by calling the oai_response
object and declare templates if needed. Metadataformats raise
oai_errors when needed.
"""
# Stdlib
import os.path
import datetime
import logging
from collections import namedtuple
# Kuha Common
from kuha_common.query import QueryController
from kuha_common.document_store import (
    query,
    client,
    Study,
    Variable,
    Question
)
from kuha_common.document_store.constants import REC_STATUS_DELETED
# Kuha OAI-PMH
from kuha_oai_pmh_repo_handler.genshi_loader import (
    add_template_folders,
    GenPlate
)
from kuha_oai_pmh_repo_handler.constants import TEMPLATE_FOLDER
from kuha_oai_pmh_repo_handler.oai.constants import (
    OAI_RESPONSE_LIST_SIZE,
    OAI_DEL_RECORDS_DECL_NO,
    OAI_DEL_RECORDS_DECL_TRANSIENT,
    OAI_DEL_RECORDS_DECL_PERSISTENT,
    OAI_DATESTAMP_GRANULARITY_DATETIME
)
# Local subpackage
from . import (
    const,
    exc,
    _mdsets
)


_logger = logging.getLogger(__name__)


_STORED = {'args_added': False,
           'configured': False,
           'deleted_records': None,
           'loaded_sets': None}


class DuplicateSetSpec(Exception):
    """Every OAI set must have a unique spec value"""
class MDFormat:
    """Base class for metadata formats.

    Defines common attributes and methods. Subclass to define
    metadataformats.
    """

    default_template_folders = [
        os.path.abspath(
            os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                '..', TEMPLATE_FOLDER))]

    #: overridable controls how plugin discovery handles metadataformats with
    #: the same mdprefix. Built-in metadataformats could be overridable;
    #: those developed as a plugin should not be.
    overridable = False

    mdprefix = None
    mdschema = None
    mdnamespace = None

    study_class = Study
    variable_class = Variable
    question_class = Question

    list_size = OAI_RESPONSE_LIST_SIZE
    _deleted_records_default = OAI_DEL_RECORDS_DECL_TRANSIENT
    datestamp_granularity = OAI_DATESTAMP_GRANULARITY_DATETIME

    #: For convenience to facilitate easier subclassing of sets.
    #: The MDSet is tightly coupled with this class.
    MDSet = _mdsets.MDSet

    # All sets in repository.
    # Override to customize OAI sets. Note that this attribute must
    # contain the same objects for all metadataformat-objects in the
    # OAI-PMH repository.
    sets = [_mdsets.LanguageSet,
            _mdsets.StudyGroupsSet,
            _mdsets.DataKindSet,
            _mdsets.OpenAIREDataSet]

    def __init__(self, oai, corr_id_header):
        """Initialize base MDFormat.

        The oai argument wraps the oai-protocol. Its keys include
        'arguments', 'headers', 'errors', 'response', and the values
        correspond to the following objects:

          * arguments: :obj:`kuha_oai_pmh_repo_handler.oai.protocol.OAIArguments`
          * headers: :class:`kuha_oai_pmh_repo_handler.oai.protocol.OAIHeaders`
          * response: :obj:`kuha_oai_pmh_repo_handler.oai.protocol.OAIResponse`
          * errors: :mod:`kuha_oai_pmh_repo_handler.errors`

        :param oai: Object that wraps the oai-protocol.
        :param dict corr_id_header: CorrelationId HTTP-header.
        """
        if any(x is None for x in (self.mdprefix, self.mdschema, self.mdnamespace)):
            raise NotImplementedError("mdprefix, mdschema and mdnamespace must be defined in "
                                      "subclass")
        self._oai = oai
        self.corr_id_header = corr_id_header
    @classmethod
    def add_cli_args(cls, parser):
        """Add command line arguments to parser.

        Adds required command line arguments regarding metadataformats &
        sets. This should be called on program startup along with other
        command line argument definitions if the program allows
        configuration of metadataformats & sets.

        :param :obj:`configargparse.ArgumentParser` parser: Active command line parser.
        """
        if _STORED['args_added']:
            return
        # TODO where should the default template folder be declared?
        parser.add('--template-folder',
                   help='Folder containing XML templates',
                   default=cls.default_template_folders,
                   env_var='OPRH_TEMPLATES',
                   action='append', type=str)
        parser.add('--oai-pmh-deleted-records',
                   help='Deleted records declaration for Identify verb.',
                   default=cls._deleted_records_default,
                   env_var='OPRH_DELETED_RECORDS',
                   choices=[OAI_DEL_RECORDS_DECL_NO,
                            OAI_DEL_RECORDS_DECL_TRANSIENT,
                            OAI_DEL_RECORDS_DECL_PERSISTENT])
        client.add_cli_args()
        query.add_cli_args()
        for set_ in cls.sets:
            set_.add_cli_args(parser)
        _STORED['args_added'] = True
    @classmethod
    def configure_sets(cls, settings):
        """Configure & load sets using settings.

        Calls configure() of each MDSet class stored in the class variable
        'sets'. The configure() will be called with the 'settings'
        parameter. If configure() returns False the set will not be
        loaded, but will be discarded instead. Otherwise, the configured
        set will be stored in a module level variable and used to serve
        OAI requests.

        :param :obj:`argparse.Namespace` settings: Loaded settings
        :raises: :exc:`DuplicateSetSpec` if two configured sets have a
                 duplicate value in the 'spec' class level variable.
        """
        if _STORED['loaded_sets'] is not None:
            raise ValueError("Sets already loaded")
        _STORED['loaded_sets'] = {}
        for set_ in cls.sets:
            if set_.configure(settings) is False:
                # Discarding set
                _logger.info("Discarding OAI set '%s' with spec '%s'", set_, set_.spec)
                continue
            if set_.spec in _STORED['loaded_sets']:
                raise DuplicateSetSpec("Found duplicate spec value '%s'" % (set_.spec,))
            _STORED['loaded_sets'][set_.spec] = set_
    @classmethod
    def configure(cls, settings):
        """Configure metadataformats & sets using settings.

        :param :obj:`argparse.Namespace` settings: Loaded settings
        """
        if _STORED['configured']:
            return
        add_template_folders(*settings.template_folder)
        client.configure(settings)
        query.configure(settings)
        cls.configure_sets(settings)
        _STORED['deleted_records'] = settings.oai_pmh_deleted_records
        _STORED['configured'] = True
    @staticmethod
    def get_deleted_record():
        """Get DeletedRecord OAI-PMH property"""
        if _STORED['deleted_records'] is None:
            raise ValueError("DeletedRecord is not configured. Call configure() first.")
        return _STORED['deleted_records']
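    # Illustrative usage sketch (not part of the module): the classmethods
    # above are wired together at program startup roughly like the following,
    # assuming a configargparse-style parser. The exact parser construction
    # belongs to the calling program and is shown here only as an assumption.
    #
    #     parser = configargparse.ArgumentParser()
    #     MDFormat.add_cli_args(parser)    # --template-folder, --oai-pmh-deleted-records, ...
    #     settings = parser.parse_args()
    #     MDFormat.configure(settings)     # configures client, query and loads sets
    #     deleted_decl = MDFormat.get_deleted_record()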
    async def _header_fields(self):
        fields = [self.study_class._metadata,
                  self.study_class.study_number]
        for set_ in self._iter_initialized_sets():
            fields.extend(await set_.fields())
        return fields

    @property
    def _record_fields(self):
        """Implement in subclass"""
        raise NotImplementedError

    @staticmethod
    async def _min_increment_step(datetime_str):
        """Count smallest increment step from datetime string.

        :param datetime_str: string representation of a datetime.
                             Datetime must be represented either by day's
                             precision or by second's precision.
        :type datetime_str: str
        :returns: smallest increment step.
        :rtype: :obj:`datetime.timedelta`
        :raises: :exc:`ValueError` if string length is invalid.
        """
        if len(datetime_str) == 10:
            # day's precision
            increment = datetime.timedelta(days=1)
        elif len(datetime_str) == 20:
            # second's precision
            increment = datetime.timedelta(seconds=1)
        else:
            raise ValueError("Invalid datetime string: {}".format(datetime_str))
        return increment
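    # Worked example (illustration only, not part of the module) of
    # _min_increment_step() above: a 10-character date string yields a one-day
    # step, a 20-character datetime string yields a one-second step.
    #
    #     await MDFormat._min_increment_step('2020-01-01')            # timedelta(days=1)
    #     await MDFormat._min_increment_step('2020-01-01T00:00:00Z')  # timedelta(seconds=1)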
    @classmethod
    def get_set(cls, setspec):
        """Get set matching 'setspec' value.

        :param str setspec: Set to lookup.
        :returns: Found set, which is a subclass of :class:`MDSet`
        :raises: :exc:`exc.NoSuchSet` if a set is not found.
        """
        for set_ in cls.sets:
            if setspec == set_.spec:
                return set_
        raise exc.NoSuchSet("Could not find set matching setspec '%s'" % (setspec,))
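    # Usage sketch (illustration only): get_set() resolves an OAI setSpec to
    # its MDSet subclass. The spec value 'language' is an assumption here; the
    # actual spec strings are defined by the MDSet subclasses in _mdsets.
    #
    #     language_set = MDFormat.get_set('language')   # raises exc.NoSuchSet if unknown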
    @staticmethod
    def _iter_loaded_sets():
        loaded_sets = _STORED['loaded_sets'].values() if _STORED['loaded_sets'] else []
        for set_ in loaded_sets:
            yield set_

    def _iter_initialized_sets(self):
        for set_ in self._iter_loaded_sets():
            yield set_(self)

    @staticmethod
    def _get_loaded_set(setspec):
        loaded_set = _STORED['loaded_sets'].get(setspec)
        if loaded_set is None:
            raise exc.NoSuchSet("Could not find set matching setspec '%s'" % (setspec,))
        return loaded_set

    def _get_initialized_set(self, setspec):
        return self._get_loaded_set(setspec)(self)

    async def _set_filter(self, requested_set):
        colon_count = requested_set.count(':')
        if colon_count == 0:
            set_key = requested_set
            value = None
        elif colon_count == 1:
            set_key, value = requested_set.split(':')
        else:
            raise self._oai.errors.NoRecordsMatch()
        try:
            set_ = self._get_initialized_set(set_key)
        except exc.NoSuchSet:
            # This method is called when the HTTP request is using a set.
            # Therefore the condition is not a programming error but
            # an oaierror. Mask NoSuchSet and raise NoRecordsMatch.
            raise self._oai.errors.NoRecordsMatch()
        return await set_.filter(value)

    async def _prepare_get_record(self):
        if self._oai.response.records == []:
            raise self._oai.errors.IdDoesNotExist(context=self._oai.arguments.identifier)
        return await self._oai.response.get_record_response()

    async def _prepare_list_records(self):
        if self._oai.arguments.is_selective() and self._oai.response.records == []:
            raise self._oai.errors.NoRecordsMatch()
        return await self._oai.response.list_records_response()

    async def _metadata_response(self):
        await self._oai.response.set_metadata_format(self.mdschema, self.mdnamespace)
        _prepare_call = {
            self._oai.arguments.verb_value_get_record: self._prepare_get_record,
            self._oai.arguments.verb_value_list_records: self._prepare_list_records
        }[self._oai.arguments.verb]
        return await _prepare_call()

    async def _add_record(self, identifier, datestamp, record_objects, setspecs, deleted):
        headers = self._oai.headers(identifier, datestamp, deleted)
        for set_ in self._iter_loaded_sets():
            if set_.spec not in setspecs:
                raise ValueError("Setspecs for '%s' is missing. Cannot build sets." % (set_.spec,))
            for val in setspecs.pop(set_.spec):
                headers.add_set_spec(set_.spec, val)
        if setspecs != {}:
            raise ValueError("Found extra set information in metadataformat: '%s'" % (setspecs,))
        record_objects.update({'headers': headers})
        self._oai.response.records.append(record_objects)

    async def _get_identifier(self, study, **record_objs):
        """Get identifier from record objects.

        Override in subclass to declare a specific identifier.

        :param study: Study from document store.
        :returns: Identifier
        """
        return study.study_number.get_value()

    async def _on_record(self, study, **record_objs):
        identifier = await self._get_identifier(study, **record_objs)
        setspecs = {}
        for set_ in self._iter_initialized_sets():
            setspecs.update({set_.spec: await set_.get(study)})
        record_objs['study'] = study
        datestamp = study.get_deleted() if study.is_deleted() else study.get_updated()
        await self._add_record(identifier, datestamp, record_objs, setspecs, study.is_deleted())

    async def _has_record(self):
        _filter = await self._valid_record_filter()
        result = await QueryController().query_single(
            self.study_class, headers=self.corr_id_header,
            _filter=_filter, fields=self.study_class._id)
        return bool(result)

    async def _queryparams_from_resumption_token(self):
        MDQueryParams = namedtuple('MDQueryParams', ['skip', 'from_', 'until', 'set_'])
        self._oai.arguments.resumption_token.response_list_size = self.list_size
        _until = self._oai.arguments.resumption_token.until + await self._min_increment_step(
            self._oai.arguments.resumption_token.until_str)
        return MDQueryParams(skip=self._oai.arguments.resumption_token.cursor,
                             from_=self._oai.arguments.resumption_token.from_,
                             set_=self._oai.arguments.resumption_token.set_,
                             until=_until)

    async def _get_record(self):
        fields = await self._header_fields() + self._record_fields
        _filter = await self._valid_record_filter()
        await QueryController().query_single(
            self.study_class, on_record=self._on_record,
            headers=self.corr_id_header, _filter=_filter,
            fields=list(set(fields)))

    async def _valid_records_filter(self):
        """Return query filter that returns all valid records.

        Override in subclass to define specific filter requirements.

        :returns: Query filter
        :rtype: dict
        """
        return {}

    async def _valid_record_filter(self):
        """Return query filter that returns a valid record.

        Override in subclass to define specific filter requirements.

        :returns: Query filter
        :rtype: dict
        """
        return {self.study_class.study_number: self._oai.arguments.get_local_identifier()}

    async def _list_request_filter(self, qparams):
        _filter = await self._valid_records_filter()
        if qparams.set_:
            _filter.update(await self._set_filter(qparams.set_))
        if qparams.from_ or qparams.until:
            _filter.update({self.study_class._metadata.attr_updated: {
                QueryController.fk_constants.from_: qparams.from_,
                QueryController.fk_constants.until: qparams.until}})
        return _filter

    async def _query_records(self, add_fields=None):
        add_fields = add_fields or []
        qparams = await self._queryparams_from_resumption_token()
        _filter = await self._list_request_filter(qparams)
        queryctrl = QueryController(headers=self.corr_id_header)
        count = await queryctrl.query_count(self.study_class, _filter=_filter)
        self._oai.arguments.resumption_token.complete_list_size = count
        fields = await self._header_fields() + add_fields
        await queryctrl.query_multiple(
            self.study_class, on_record=self._on_record, _filter=_filter,
            fields=list(set(fields)), limit=self.list_size, skip=qparams.skip)

    async def _list_records(self):
        await self._query_records(self._record_fields)
    async def get_earliest_datestamp(self):
        """Get earliest datestamp as python datetime object.

        :returns: earliest datestamp for this metadataformat.
        :rtype: :obj:`datetime.datetime`
        """
        datestamp = None
        study = await QueryController().query_single(
            self.study_class, headers=self.corr_id_header,
            fields=self.study_class._metadata,
            sort_order=1,
            sort_by=self.study_class._metadata.attr_updated)
        if study:
            datestamp = study._metadata.attr_updated.get_value()
        return datestamp
    async def list_sets(self):
        """Outputs all sets from all records in the whole repository.

        If overridden, this should be overridden in all subclasses and
        it should behave the same in every subclass::

            async def _list_sets():
                ...

            class MyMetadataFormat(MDFormat):

                list_sets = _list_sets
        """
        if list(self._iter_loaded_sets()) == []:
            raise self._oai.errors.NoSetHierarchy()
        for set_ in self._iter_initialized_sets():
            await set_.query(self._oai.response.add_sets_element)
    async def list_identifiers(self):
        """Query record identifiers from backend.

        Queries records and raises NoRecordsMatch oai error if the
        request is selective and no records were found.
        """
        await self._query_records()
        if self._oai.arguments.is_selective() and self._oai.response.records == []:
            raise self._oai.errors.NoRecordsMatch()
    async def list_metadata_formats(self):
        """Adds information regarding this metadataformat to response.

        If the request contains an identifier, first makes sure the
        record exists in the backend, then adds the metadataformat
        information to the response.
        """
        if self._oai.arguments.identifier:
            if await self._has_record():
                await self._oai.response.add_available_metadata_format(
                    self.mdprefix, self.mdschema, self.mdnamespace)
            return
        await self._oai.response.add_available_metadata_format(
            self.mdprefix, self.mdschema, self.mdnamespace)
    # Subclass defines implementation for the following methods.
    async def get_record(self):
        """Adds record to response.

        This is an abstract method that must be implemented in a
        subclass. Note that the correct templates also need to be
        defined in the subclass via decoration.

        The implementation must query the backend for the requested
        record, raise OAI errors if needed and return the correct
        oai.response.context.

        :raises: :exc:`NotImplementedError`
        """
        raise NotImplementedError
    async def list_records(self):
        """Adds records to response.

        This is an abstract method that must be implemented in a
        subclass. The subclass must also define the correct template
        via decoration.

        The implementation must query the backend for the requested
        records, raise OAI errors when needed and return the correct
        oai.response.context.

        :raises: :exc:`NotImplementedError`
        """
        raise NotImplementedError
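# Illustrative sketch (not part of the module): a minimal custom metadataformat
# would subclass MDFormat, define the mandatory mdprefix/mdschema/mdnamespace,
# list its record fields and decorate get_record()/list_records() with GenPlate
# templates, mirroring the built-in formats below. The prefix, schema URL and
# subtemplate name here are hypothetical.
#
#     class MyMetadataFormat(MDFormat):
#
#         mdprefix = 'my_format'
#         mdschema = 'https://example.org/my_format.xsd'
#         mdnamespace = 'https://example.org/my_format/'
#
#         @property
#         def _record_fields(self):
#             return [self.study_class.abstract]
#
#         @GenPlate('get_record.xml', subtemplate='my_format.xml')
#         async def get_record(self):
#             await super()._get_record()
#             return await super()._metadata_response()
#
#         @GenPlate('list_records.xml', subtemplate='my_format.xml')
#         async def list_records(self):
#             await super()._list_records()
#             return await super()._metadata_response()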
class DCMetadataFormat(MDFormat):

    overridable = True
    mdprefix = 'oai_dc'
    mdschema = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
    mdnamespace = 'http://www.openarchives.org/OAI/2.0/oai_dc/'

    @property
    def _record_fields(self):
        return [self.study_class.identifiers,
                self.study_class.principal_investigators,
                self.study_class.publishers,
                self.study_class.document_uris,
                self.study_class.abstract,
                self.study_class.keywords,
                self.study_class.publication_years,
                self.study_class.study_area_countries,
                self.study_class.data_collection_copyrights]
    @classmethod
    def add_cli_args(cls, parser):
        super().add_cli_args(parser)
        parser.add('--oai-pmh-list-size-oai-dc',
                   help='How many results should a list response contain for '
                        'OAI DC metadata',
                   default=OAI_RESPONSE_LIST_SIZE,
                   env_var='OPRH_OP_LIST_SIZE_OAI_DC',
                   type=int)
    @classmethod
    def configure(cls, settings):
        cls.list_size = settings.oai_pmh_list_size_oai_dc
        super().configure(settings)
    @GenPlate('get_record.xml', subtemplate='oai_dc.xml')
    async def get_record(self):
        await super()._get_record()
        return await super()._metadata_response()

    @GenPlate('list_records.xml', subtemplate='oai_dc.xml')
    async def list_records(self):
        await super()._list_records()
        return await super()._metadata_response()
class EAD3MetadataFormat(MDFormat):

    overridable = True
    mdprefix = 'ead3'
    mdschema = 'http://www.loc.gov/ead/ead3.xsd'
    mdnamespace = 'http://ead3.archivists.org/schema/'

    @property
    def _record_fields(self):
        return [self.study_class.publishers,
                self.study_class.file_names,
                self.study_class.document_uris,
                self.study_class.collection_periods,
                self.study_class.principal_investigators,
                self.study_class.keywords,
                self.study_class.classifications,
                self.study_class.study_area_countries,
                self.study_class.geographic_coverages,
                self.study_class.data_access,
                self.study_class.data_collection_copyrights,
                self.study_class.citation_requirements,
                self.study_class.deposit_requirements,
                self.study_class.abstract]
    @classmethod
    def add_cli_args(cls, parser):
        super().add_cli_args(parser)
        parser.add('--oai-pmh-list-size-ead3',
                   help='How many results should a list response contain for '
                        'EAD3 metadata',
                   default=OAI_RESPONSE_LIST_SIZE,
                   env_var='OPRH_OP_LIST_SIZE_EAD3',
                   type=int)
    @classmethod
    def configure(cls, settings):
        cls.list_size = settings.oai_pmh_list_size_ead3
        super().configure(settings)
    @GenPlate('get_record.xml', subtemplate='ead3.xml')
    async def get_record(self):
        await super()._get_record()
        return await super()._metadata_response()

    @GenPlate('list_records.xml', subtemplate='ead3.xml')
    async def list_records(self):
        await super()._list_records()
        return await super()._metadata_response()
    @staticmethod
    def get_daterange_pairs(colldates):
        """Record helper method extracts daterange pairs from a list of
        Study.collection_periods.

        Returns a list of two-tuples [(start, end)]. Both items inside
        the tuple are instances of Study.collection_periods values.

        :param list colldates: collection periods list
        :returns: List of date range pairs in two-tuples (start, end)
        :rtype: list
        """
        pairs = []
        cur_pair = {'start': None, 'end': None}

        def _push(start=None, end=None):
            if start is not None:
                if cur_pair['start'] is not None:
                    pairs.append((cur_pair['start'], cur_pair['end']))
                    cur_pair['end'] = None
                cur_pair['start'] = start
                return
            if end is not None:
                if cur_pair['end'] is not None:
                    pairs.append((cur_pair['start'], cur_pair['end']))
                    cur_pair['start'] = None
                cur_pair['end'] = end
                return
            if any(x is not None for x in [cur_pair['start'], cur_pair['end']]):
                pairs.append((cur_pair['start'], cur_pair['end']))

        for colldate in colldates:
            if colldate.attr_event.get_value() == 'start':
                _push(start=colldate)
            elif colldate.attr_event.get_value() == 'end':
                _push(end=colldate)
        _push()
        return pairs
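    # Worked example (illustration only, names are hypothetical) of
    # get_daterange_pairs() above:
    #
    #     # colldates whose attr_event values are, in order, ['start', 'end', 'start']
    #     EAD3MetadataFormat.get_daterange_pairs(colldates)
    #     # -> [(first_start, first_end), (second_start, None)]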
    @staticmethod
    def get_singledates(colldates):
        """Record helper method extracts single dates from a list of
        Study.collection_periods.

        Returns a list of Study.collection_periods values.

        :param list colldates: collection periods list
        :returns: List of single dates
        :rtype: list
        """
        dates = []
        for colldate in colldates:
            if colldate.attr_event.get_value() == 'single':
                dates.append(colldate)
        return dates
    async def _on_record(self, study):
        await super()._on_record(study,
                                 get_daterange_pairs=self.get_daterange_pairs,
                                 get_singledates=self.get_singledates)
class DDICMetadataFormat(MDFormat):

    overridable = True
    mdprefix = 'ddi_c'
    mdschema = 'http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd'
    mdnamespace = 'ddi:codebook:2_5'

    @property
    def _record_fields(self):
        return [self.study_class.identifiers,
                self.study_class.document_titles,
                self.study_class.publishers,
                self.study_class.document_uris,
                self.study_class.study_uris,
                self.study_class.distributors,
                self.study_class.copyrights,
                self.study_class.parallel_titles,
                self.study_class.principal_investigators,
                self.study_class.publication_dates,
                self.study_class.publication_years,
                self.study_class.keywords,
                self.study_class.time_methods,
                self.study_class.sampling_procedures,
                self.study_class.collection_modes,
                self.study_class.analysis_units,
                self.study_class.collection_periods,
                self.study_class.classifications,
                self.study_class.abstract,
                self.study_class.study_area_countries,
                self.study_class.universes,
                self.study_class.data_access,
                self.study_class.data_access_descriptions,
                self.study_class.file_names,
                self.study_class.data_collection_copyrights,
                self.study_class.citation_requirements,
                self.study_class.deposit_requirements,
                self.study_class.geographic_coverages,
                self.study_class.instruments,
                self.study_class.related_publications,
                self.study_class.grant_numbers,
                self.study_class.funding_agencies]
    @classmethod
    def add_cli_args(cls, parser):
        super().add_cli_args(parser)
        parser.add('--oai-pmh-list-size-ddi-c',
                   help='How many results should a list response contain for '
                        'DDI_C metadata',
                   default=OAI_RESPONSE_LIST_SIZE,
                   env_var='OPRH_OP_LIST_SIZE_DDI_C',
                   type=int)
    @classmethod
    def configure(cls, settings):
        cls.list_size = settings.oai_pmh_list_size_ddi_c
        super().configure(settings)
    @staticmethod
    def iter_relpubls(study):
        relpubls = {}
        for relpubl in study.related_publications:
            key = (relpubl.get_language(), relpubl.attr_description.get_value())
            if key in relpubls:
                relpubls[key].append(relpubl)
            else:
                relpubls.update({key: [relpubl]})
        for lang_desc, relpubls in relpubls.items():
            yield lang_desc, relpubls

    async def _on_record(self, study):
        if self._oai.arguments.verb == self._oai.arguments.verb_value_list_identifiers\
           or study.is_deleted():
            await super()._on_record(study)
            return
        variables, questions = [], {}

        async def _add_question(question):
            varname = question.variable_name.get_value()
            if varname is None:
                return
            if varname in questions:
                questions[varname].append(question)
                return
            questions.update({varname: [question]})

        await QueryController().query_multiple(
            self.variable_class, on_record=variables.append,
            headers=self.corr_id_header,
            _filter={
                self.variable_class.study_number: study.study_number.get_value(),
                self.variable_class._metadata.attr_status: {
                    QueryController.fk_constants.not_equal: REC_STATUS_DELETED}},
            fields=[self.variable_class.variable_name,
                    self.variable_class.variable_labels,
                    self.variable_class.codelist_codes],
            sort_by=self.variable_class._metadata.attr_created)
        await QueryController().query_multiple(
            self.question_class, on_record=_add_question,
            headers=self.corr_id_header,
            _filter={
                self.question_class.study_number: study.study_number.get_value(),
                self.question_class._metadata.attr_status: {
                    QueryController.fk_constants.not_equal: REC_STATUS_DELETED}},
            fields=[self.question_class.question_identifier,
                    self.question_class.question_texts,
                    self.question_class.variable_name],
            sort_by=self.question_class._metadata.attr_created)
        await super()._on_record(study,
                                 iter_relpubls=DDICMetadataFormat.iter_relpubls,
                                 variables=variables,
                                 questions=questions)

    @GenPlate('get_record.xml', subtemplate='ddi_c.xml')
    async def get_record(self):
        await super()._get_record()
        return await super()._metadata_response()

    @GenPlate('list_records.xml', subtemplate='ddi_c.xml')
    async def list_records(self):
        await super()._list_records()
        return await super()._metadata_response()
class OAIDDI25MetadataFormat(MDFormat):

    overridable = True
    mdprefix = 'oai_ddi25'
    mdschema = DDICMetadataFormat.mdschema
    mdnamespace = DDICMetadataFormat.mdnamespace

    @property
    def _record_fields(self):
        return [self.study_class.identifiers,
                self.study_class.document_titles,
                self.study_class.publishers,
                self.study_class.document_uris,
                self.study_class.study_uris,
                self.study_class.distributors,
                self.study_class.copyrights,
                self.study_class.parallel_titles,
                self.study_class.principal_investigators,
                self.study_class.publication_dates,
                self.study_class.publication_years,
                self.study_class.keywords,
                self.study_class.time_methods,
                self.study_class.sampling_procedures,
                self.study_class.collection_modes,
                self.study_class.analysis_units,
                self.study_class.collection_periods,
                self.study_class.classifications,
                self.study_class.abstract,
                self.study_class.study_area_countries,
                self.study_class.universes,
                self.study_class.data_access,
                self.study_class.data_access_descriptions,
                self.study_class.file_names,
                self.study_class.data_collection_copyrights,
                self.study_class.citation_requirements,
                self.study_class.deposit_requirements,
                self.study_class.geographic_coverages,
                self.study_class.instruments,
                self.study_class.grant_numbers,
                self.study_class.related_publications,
                self.study_class.funding_agencies]
    @classmethod
    def add_cli_args(cls, parser):
        super().add_cli_args(parser)
        parser.add('--oai-pmh-list-size-oai-ddi25',
                   help='How many results should a list response contain for '
                        'OAI DDI25 metadata',
                   default=OAI_RESPONSE_LIST_SIZE,
                   env_var='OPRH_OP_LIST_SIZE_OAI_DDI25',
                   type=int)
    @classmethod
    def configure(cls, settings):
        cls.list_size = settings.oai_pmh_list_size_oai_ddi25
        super().configure(settings)
    async def _on_record(self, study):
        await super()._on_record(study, iter_relpubls=DDICMetadataFormat.iter_relpubls)

    @GenPlate('get_record.xml', subtemplate='oai_ddi25.xml')
    async def get_record(self):
        await super()._get_record()
        return await super()._metadata_response()

    @GenPlate('list_records.xml', subtemplate='oai_ddi25.xml')
    async def list_records(self):
        await super()._list_records()
        return await super()._metadata_response()
class OAIDataciteMetadataFormat(MDFormat):
    """Metadataformat for OpenAIRE DataCite"""

    overridable = True
    mdprefix = 'oai_datacite'
    mdschema = 'http://schema.datacite.org/meta/kernel-3/metadata.xsd'
    mdnamespace = 'http://datacite.org/schema/kernel-3'

    async def _header_fields(self):
        fields = await super()._header_fields()
        fields.append(self.study_class.identifiers)
        return fields

    @property
    def _record_fields(self):
        return [self.study_class.identifiers,
                self.study_class.principal_investigators,
                self.study_class.distributors,
                self.study_class.publishers,
                self.study_class.publication_years,
                self.study_class.keywords,
                self.study_class.classifications,
                self.study_class.data_access,
                self.study_class.abstract,
                self.study_class.geographic_coverages,
                self.study_class.study_titles,
                self.study_class.related_publications,
                self.study_class.grant_numbers]
    @classmethod
    def add_cli_args(cls, parser):
        super().add_cli_args(parser)
        parser.add('--oai-pmh-list-size-oai-datacite',
                   help='How many results should a list response contain for '
                        'OAI Datacite metadata',
                   default=OAI_RESPONSE_LIST_SIZE,
                   env_var='OPRH_OP_LIST_SIZE_OAI_DATACITE',
                   type=int)
    @classmethod
    def configure(cls, settings):
        cls.list_size = settings.oai_pmh_list_size_oai_datacite
        super().configure(settings)
    @classmethod
    async def get_preferred_identifier(cls, study):
        """OpenAIRE datacite requires a certain type of ID.

        Identifier type must be one of (also the lookup order):

          * DOI
          * ARK
          * Handle
          * PURL
          * URN
          * URL

        :param :obj:`kuha_common.document_store.records.Study` study: Currently serialized study.
        :returns: (<str:type>, <str:id>)
        :rtype: tuple
        """
        types_ids = {}
        for identifier in study.identifiers:
            typ = identifier.attr_agency.get_value()
            val = identifier.get_value()
            if typ in const.valid_openaire_id_types and val != types_ids.get(typ, None):
                types_ids.update({typ: val})
        for preferred in const.valid_openaire_id_types:
            if preferred in types_ids:
                return (preferred, types_ids[preferred])
        return ()
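    # Worked example (illustration only) of get_preferred_identifier(): the
    # lookup honours the order of const.valid_openaire_id_types, so a study
    # carrying both a URN and a DOI identifier resolves to the DOI. The
    # identifier value below is hypothetical.
    #
    #     await OAIDataciteMetadataFormat.get_preferred_identifier(study)
    #     # -> e.g. ('DOI', '10.1234/xyz'), or () when no valid type is found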
    @staticmethod
    async def get_publication_year(study):
        for publyear in study.publication_years:
            candidate = publyear.attr_distribution_date.get_value() or publyear.get_value()
            if candidate:
                return candidate[:4] if len(candidate) > 4 else candidate
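    # Worked example (illustration only) of get_publication_year(): the first
    # non-empty distribution date or publication year value is truncated to a
    # four-character year, e.g. a value of '2019-06-01' yields '2019'.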
    @staticmethod
    async def get_publisher_lang_value_pair(study):
        """Get publisher language & value pair as tuple.

        :param :obj:`kuha_common.document_store.records.Study` study: Currently serialized study.
        :returns: (<str:language>, <str:publisher>)
        :rtype: tuple
        """
        # Distributor is the primary source
        candidate = ()
        for distributor in study.distributors:
            if distributor.get_language() == 'en':
                candidate = ('en', distributor.get_value())
                break
            if not candidate:
                candidate = (distributor.get_language(), distributor.get_value())
        if candidate:
            return candidate
        for publisher in study.publishers:
            if publisher.get_language() == 'en':
                candidate = ('en', publisher.get_value())
                break
            if not candidate:
                candidate = (publisher.get_language(), publisher.get_value())
        return candidate
    @staticmethod
    async def get_funders(study):
        """Get OpenAIRE Datacite funders.

        OpenAIRE Datacite requires a certain nameIdentifier for
        Contributor. The syntax is described at
        https://guidelines.openaire.eu/en/latest/data/field_contributor.html#nameidentifier-ma-o
        This method filters in study.grant_number values that conform to
        the syntax.

        :param :obj:`kuha_common.document_store.records.Study` study: Currently serialized study.
        :returns: list of three-tuples [(<str:language>, <str:nameidentifier>, <str:agency>)]
        :rtype: list
        """
        rval = []
        for grant_no in study.grant_numbers:
            val = grant_no.get_value()
            if val and val.startswith('info:eu-repo/grantAgreement/'):
                rval.append((grant_no.get_language(), val, grant_no.attr_agency.get_value()))
        return rval
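    # Worked example (illustration only) of get_funders(): only grant numbers
    # using the OpenAIRE grantAgreement syntax are kept. The funder and
    # project id below are hypothetical.
    #
    #     # a value like 'info:eu-repo/grantAgreement/EC/FP7/12345' is included,
    #     # while a plain value such as '12345' is filtered out.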
    async def _on_record(self, study):
        preferred_id = await self.get_preferred_identifier(study)
        if preferred_id != ():
            # Only add records that have some valid id.
            # For GetRecord, this leads to idDoesNotExist.
            # For ListRecords & ListIdentifiers this may lead to a false record count,
            # however, ListRecords & ListIdentifiers should use _valid_records_filter() to
            # make sure this will never happen.
            publication_year = await self.get_publication_year(study)
            publisher = await self.get_publisher_lang_value_pair(study)
            related_identifier_types_ids = await self.get_related_identifiers_types(study)
            funders = await self.get_funders(study)
            await super()._on_record(study,
                                     preferred_identifier=preferred_id,
                                     publication_year=publication_year,
                                     publisher_lang_val=publisher,
                                     related_identifier_types_ids=related_identifier_types_ids,
                                     funders=funders)

    @GenPlate('get_record.xml', subtemplate='oai_datacite.xml')
    async def get_record(self):
        await super()._get_record()
        return await super()._metadata_response()

    async def _valid_records_filter(self):
        _filter = await super()._valid_records_filter()
        _filter.update({
            self.study_class.identifiers.attr_agency: {
                QueryController.fk_constants.in_: list(const.valid_openaire_id_types)}})
        return _filter

    @GenPlate('list_records.xml', subtemplate='oai_datacite.xml')
    async def list_records(self):
        await super()._list_records()
        return await super()._metadata_response()