Source code for kuha_oai_pmh_repo_handler.metadataformats
"""Define metadata formats.
Metadataformats create contexts by calling oai_response object and
declare templates if needed. Metadataformats raise oai_errors if
needed.
"""
# Stdlib
import os.path
import datetime
import logging
from collections import namedtuple
# Kuha Common
from kuha_common.query import QueryController
from kuha_common.document_store import (
query,
client,
Study,
Variable,
Question
)
from kuha_common.document_store.constants import REC_STATUS_DELETED
# Kuha OAI-PMH
from kuha_oai_pmh_repo_handler.genshi_loader import (
add_template_folders,
GenPlate
)
from kuha_oai_pmh_repo_handler.constants import TEMPLATE_FOLDER
from kuha_oai_pmh_repo_handler.oai.constants import (
OAI_RESPONSE_LIST_SIZE,
OAI_DEL_RECORDS_DECL_NO,
OAI_DEL_RECORDS_DECL_TRANSIENT,
OAI_DEL_RECORDS_DECL_PERSISTENT,
OAI_DATESTAMP_GRANULARITY_DATETIME
)
# Local subpackage
from . import (
const,
exc,
_mdsets
)
_logger = logging.getLogger(__name__)
# Module-level state shared by every metadataformat class in this module.
# * 'args_added': set to True once MDFormat.add_cli_args() has registered the
#   common command line arguments, so repeated calls become no-ops.
# * 'configured': set to True once MDFormat.configure() has run.
# * 'deleted_records': deleted records declaration read from settings in
#   MDFormat.configure(); None until configured.
# * 'loaded_sets': dict mapping set spec -> set class, filled by
#   MDFormat.configure_sets(); None until sets have been loaded.
_STORED = {'args_added': False,
'configured': False,
'deleted_records': None,
'loaded_sets': None}
class MDFormat:
    """Base class for metadata formats.

    Defines common attributes and methods. Subclass to define metadataformats.
    A subclass must set ``mdprefix``, ``mdschema`` and ``mdnamespace`` and
    implement ``_record_fields``, ``get_record()`` and ``list_records()``.
    """

    default_template_folders = [
        os.path.abspath(
            os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)), '..', TEMPLATE_FOLDER))]
    #: overridable controls how plugin discovery handles metadataformats with
    #: same mdprefix. Built-in metadataformats could be overridable, those developed
    #: as a plugin should not.
    overridable = False
    mdprefix = None
    mdschema = None
    mdnamespace = None
    study_class = Study
    variable_class = Variable
    question_class = Question
    list_size = OAI_RESPONSE_LIST_SIZE
    _deleted_records_default = OAI_DEL_RECORDS_DECL_TRANSIENT
    datestamp_granularity = OAI_DATESTAMP_GRANULARITY_DATETIME
    #: For convenience to facilitate easier subclassing of sets.
    #: The MDSet is tightly coupled with this class.
    MDSet = _mdsets.MDSet
    # All sets in repository.
    # Override to customize OAI sets. Note, that this attribute must
    # contain same objects for all metadataformat-objects in the
    # OAI-PMH repository.
    sets = [_mdsets.LanguageSet, _mdsets.StudyGroupsSet, _mdsets.DataKindSet,
            _mdsets.OpenAIREDataSet]

    def __init__(self, oai, corr_id_header):
        """Initialize base MDFormat.

        The oai argument wraps the oai-protocol. Its keys include
        'arguments', 'headers', 'errors', 'response', and values correspond
        to following objects:

        * arguments: :obj:`kuha_oai_pmh_repo_handler.oai.protocol.OAIArguments`
        * headers: :class:`kuha_oai_pmh_repo_handler.oai.protocol.OAIHeaders`
        * response: :obj:`kuha_oai_pmh_repo_handler.oai.protocol.OAIResponse`
        * errors: :mod:`kuha_oai_pmh_repo_handler.errors`

        :param oai: Object that wraps the oai-protocol.
        :param dict corr_id_header: CorrelationId HTTP-header.
        :raises: :exc:`NotImplementedError` if the subclass has not defined
                 mdprefix, mdschema and mdnamespace.
        """
        if any(x is None for x in (self.mdprefix, self.mdschema, self.mdnamespace)):
            raise NotImplementedError("mdprefix, mdschema and mdnamespace must be defined in "
                                      "subclass")
        self._oai = oai
        self.corr_id_header = corr_id_header

    @classmethod
    def add_cli_args(cls, parser):
        """Add command line arguments to parser.

        Adds required command line arguments regarding metadataformats & sets.
        This should be called on program startup along with other
        command line argument definitions if the program is allowing
        configuration of metadataformats & sets.

        The common arguments are registered only once per process; later
        calls return immediately.

        :param :obj:`configargparse.ArgumentParser` parser: Active command line parser.
        """
        if _STORED['args_added']:
            return
        # TODO where should the default template folder be declared?
        parser.add('--template-folder',
                   help='Folder containing XML templates',
                   default=cls.default_template_folders,
                   env_var='OPRH_TEMPLATES',
                   action='append',
                   type=str)
        parser.add('--oai-pmh-deleted-records',
                   help='Deleted records declaration for Identify verb.',
                   default=cls._deleted_records_default,
                   env_var='OPRH_DELETED_RECORDS',
                   choices=[OAI_DEL_RECORDS_DECL_NO,
                            OAI_DEL_RECORDS_DECL_TRANSIENT,
                            OAI_DEL_RECORDS_DECL_PERSISTENT])
        client.add_cli_args()
        query.add_cli_args()
        for set_ in cls.sets:
            set_.add_cli_args(parser)
        _STORED['args_added'] = True

    @classmethod
    def configure_sets(cls, settings):
        """Configure & load sets using settings.

        Calls configure() of each MDSet class stored in class variable
        'sets'. The configure() will be called with 'settings'-parameter.
        If the configure() return False the set will not be loaded,
        but will be discarded instead. Otherwise, the configured set
        will be stored in module level variable and used to serve
        OAI requests.

        :param :obj:`argparse.Namespace` settings: Loaded settings
        :raises: :exc:`ValueError` if sets have already been loaded.
        :raises: :exc:`DuplicateSetSpec` if two configured sets should
                 have duplicate value in 'spec' class level variable.
        """
        if _STORED['loaded_sets'] is not None:
            raise ValueError("Sets already loaded")
        _STORED['loaded_sets'] = {}
        for set_ in cls.sets:
            if set_.configure(settings) is False:
                # Discarding set
                _logger.info("Discarding OAI set '%s' with spec '%s'", set_, set_.spec)
                continue
            if set_.spec in _STORED['loaded_sets']:
                # NOTE(review): the name DuplicateSetSpec is not bound anywhere in
                # the visible part of this module (other exceptions are reached
                # via the ``exc`` subpackage). Confirm where it is defined;
                # as written this may raise NameError instead.
                raise DuplicateSetSpec("Found duplicate spec value '%s'" % (set_.spec,))
            _STORED['loaded_sets'][set_.spec] = set_

    @classmethod
    def configure(cls, settings):
        """Configure metadataformats & sets using settings.

        Runs only once per process; later calls return immediately.

        :param :obj:`argparse.Namespace` settings: Loaded settings
        """
        if _STORED['configured']:
            return
        add_template_folders(*settings.template_folder)
        client.configure(settings)
        query.configure(settings)
        cls.configure_sets(settings)
        _STORED['deleted_records'] = settings.oai_pmh_deleted_records
        _STORED['configured'] = True

    @staticmethod
    def get_deleted_record():
        """Get DeletedRecord OAI-PMH property

        :returns: The deleted records declaration stored by configure().
        :raises: :exc:`ValueError` if configure() has not stored a value yet.
        """
        if _STORED['deleted_records'] is None:
            raise ValueError("DeletedRecord is not configured. Call configure() first.")
        return _STORED['deleted_records']

    async def _header_fields(self):
        """Return the fields needed to build record headers.

        :returns: Study metadata and study number fields, extended with
                  the fields requested by every loaded set.
        :rtype: list
        """
        fields = [self.study_class._metadata,
                  self.study_class.study_number]
        for set_ in self._iter_initialized_sets():
            fields.extend(await set_.fields())
        return fields

    @property
    def _record_fields(self):
        """Implement in subclass"""
        raise NotImplementedError

    @staticmethod
    async def _min_increment_step(datetime_str):
        """Count smallest increment step from datetime string.

        :param datetime_str: string representation of a datetime.
                             Datetime must be represented either
                             by day's precision or by second's precision.
        :type datetime_str: str
        :returns: smallest increment step.
        :rtype: :obj:`datetime.timedelta`
        :raises: :exc:`ValueError` if string length is invalid.
        """
        length = len(datetime_str)
        if length == 10:
            # day's precision
            return datetime.timedelta(days=1)
        if length == 20:
            # second's precision
            return datetime.timedelta(seconds=1)
        # The exception must be raised; merely constructing it would let
        # execution fall through without a result.
        raise ValueError("Invalid datetime string: {}".format(datetime_str))

    @classmethod
    def get_set(cls, setspec):
        """Get set matching 'setspec' value.

        Looks the set up from the class variable 'sets', regardless of
        whether the set has been loaded by configure_sets().

        :param str setspec: Set to lookup.
        :returns: Found set, which is a subclass of :class:`MDSet`
        :raises: :exc:`exc.NoSuchSet` if a set is not found.
        """
        for set_ in cls.sets:
            if setspec == set_.spec:
                return set_
        raise exc.NoSuchSet("Could not find set matching setspec '%s'" % (setspec,))

    @staticmethod
    def _iter_loaded_sets():
        """Iterate over loaded set classes. Yields nothing if none are loaded."""
        loaded_sets = _STORED['loaded_sets'].values() if _STORED['loaded_sets'] else []
        for set_ in loaded_sets:
            yield set_

    def _iter_initialized_sets(self):
        """Iterate over loaded sets, each instantiated with this metadataformat."""
        for set_ in self._iter_loaded_sets():
            yield set_(self)

    @staticmethod
    def _get_loaded_set(setspec):
        """Get loaded set class matching 'setspec'.

        :param str setspec: Set to lookup.
        :returns: Loaded set class.
        :raises: :exc:`exc.NoSuchSet` if no loaded set matches. This includes
                 the case where sets have not been loaded at all, which is
                 consistent with _iter_loaded_sets() yielding nothing.
        """
        loaded_set = (_STORED['loaded_sets'] or {}).get(setspec)
        if loaded_set is None:
            raise exc.NoSuchSet("Could not find set matching setspec '%s'" % (setspec,))
        return loaded_set

    def _get_initialized_set(self, setspec):
        """Get loaded set matching 'setspec', instantiated with this metadataformat."""
        return self._get_loaded_set(setspec)(self)

    async def _set_filter(self, requested_set):
        """Build a query filter for the requested set.

        The requested set is either '<setspec>' or '<setspec>:<value>'.

        :param str requested_set: Set from the request.
        :returns: Query filter returned by the set's filter().
        :raises: NoRecordsMatch oai error if the requested set contains more
                 than one colon or does not match a loaded set.
        """
        colon_count = requested_set.count(':')
        if colon_count == 0:
            set_key = requested_set
            value = None
        elif colon_count == 1:
            set_key, value = requested_set.split(':')
        else:
            raise self._oai.errors.NoRecordsMatch()
        try:
            set_ = self._get_initialized_set(set_key)
        except exc.NoSuchSet:
            # This method is called when HTTP Request is using a set.
            # Therefore the condition is not a programming error but
            # an oaierror. Mask NoSuchSet and raise NoRecordsMatch.
            raise self._oai.errors.NoRecordsMatch()
        return await set_.filter(value)

    async def _prepare_get_record(self):
        """Return GetRecord response; raise IdDoesNotExist if no record was added."""
        if self._oai.response.records == []:
            raise self._oai.errors.IdDoesNotExist(context=self._oai.arguments.identifier)
        return await self._oai.response.get_record_response()

    async def _prepare_list_records(self):
        """Return ListRecords response.

        :raises: NoRecordsMatch oai error if the request is selective and
                 no records were added.
        """
        if self._oai.arguments.is_selective() and self._oai.response.records == []:
            raise self._oai.errors.NoRecordsMatch()
        return await self._oai.response.list_records_response()

    async def _metadata_response(self):
        """Set metadata format to response and return the response for current verb.

        Only the GetRecord and ListRecords verbs are dispatched; any other
        verb results in a :exc:`KeyError`.
        """
        await self._oai.response.set_metadata_format(self.mdschema, self.mdnamespace)
        _prepare_call = {
            self._oai.arguments.verb_value_get_record: self._prepare_get_record,
            self._oai.arguments.verb_value_list_records: self._prepare_list_records
        }[self._oai.arguments.verb]
        return await _prepare_call()

    async def _add_record(self, identifier, datestamp, record_objects, setspecs, deleted):
        """Build record headers and append the record to response.

        :param identifier: Record identifier.
        :param datestamp: Record datestamp.
        :param dict record_objects: Objects of the record. A 'headers' key is
                                    added to this dict.
        :param dict setspecs: Set values keyed by set spec. Must contain exactly
                              the specs of loaded sets. Consumed by this method.
        :param deleted: Passed to headers to flag a deleted record.
        :raises: :exc:`ValueError` if setspecs is missing a loaded set or
                 contains extra keys.
        """
        headers = self._oai.headers(identifier, datestamp, deleted)
        for set_ in self._iter_loaded_sets():
            if set_.spec not in setspecs:
                raise ValueError("Setspecs for '%s' is missing. Cannot build sets."
                                 % (set_.spec,))
            for val in setspecs.pop(set_.spec):
                headers.add_set_spec(set_.spec, val)
        # Every known spec was popped above; anything left is unexpected.
        if setspecs != {}:
            raise ValueError("Found extra set information in metadataformat: '%s'" % (setspecs,))
        record_objects.update({'headers': headers})
        self._oai.response.records.append(record_objects)

    async def _get_identifier(self, study, **record_objs):
        """Get identifier from record objects.

        Override in subclass to declare specific identifier

        :param study: Study from document store.
        :returns: Identifier
        """
        return study.study_number.get_value()

    async def _on_record(self, study, **record_objs):
        """Handle a study received from query by adding it to response.

        :param study: Study from document store.
        :param record_objs: Additional objects stored with the record.
        """
        identifier = await self._get_identifier(study, **record_objs)
        setspecs = {}
        for set_ in self._iter_initialized_sets():
            setspecs.update({set_.spec: await set_.get(study)})
        record_objs['study'] = study
        # Deleted studies are stamped with their deletion time.
        datestamp = study.get_deleted() if study.is_deleted() else study.get_updated()
        await self._add_record(identifier, datestamp, record_objs, setspecs, study.is_deleted())

    async def _has_record(self):
        """Return True if the requested record is found from backend."""
        _filter = await self._valid_record_filter()
        result = await QueryController().query_single(
            self.study_class, headers=self.corr_id_header,
            _filter=_filter, fields=self.study_class._id)
        return bool(result)

    async def _queryparams_from_resumption_token(self):
        """Gather query parameters from the resumption token.

        Also sets the response list size to the resumption token. The
        'until' parameter is incremented by the smallest step of the
        requested datestamp precision.

        :returns: namedtuple with attributes skip, from_, until, set_
        """
        MDQueryParams = namedtuple('MDQueryParams', ['skip', 'from_', 'until', 'set_'])
        self._oai.arguments.resumption_token.response_list_size = self.list_size
        _until = self._oai.arguments.resumption_token.until + await self._min_increment_step(
            self._oai.arguments.resumption_token.until_str)
        return MDQueryParams(skip=self._oai.arguments.resumption_token.cursor,
                             from_=self._oai.arguments.resumption_token.from_,
                             set_=self._oai.arguments.resumption_token.set_,
                             until=_until)

    async def _get_record(self):
        """Query the requested record; a found study is passed to _on_record()."""
        fields = await self._header_fields() + self._record_fields
        _filter = await self._valid_record_filter()
        await QueryController().query_single(
            self.study_class, on_record=self._on_record, headers=self.corr_id_header,
            _filter=_filter, fields=list(set(fields)))

    async def _valid_records_filter(self):
        """Return query filter that returns all valid records.

        Override in subclass to define specific filter requirements.

        :returns: Query filter
        :rtype: dict
        """
        return {}

    async def _valid_record_filter(self):
        """Return query filter that return a valid record.

        Override in subclass to define specific filter requirements.

        :returns: Query filter
        :rtype: dict
        """
        return {self.study_class.study_number: self._oai.arguments.get_local_identifier()}

    async def _list_request_filter(self, qparams):
        """Build query filter for list requests.

        :param qparams: Query parameters from _queryparams_from_resumption_token()
        :returns: Query filter
        :rtype: dict
        """
        _filter = await self._valid_records_filter()
        if qparams.set_:
            _filter.update(await self._set_filter(qparams.set_))
        if qparams.from_ or qparams.until:
            _filter.update({self.study_class._metadata.attr_updated: {
                QueryController.fk_constants.from_: qparams.from_,
                QueryController.fk_constants.until: qparams.until}})
        return _filter

    async def _query_records(self, add_fields=None):
        """Query records for list requests; each study is passed to _on_record().

        Stores the count of matching records to the resumption token as
        the complete list size.

        :param add_fields: Fields to query in addition to header fields.
        :type add_fields: list or None
        """
        add_fields = add_fields or []
        qparams = await self._queryparams_from_resumption_token()
        _filter = await self._list_request_filter(qparams)
        queryctrl = QueryController(headers=self.corr_id_header)
        count = await queryctrl.query_count(self.study_class, _filter=_filter)
        self._oai.arguments.resumption_token.complete_list_size = count
        fields = await self._header_fields() + add_fields
        await queryctrl.query_multiple(
            self.study_class, on_record=self._on_record, _filter=_filter,
            fields=list(set(fields)), limit=self.list_size, skip=qparams.skip)

    async def _list_records(self):
        """Query records including the record fields of this metadataformat."""
        await self._query_records(self._record_fields)

    async def get_earliest_datestamp(self):
        """Get earliest datestamp as python datetime object.

        :returns: earliest datestamp for this metadataformat, or None if
                  the query does not return a study.
        :rtype: :obj:`datetime.datetime`
        """
        datestamp = None
        study = await QueryController().query_single(
            self.study_class, headers=self.corr_id_header, fields=self.study_class._metadata,
            sort_order=1, sort_by=self.study_class._metadata.attr_updated)
        if study:
            datestamp = study._metadata.attr_updated.get_value()
        return datestamp

    async def list_sets(self):
        """Outputs all sets from all records in the whole repository.

        If overridden, this should be overridden in all subclasses.
        It should also have the same behaviour in all subclasses::

            async def _list_sets():
                ...

            class MyMetadataFormat(MDFormat):
                list_sets = _list_sets

        :raises: NoSetHierarchy oai error if no sets are loaded.
        """
        if list(self._iter_loaded_sets()) == []:
            raise self._oai.errors.NoSetHierarchy()
        for set_ in self._iter_initialized_sets():
            await set_.query(self._oai.response.add_sets_element)

    async def list_identifiers(self):
        """Query record identifiers from backend.

        Queries records and raises NoRecordsMatch oai error if the
        request is selective and no records were found.
        """
        await self._query_records()
        if self._oai.arguments.is_selective() and self._oai.response.records == []:
            raise self._oai.errors.NoRecordsMatch()

    async def list_metadata_formats(self):
        """Adds information regarding this metadataformat to response.

        If the request contains an identifier, first makes sure the
        record exists in backend, then adds the metadataformat
        information to response. If the record does not exist, nothing
        is added.
        """
        if self._oai.arguments.identifier and not await self._has_record():
            return
        await self._oai.response.add_available_metadata_format(
            self.mdprefix, self.mdschema, self.mdnamespace)

    # Subclass defines implementation for the following methods.

    async def get_record(self):
        """Adds record to response.

        This is an abstract method that must be implemented in
        subclass. Note that also the correct templates needs to be
        defined in subclass via decoration.

        The implementation must query the backend for the requested
        record, raise OAI errors if needed and return the correct
        oai.response.context.

        :raises: :exc:`NotImplementedError`
        """
        raise NotImplementedError

    async def list_records(self):
        """Adds records to response.

        This is an abstract method that must be implemented in
        subclass. The subclass must also define the correct template
        via decoration.

        The implementation must query the backend for the requested
        records, raise OAI errors when needed and return the correct
        oai.response.context.

        :raises: :exc:`NotImplementedError`
        """
        raise NotImplementedError
class DCMetadataFormat(MDFormat):
    """Built-in metadataformat served with metadata prefix ``oai_dc``."""

    overridable = True
    mdprefix = 'oai_dc'
    mdschema = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
    mdnamespace = 'http://www.openarchives.org/OAI/2.0/oai_dc/'

    @property
    def _record_fields(self):
        """Study fields queried for an OAI DC record."""
        study = self.study_class
        return [
            study.identifiers,
            study.principal_investigators,
            study.publishers,
            study.document_uris,
            study.abstract,
            study.keywords,
            study.publication_years,
            study.study_area_countries,
            study.data_collection_copyrights,
        ]

    @classmethod
    def add_cli_args(cls, parser):
        """Add common arguments and the OAI DC list size argument to parser.

        :param parser: Active command line parser.
        """
        super().add_cli_args(parser)
        parser.add(
            '--oai-pmh-list-size-oai-dc',
            type=int,
            default=OAI_RESPONSE_LIST_SIZE,
            env_var='OPRH_OP_LIST_SIZE_OAI_DC',
            help='How many results should a list response '
                 'contain for OAI DC metadata')

    @classmethod
    def configure(cls, settings):
        """Store list size from settings, then run the common configuration.

        :param settings: Loaded settings
        """
        cls.list_size = settings.oai_pmh_list_size_oai_dc
        super().configure(settings)

    @GenPlate('get_record.xml', subtemplate='oai_dc.xml')
    async def get_record(self):
        """Query the requested record and return the GetRecord response."""
        await super()._get_record()
        response = await super()._metadata_response()
        return response

    @GenPlate('list_records.xml', subtemplate='oai_dc.xml')
    async def list_records(self):
        """Query the requested records and return the ListRecords response."""
        await super()._list_records()
        response = await super()._metadata_response()
        return response
class EAD3MetadataFormat(MDFormat):
    """Built-in metadataformat served with metadata prefix ``ead3``."""

    overridable = True
    mdprefix = 'ead3'
    mdschema = 'http://www.loc.gov/ead/ead3.xsd'
    mdnamespace = 'http://ead3.archivists.org/schema/'

    @property
    def _record_fields(self):
        """Study fields queried for an EAD3 record."""
        study = self.study_class
        return [
            study.publishers,
            study.file_names,
            study.document_uris,
            study.collection_periods,
            study.principal_investigators,
            study.keywords,
            study.classifications,
            study.study_area_countries,
            study.geographic_coverages,
            study.data_access,
            study.data_collection_copyrights,
            study.citation_requirements,
            study.deposit_requirements,
            study.abstract,
        ]

    @classmethod
    def add_cli_args(cls, parser):
        """Add common arguments and the EAD3 list size argument to parser.

        :param parser: Active command line parser.
        """
        super().add_cli_args(parser)
        parser.add(
            '--oai-pmh-list-size-ead3',
            type=int,
            default=OAI_RESPONSE_LIST_SIZE,
            env_var='OPRH_OP_LIST_SIZE_EAD3',
            help='How many results should a list response '
                 'contain for EAD3 metadata')

    @classmethod
    def configure(cls, settings):
        """Store list size from settings, then run the common configuration.

        :param settings: Loaded settings
        """
        cls.list_size = settings.oai_pmh_list_size_ead3
        super().configure(settings)

    @GenPlate('get_record.xml', subtemplate='ead3.xml')
    async def get_record(self):
        """Query the requested record and return the GetRecord response."""
        await super()._get_record()
        response = await super()._metadata_response()
        return response

    @GenPlate('list_records.xml', subtemplate='ead3.xml')
    async def list_records(self):
        """Query the requested records and return the ListRecords response."""
        await super()._list_records()
        response = await super()._metadata_response()
        return response

    @staticmethod
    def get_daterange_pairs(colldates):
        """Record helper method extracts daterange pairs from a list of
        Study.collection_periods.

        Walks the list in order and keeps one pending start and one
        pending end. When a new 'start' arrives while a start is already
        pending (or a new 'end' while an end is pending) the pending pair
        is stored and the opposite side is cleared. Whatever is pending
        after the last item is stored as the final pair. Items whose
        event is neither 'start' nor 'end' are ignored. A side that is
        missing from a pair is None.

        :param list colldates: collection periods list
        :returns: List of date range pairs in two-tuples (start, end)
        :rtype: list
        """
        collected = []
        pending_start = None
        pending_end = None
        for colldate in colldates:
            event = colldate.attr_event.get_value()
            if event == 'start':
                if pending_start is not None:
                    collected.append((pending_start, pending_end))
                    pending_end = None
                pending_start = colldate
            elif event == 'end':
                if pending_end is not None:
                    collected.append((pending_start, pending_end))
                    pending_start = None
                pending_end = colldate
        # Flush the last, possibly one-sided, pair.
        if pending_start is not None or pending_end is not None:
            collected.append((pending_start, pending_end))
        return collected

    @staticmethod
    def get_singledates(colldates):
        """Record helper method extracts single dates from
        a list of Study.collection_periods.

        :param list colldates: collection periods list
        :returns: Items of colldates whose event is 'single', in order.
        :rtype: list
        """
        return [colldate for colldate in colldates
                if colldate.attr_event.get_value() == 'single']

    async def _on_record(self, study):
        """Add study to response along with the collection period helpers.

        :param study: Study from document store.
        """
        helpers = {'get_daterange_pairs': self.get_daterange_pairs,
                   'get_singledates': self.get_singledates}
        await super()._on_record(study, **helpers)
class DDICMetadataFormat(MDFormat):
    """Built-in metadataformat served with metadata prefix ``ddi_c``."""

    overridable = True
    mdprefix = 'ddi_c'
    mdschema = 'http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd'
    mdnamespace = 'ddi:codebook:2_5'

    @property
    def _record_fields(self):
        """Study fields queried for a DDI_C record."""
        study = self.study_class
        return [
            study.identifiers,
            study.document_titles,
            study.publishers,
            study.document_uris,
            study.study_uris,
            study.distributors,
            study.copyrights,
            study.parallel_titles,
            study.principal_investigators,
            study.publication_dates,
            study.publication_years,
            study.keywords,
            study.time_methods,
            study.sampling_procedures,
            study.collection_modes,
            study.analysis_units,
            study.collection_periods,
            study.classifications,
            study.abstract,
            study.study_area_countries,
            study.universes,
            study.data_access,
            study.data_access_descriptions,
            study.file_names,
            study.data_collection_copyrights,
            study.citation_requirements,
            study.deposit_requirements,
            study.geographic_coverages,
            study.instruments,
            study.related_publications,
            study.grant_numbers,
            study.funding_agencies,
        ]

    @classmethod
    def add_cli_args(cls, parser):
        """Add common arguments and the DDI_C list size argument to parser.

        :param parser: Active command line parser.
        """
        super().add_cli_args(parser)
        parser.add(
            '--oai-pmh-list-size-ddi-c',
            type=int,
            default=OAI_RESPONSE_LIST_SIZE,
            env_var='OPRH_OP_LIST_SIZE_DDI_C',
            help='How many results should a list response '
                 'contain for DDI_C metadata')

    @classmethod
    def configure(cls, settings):
        """Store list size from settings, then run the common configuration.

        :param settings: Loaded settings
        """
        cls.list_size = settings.oai_pmh_list_size_ddi_c
        super().configure(settings)

    @staticmethod
    def iter_relpubls(study):
        """Iterate related publications grouped by language and description.

        :param study: Study whose related_publications are grouped.
        :returns: Generator yielding two-tuples
                  ((<language>, <description>), [<related publication>, ...])
                  in order of first appearance of each group.
        """
        grouped = {}
        for relpubl in study.related_publications:
            lang_desc = (relpubl.get_language(), relpubl.attr_description.get_value())
            grouped.setdefault(lang_desc, []).append(relpubl)
        yield from grouped.items()

    async def _on_record(self, study):
        """Add study to response along with its variables and questions.

        Variables and questions are not queried for ListIdentifiers
        requests or for deleted studies.

        :param study: Study from document store.
        """
        arguments = self._oai.arguments
        if arguments.verb == arguments.verb_value_list_identifiers or study.is_deleted():
            await super()._on_record(study)
            return

        study_number = study.study_number.get_value()
        variables = []
        questions = {}

        async def _collect_question(question):
            # Questions are grouped by the variable name they refer to;
            # questions without a variable name are left out.
            varname = question.variable_name.get_value()
            if varname is not None:
                questions.setdefault(varname, []).append(question)

        variable = self.variable_class
        await QueryController().query_multiple(
            variable,
            on_record=variables.append,
            headers=self.corr_id_header,
            _filter={
                variable.study_number: study_number,
                variable._metadata.attr_status: {
                    QueryController.fk_constants.not_equal: REC_STATUS_DELETED},
            },
            fields=[variable.variable_name,
                    variable.variable_labels,
                    variable.codelist_codes],
            sort_by=variable._metadata.attr_created)

        question = self.question_class
        await QueryController().query_multiple(
            question,
            on_record=_collect_question,
            headers=self.corr_id_header,
            _filter={
                question.study_number: study_number,
                question._metadata.attr_status: {
                    QueryController.fk_constants.not_equal: REC_STATUS_DELETED},
            },
            fields=[question.question_identifier,
                    question.question_texts,
                    question.variable_name],
            sort_by=question._metadata.attr_created)

        await super()._on_record(
            study,
            iter_relpubls=DDICMetadataFormat.iter_relpubls,
            variables=variables,
            questions=questions)

    @GenPlate('get_record.xml', subtemplate='ddi_c.xml')
    async def get_record(self):
        """Query the requested record and return the GetRecord response."""
        await super()._get_record()
        response = await super()._metadata_response()
        return response

    @GenPlate('list_records.xml', subtemplate='ddi_c.xml')
    async def list_records(self):
        """Query the requested records and return the ListRecords response."""
        await super()._list_records()
        response = await super()._metadata_response()
        return response
class OAIDDI25MetadataFormat(MDFormat):
    """Built-in metadataformat served with metadata prefix ``oai_ddi25``.

    Shares its schema and namespace with :class:`DDICMetadataFormat`.
    """

    overridable = True
    mdprefix = 'oai_ddi25'
    mdschema = DDICMetadataFormat.mdschema
    mdnamespace = DDICMetadataFormat.mdnamespace

    @property
    def _record_fields(self):
        """Study fields queried for an OAI DDI25 record."""
        study = self.study_class
        return [
            study.identifiers,
            study.document_titles,
            study.publishers,
            study.document_uris,
            study.study_uris,
            study.distributors,
            study.copyrights,
            study.parallel_titles,
            study.principal_investigators,
            study.publication_dates,
            study.publication_years,
            study.keywords,
            study.time_methods,
            study.sampling_procedures,
            study.collection_modes,
            study.analysis_units,
            study.collection_periods,
            study.classifications,
            study.abstract,
            study.study_area_countries,
            study.universes,
            study.data_access,
            study.data_access_descriptions,
            study.file_names,
            study.data_collection_copyrights,
            study.citation_requirements,
            study.deposit_requirements,
            study.geographic_coverages,
            study.instruments,
            study.grant_numbers,
            study.related_publications,
            study.funding_agencies,
        ]

    @classmethod
    def add_cli_args(cls, parser):
        """Add common arguments and the OAI DDI25 list size argument to parser.

        :param parser: Active command line parser.
        """
        super().add_cli_args(parser)
        parser.add(
            '--oai-pmh-list-size-oai-ddi25',
            type=int,
            default=OAI_RESPONSE_LIST_SIZE,
            env_var='OPRH_OP_LIST_SIZE_OAI_DDI25',
            help='How many results should a list response '
                 'contain for OAI DDI25 metadata')

    @classmethod
    def configure(cls, settings):
        """Store list size from settings, then run the common configuration.

        :param settings: Loaded settings
        """
        cls.list_size = settings.oai_pmh_list_size_oai_ddi25
        super().configure(settings)

    async def _on_record(self, study):
        """Add study to response along with the related publications helper.

        :param study: Study from document store.
        """
        helpers = {'iter_relpubls': DDICMetadataFormat.iter_relpubls}
        await super()._on_record(study, **helpers)

    @GenPlate('get_record.xml', subtemplate='oai_ddi25.xml')
    async def get_record(self):
        """Query the requested record and return the GetRecord response."""
        await super()._get_record()
        response = await super()._metadata_response()
        return response

    @GenPlate('list_records.xml', subtemplate='oai_ddi25.xml')
    async def list_records(self):
        """Query the requested records and return the ListRecords response."""
        await super()._list_records()
        response = await super()._metadata_response()
        return response
class OAIDataciteMetadataFormat(MDFormat):
    """OpenAIRE DataCite metadataformat, served with prefix ``oai_datacite``."""

    overridable = True
    mdprefix = 'oai_datacite'
    mdschema = 'http://schema.datacite.org/meta/kernel-3/metadata.xsd'
    mdnamespace = 'http://datacite.org/schema/kernel-3'

    async def _header_fields(self):
        """Return common header fields extended with study identifiers."""
        header_fields = await super()._header_fields()
        header_fields.append(self.study_class.identifiers)
        return header_fields

    @property
    def _record_fields(self):
        """Study fields queried for an OAI Datacite record."""
        study = self.study_class
        return [
            study.identifiers,
            study.principal_investigators,
            study.distributors,
            study.publishers,
            study.publication_years,
            study.keywords,
            study.classifications,
            study.data_access,
            study.abstract,
            study.geographic_coverages,
            study.study_titles,
            study.related_publications,
            study.grant_numbers,
        ]

    @classmethod
    def add_cli_args(cls, parser):
        """Add common arguments and the OAI Datacite list size argument to parser.

        :param parser: Active command line parser.
        """
        super().add_cli_args(parser)
        parser.add(
            '--oai-pmh-list-size-oai-datacite',
            type=int,
            default=OAI_RESPONSE_LIST_SIZE,
            env_var='OPRH_OP_LIST_SIZE_OAI_DATACITE',
            help='How many results should a list response '
                 'contain for OAI Datacite metadata')

    @classmethod
    def configure(cls, settings):
        """Store list size from settings, then run the common configuration.

        :param settings: Loaded settings
        """
        cls.list_size = settings.oai_pmh_list_size_oai_datacite
        super().configure(settings)

    @classmethod
    async def get_preferred_identifier(cls, study):
        """OpenAIRE datacite requires a certain type of ID.

        Identifier type must be one of (also the lookup order):

        * DOI
        * ARK
        * Handle
        * PURL
        * URN
        * URL

        The accepted types and their order are read from
        ``const.valid_openaire_id_types``. If a study has several
        identifiers of the same type, the last one is used.

        :param :obj:`kuha_common.document_store.records.Study` study:
            Currently serialized study.
        :returns: (<str:type>, <str:id>), or an empty tuple if the study
                  has no identifier of an accepted type.
        :rtype: tuple
        """
        id_by_type = {}
        for identifier in study.identifiers:
            id_type = identifier.attr_agency.get_value()
            id_value = identifier.get_value()
            if id_type not in const.valid_openaire_id_types:
                continue
            if id_by_type.get(id_type, None) != id_value:
                id_by_type[id_type] = id_value
        for id_type in const.valid_openaire_id_types:
            if id_type in id_by_type:
                return (id_type, id_by_type[id_type])
        return ()

    @staticmethod
    async def get_publication_year(study):
        """Get publication year of study.

        Uses the first publication year entry that has a distribution
        date or a value; the distribution date takes precedence.

        :param study: Currently serialized study.
        :returns: At most four first characters of the found value, or
                  None if nothing was found.
        """
        for publyear in study.publication_years:
            found = publyear.attr_distribution_date.get_value() or publyear.get_value()
            if not found:
                continue
            if len(found) > 4:
                return found[:4]
            return found

    @staticmethod
    async def get_publisher_lang_value_pair(study):
        """Get publisher language & value pair as tuple.

        Distributors are the primary source and publishers are used only
        if the study has no distributors. Within a source, an entry in
        language 'en' is preferred; otherwise the first entry is used.

        :param :obj:`kuha_common.document_store.records.Study` study:
            Currently serialized study.
        :returns: (<str:language>, <str:publisher>), or an empty tuple if
                  the study has neither distributors nor publishers.
        :rtype: tuple
        """
        def _pick(elements):
            first_seen = ()
            for element in elements:
                if element.get_language() == 'en':
                    return ('en', element.get_value())
                if not first_seen:
                    first_seen = (element.get_language(), element.get_value())
            return first_seen

        # Distributor is the primary source
        return _pick(study.distributors) or _pick(study.publishers)

    @staticmethod
    async def get_funders(study):
        """Get OpenAIRE Datacite funders.

        OpenAIRE Datacite requires a certain nameIdentifier for
        Contributor. The syntax is described at
        https://guidelines.openaire.eu/en/latest/data/field_contributor.html#nameidentifier-ma-o

        This method filters in study.grant_number values that start
        with 'info:eu-repo/grantAgreement/'.

        :param :obj:`kuha_common.document_store.records.Study` study:
            Currently serialized study.
        :returns: list of three-tuples [(<str:language>,
                  <str:nameidentifier>, <str:agency>)]
        :rtype: list
        """
        funders = []
        for grant_no in study.grant_numbers:
            name_identifier = grant_no.get_value()
            if not name_identifier:
                continue
            if not name_identifier.startswith('info:eu-repo/grantAgreement/'):
                continue
            funders.append((grant_no.get_language(),
                            name_identifier,
                            grant_no.attr_agency.get_value()))
        return funders

    async def _on_record(self, study):
        """Add study to response if it has a preferred identifier.

        :param study: Study from document store.
        """
        preferred_id = await self.get_preferred_identifier(study)
        if preferred_id == ():
            # Only add records that have some valid id.
            # For GetRecord, this leads to idDoesNotExist
            # For ListRecords & ListIdentifiers this may lead to false record count,
            # however, ListRecords & ListIdentifiers should use _valid_records_filter() to
            # make sure this will never happen.
            return
        publication_year = await self.get_publication_year(study)
        publisher = await self.get_publisher_lang_value_pair(study)
        # NOTE(review): get_related_identifiers_types() is not defined in the
        # visible part of this class or its base class -- confirm it exists.
        related_identifier_types_ids = await self.get_related_identifiers_types(study)
        funders = await self.get_funders(study)
        record_objs = {
            'preferred_identifier': preferred_id,
            'publication_year': publication_year,
            'publisher_lang_val': publisher,
            'related_identifier_types_ids': related_identifier_types_ids,
            'funders': funders,
        }
        await super()._on_record(study, **record_objs)

    @GenPlate('get_record.xml', subtemplate='oai_datacite.xml')
    async def get_record(self):
        """Query the requested record and return the GetRecord response."""
        await super()._get_record()
        response = await super()._metadata_response()
        return response

    async def _valid_records_filter(self):
        """Return query filter that returns all valid records.

        Extends the common filter by requiring an identifier whose agency
        is one of ``const.valid_openaire_id_types``.

        :returns: Query filter
        :rtype: dict
        """
        valid_filter = await super()._valid_records_filter()
        accepted_types = list(const.valid_openaire_id_types)
        valid_filter.update({
            self.study_class.identifiers.attr_agency: {
                QueryController.fk_constants.in_: accepted_types}})
        return valid_filter

    @GenPlate('list_records.xml', subtemplate='oai_datacite.xml')
    async def list_records(self):
        """Query the requested records and return the ListRecords response."""
        await super()._list_records()
        response = await super()._metadata_response()
        return response