Source code for kuha_oai_pmh_repo_handler.oai.metadata_formats
#!/usr/bin/env python3
# Author(s): Toni Sissala
# Copyright 2020 Finnish Social Science Data Archive FSD / University of Tampere
# Licensed under the EUPL. See LICENSE.txt for full license.
"""Define supported metadata formats.
"""
from kuha_common.document_store.records import (
Study,
Variable,
Question
)
class MetadataFormatBase:
    """Common base for the supported metadata formats.

    Declares the class-level attributes that describe a metadata format
    and the accessor methods used to read them.

    :note: This class is not usable on its own. Subclass it and override
           the class attributes: instantiation raises
           :exc:`NotImplementedError` while any of ``prefix``, ``schema``,
           ``namespace`` or ``record_fields`` is still ``None``.
    """

    #: Prefix for metadata format. Override in subclass.
    prefix = None
    #: Schema URL for metadata format. Override in subclass.
    schema = None
    #: Namespace for metadata format. Override in subclass.
    namespace = None
    #: Record fields, keyed by record collection. Override in subclass.
    record_fields = None
    #: Relative records. Override in subclass. Leave as an empty list
    #: if there are no relative records.
    #: NOTE: this default list object is shared by every subclass that
    #: does not override it, so treat it as read-only.
    relative_records = []

    def __init__(self):
        # All four attributes are mandatory; a ``None`` left over from this
        # base class means the subclass forgot to declare it.
        mandatory = [self.prefix,
                     self.schema,
                     self.namespace,
                     self.record_fields]
        if None in mandatory:
            raise NotImplementedError("Attributes must be declared in subclass.")

    def get_prefix(self):
        """Return the metadata prefix of this format.

        :returns: metadata prefix.
        :rtype: str
        """
        return self.prefix

    def get_schema(self):
        """Return the schema URL of this format.

        :returns: URL to metadata schema.
        :rtype: str
        """
        return self.schema

    def get_namespace(self):
        """Return the namespace of this format.

        :returns: Metadata namespace.
        :rtype: str
        """
        return self.namespace

    def get_relative_records(self):
        """Return the relative records declared for this format.

        This is the ``relative_records`` class attribute as-is (no copy
        is made).

        :returns: list of relative records.
        :rtype: list
        """
        return self.relative_records

    def get_record_fields(self, record=Study):
        """Return the fields to use when querying Document Store.

        The lookup key is ``record.collection``.

        :param record: Get fields for this Document Store record.
                       Defaults to :obj:`kuha_common.document_store.records.Study`
        :type record: :obj:`kuha_common.document_store.records.Study` or
                      :obj:`kuha_common.document_store.records.Variable` or
                      :obj:`kuha_common.document_store.records.Question` or
                      :obj:`kuha_common.document_store.records.StudyGroup`
        :returns: document store record fields
        :rtype: list
        :raises: :exc:`KeyError` if record is not defined in ``record_fields``
        """
        fields_by_collection = self.record_fields
        return fields_by_collection[record.collection]

    def as_dict(self):
        """Return prefix, schema and namespace as a dictionary.

        :returns: metadata attributes under the keys ``prefix``,
                  ``schema`` and ``namespace``.
        :rtype: dict
        """
        attributes = {}
        attributes['prefix'] = self.get_prefix()
        attributes['schema'] = self.get_schema()
        attributes['namespace'] = self.get_namespace()
        return attributes
class DCMetadataFormat(MetadataFormatBase):
    """OAI-DC metadata format.

    Declares fields for the Study collection only and does not override
    ``relative_records``.
    """

    #: Metadata prefix for OAI-DC
    prefix = 'oai_dc'
    #: Metadata schema url for OAI-DC
    schema = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
    #: Namespace for OAI-DC
    namespace = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
    #: Study fields for OAI-DC, keyed by record collection
    record_fields = {
        Study.collection: [
            Study.identifiers,
            Study.study_titles,
            Study.principal_investigators,
            Study.publishers,
            Study.document_uris,
            Study.abstract,
            Study.keywords,
            Study.publication_years,
            Study.study_area_countries,
            Study.data_collection_copyrights,
        ],
    }
class DDIMetadataFormat(MetadataFormatBase):
    """DDI-C metadata format.

    Declares fields for the Study, Variable and Question collections and
    lists Variable and Question as relative records.
    """

    #: Metadata prefix for DDI-C
    prefix = 'ddi_c'
    #: Metadata schema url for DDI-C
    schema = 'http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd'
    #: Namespace for DDI-C
    namespace = 'ddi:codebook:2_5'
    #: Relative records for DDI-C
    relative_records = [Variable, Question]
    #: Fields for DDI-C, keyed by record collection
    record_fields = {
        Study.collection: [
            Study.study_number,
            Study.identifiers,
            Study.publishers,
            Study.document_uris,
            Study.study_uris,
            Study.distributors,
            Study.copyrights,
            Study.study_titles,
            Study.document_titles,
            Study.parallel_titles,
            Study.principal_investigators,
            Study.study_groups,
            Study.publication_dates,
            Study.publication_years,
            Study.keywords,
            Study.time_methods,
            Study.sampling_procedures,
            Study.collection_modes,
            Study.analysis_units,
            Study.collection_periods,
            Study.classifications,
            Study.abstract,
            Study.study_area_countries,
            Study.universes,
            Study.data_access,
            Study.data_access_descriptions,
            Study.file_names,
            Study.data_kinds,
            Study.data_collection_copyrights,
            Study.citation_requirements,
            Study.deposit_requirements,
            Study.geographic_coverages,
            Study.instruments,
            Study.related_publications,
        ],
        Variable.collection: [
            Variable.variable_name,
            Variable.variable_labels,
            Variable.codelist_codes,
        ],
        Question.collection: [
            Question.question_identifier,
            Question.question_texts,
            Question.variable_name,
        ],
    }
class CDCDDI25MetadataFormat(MetadataFormatBase):
    """CESSDA Data Catalogue DDI 2.5 metadata format.

    Uses the same schema URL and namespace as DDI-C under a different
    prefix, and declares fields for the Study collection only.
    """

    #: Metadata prefix for CESSDA Data Catalogue
    prefix = 'oai_ddi25'
    #: Metadata schema url for DDI-C
    schema = 'http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd'
    #: Namespace for DDI-C
    namespace = 'ddi:codebook:2_5'
    #: Study fields, keyed by record collection. The list is the very same
    #: object as the Study list of :class:`DDIMetadataFormat` (not a copy).
    record_fields = {
        Study.collection: DDIMetadataFormat.record_fields[Study.collection],
    }
class EAD3MetadataFormat(MetadataFormatBase):
    """EAD3 metadata format.

    Declares fields for the Study collection only and does not override
    ``relative_records``.
    """

    #: Metadata prefix for EAD3
    prefix = 'ead3'
    #: Metadata schema url for EAD3
    schema = 'http://www.loc.gov/ead/ead3.xsd'
    #: Namespace for EAD3
    namespace = 'http://ead3.archivists.org/schema/'
    #: Study fields for EAD3, keyed by record collection
    record_fields = {
        Study.collection: [
            Study.study_number,
            Study.study_titles,
            Study.publishers,
            Study.file_names,
            Study.document_uris,
            Study.collection_periods,
            Study.principal_investigators,
            Study.keywords,
            Study.classifications,
            Study.study_area_countries,
            Study.geographic_coverages,
            Study.data_access,
            Study.data_collection_copyrights,
            Study.citation_requirements,
            Study.deposit_requirements,
            Study.abstract,
            Study.study_groups,
        ],
    }