Source code for kuha_common.document_store.mappings.ddi.ddi33

#!/usr/bin/env python3
# Author(s): Toni Sissala
# Copyright 2025 Finnish Social Science Data Archive FSD / University of Tampere
# Licensed under the EUPL. See LICENSE.txt for full license.
#
"""Mapping profiles for DDI 3.3"""

from itertools import chain
from kuha_common.document_store.mappings.xmlbase import MappedParams
from kuha_common.document_store.mappings.ddi.lifecycle import DDILifecycle32ParserBase


class DDI33RecordParser(DDILifecycle32ParserBase):
    """XML Record Parser for DDI 3.3 files

    DDI 3.3 introduces OtherMaterialSchemes and uses the attributes
    controlledVocabularyName and controlledVocabularyURN to define
    controlled vocabularies.
    """

    #: XML namespaces
    NS = {
        'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'xhtml': 'http://www.w3.org/1999/xhtml',
        'ddi': 'ddi:instance:3_3',
        's': 'ddi:studyunit:3_3',
        'pd': 'ddi:physicaldataproduct:3_3',
        'pi': 'ddi:physicalinstance:3_3',
        'c': 'ddi:conceptualcomponent:3_3',
        'l': 'ddi:logicalproduct:3_3',
        'r': 'ddi:reusable:3_3',
        'g': 'ddi:group:3_3',
        'dc': 'ddi:datacollection:3_3',
        'a': 'ddi:archive:3_3',
    }

    @property
    def _att_cv_name(self):
        """Name of the XML attribute that carries the controlled vocabulary name."""
        return "controlledVocabularyName"

    @property
    def _att_cv_urn(self):
        """Name of the XML attribute that carries the controlled vocabulary URN."""
        return "controlledVocabularyURN"

    def _iter_other_materials_from_study_unit(self, study_unit_element):
        """Generator yields OtherMaterial elements reachable from a StudyUnit.

        Lookup locations in order:

            StudyUnit/OtherMaterialScheme/OtherMaterial
            StudyUnit/OtherMaterialSchemeReference (resolved to an
            OtherMaterialScheme, then its OtherMaterial children)

        :param study_unit_element: StudyUnit element used as the lookup root.
        :returns: Generator yielding OtherMaterial elements one by one.
        """
        # Inline scheme directly under the study unit.
        yield from self._findall('./r:OtherMaterialScheme/r:OtherMaterial', study_unit_element)
        # Schemes that the study unit points to by reference.
        yield from self._findall_from_elements(
            self._find_and_iter_referred_els(
                './r:OtherMaterialSchemeReference', './/r:OtherMaterialScheme', study_unit_element
            ),
            './r:OtherMaterial',
        )

    def _iter_instruments_as_mapped_params(self):
        """Generator yields instruments as mapped parameters

        Inclusive lookup locations in order:

            DataCollection/Instrument
            DataCollection/InstrumentReference
            DataCollection/InstrumentScheme/Instrument
            DataCollection/InstrumentSchemeReference
            DataCollection/CollectionEvent/InstrumentReference

        :returns: Generator yielding instruments one by one.
        """
        # Materialized because the same DataCollection elements are used
        # as the lookup root for every location below.
        data_coll_els = list(self._iter_data_collections_from_element(self.study_unit_element))
        yield from chain.from_iterable(
            [
                # DataCollection/Instrument
                self._iter_instruments_as_mapped_params_from_instrument_els(
                    self._findall_from_elements(data_coll_els, './dc:Instrument')
                ),
                # DataCollection/InstrumentReference
                self._iter_instruments_as_mapped_params_from_instrument_els(
                    self._find_and_iter_referred_els('./dc:InstrumentReference', './/dc:Instrument', *data_coll_els)
                ),
                # DataCollection/InstrumentScheme/Instrument
                self._iter_instruments_as_mapped_params_from_instrument_els(
                    self._findall_from_elements(data_coll_els, './dc:InstrumentScheme/dc:Instrument')
                ),
                # DataCollection/InstrumentSchemeReference
                self._iter_instruments_as_mapped_params_from_instrument_els(
                    self._findall_from_elements(
                        self._find_and_iter_referred_els(
                            './r:InstrumentSchemeReference', './/dc:InstrumentScheme', *data_coll_els
                        ),
                        './dc:Instrument',
                    )
                ),
                # DataCollection/CollectionEvent/InstrumentReference
                self._iter_instruments_as_mapped_params_from_instrument_els(
                    self._find_and_iter_referred_els(
                        './dc:CollectionEvent/dc:InstrumentReference', './/dc:Instrument', *data_coll_els
                    )
                ),
            ]
        )

    def _iter_other_materials_for_document_uris(self):
        """Generator yields OtherMaterial elements whose TypeOfMaterial is 'Document'.

        Only DDIInstance/g:ResourcePackage/r:OtherMaterialScheme/r:OtherMaterial
        is searched. Yields nothing when there is no DDIInstance element.

        :returns: Generator yielding matching OtherMaterial elements one by one.
        """
        if self._ddiinstance_element is None:
            return
        for oth_mat_el in self._findall(
            './g:ResourcePackage/r:OtherMaterialScheme/r:OtherMaterial', self._ddiinstance_element
        ):
            type_of_material_el = self._find('./r:TypeOfMaterial', oth_mat_el)
            if type_of_material_el is None:
                continue
            # Strip surrounding whitespace so that pretty-printed XML still
            # matches; this mirrors the 'Related Publication' comparison in
            # _iter_other_materials_for_related_publications.
            if ''.join(type_of_material_el.itertext()).strip() != 'Document':
                continue
            yield oth_mat_el

    def _iter_other_materials_for_related_publications(self):
        """Generator yields OtherMaterial elements typed as 'Related Publication'.

        OtherMaterials of the StudyUnit (inline or via reference) are
        searched first. If at least one of them is a related publication,
        the DDIInstance/g:ResourcePackage lookup is bypassed. Otherwise
        DDIInstance/g:ResourcePackage/r:OtherMaterialScheme/r:OtherMaterial
        is traversed as well, provided a DDIInstance element exists.

        :returns: Generator yielding matching OtherMaterial elements one by one.
        """
        found = False

        def _is_related_publication(oth_mat_el):
            type_of_material_el = self._find('./r:TypeOfMaterial', oth_mat_el)
            return (
                type_of_material_el is not None
                and ''.join(type_of_material_el.itertext()).strip() == 'Related Publication'
            )

        for oth_mat_el in self._iter_other_materials_from_study_unit(self.study_unit_element):
            if _is_related_publication(oth_mat_el):
                yield oth_mat_el
                found = True
        if not found and self._ddiinstance_element is not None:
            for oth_mat_el in self._findall(
                './g:ResourcePackage/r:OtherMaterialScheme/r:OtherMaterial', self._ddiinstance_element
            ):
                if _is_related_publication(oth_mat_el):
                    yield oth_mat_el

    def _iter_data_access_descriptions_as_mapped_params(self):
        """Generator yields data access descriptions as mapped parameters

        Looks for Archive/ArchiveSpecific/Item/Access/TypeOfAccess under the
        StudyUnit. When such an element exists, its text is yielded together
        with its language and the value of its controlledVocabularyName
        attribute. When none exists, the lookup of the parent class is used.

        :returns: Generator yielding :obj:`MappedParams`.
        """
        for typeofaccess_el in self._findall(
            './a:Archive/a:ArchiveSpecific/a:Item/a:Access/a:TypeOfAccess', element=self.study_unit_element
        ):
            params = MappedParams(''.join(typeofaccess_el.itertext()))
            params.set_language(self._get_xmllang(typeofaccess_el, default=self._study_unit_language))
            params.keyword_arguments.update(
                {
                    self._study_cls.data_access_descriptions.attr_element_version.name: typeofaccess_el.get(
                        self._att_cv_name
                    )
                }
            )
            yield params
            # NOTE(review): stops after the first TypeOfAccess element, so
            # further matches are ignored and the parent lookup below is
            # skipped -- confirm that taking only the first one is intended.
            return
        # Reached only when no TypeOfAccess element was found.
        yield from super()._iter_data_access_descriptions_as_mapped_params()