Source code for kuha_oai_pmh_repo_handler.handlers

#!/usr/bin/env python3
# Author(s): Toni Sissala
# Copyright 2020 Finnish Social Science Data Archive FSD / University of Tampere
# Licensed under the EUPL. See LICENSE.txt for full license.
"""Define handlers for responding to HTTP-requests.
"""
import logging

import kuha_common
from kuha_common.document_store.records import (
    Study,
    Variable,
    Question
)

from kuha_oai_pmh_repo_handler.genshi_loader import OAITemplate

from kuha_oai_pmh_repo_handler.oai.protocol import (
    OAIRequest,
    OAIResponse,
    decode_uri
)
from kuha_oai_pmh_repo_handler.oai.records import (
    OAIHeaders,
    OAIRecord,
    SETS,
    get_query_filter_for_set,
    get_sets_list_from_query_result
)
from kuha_oai_pmh_repo_handler.oai import errors as oaierrors

from kuha_oai_pmh_repo_handler.oai.constants import OAI_DEL_RECORDS_DECL_NO


class OAIRouteHandler(kuha_common.RequestHandler):
    """Handle requests to OAI endpoint.

    :class:`OAIRouteHandler` extends
    :class:`kuha_common.server.RequestHandler`. Input and output
    goes through this class. It is responsible for accepting
    requests via HTTP and routing the requests to OAI-protocol and
    to the correct verb-handler. Verb-handlers are defined in this
    class.

    Verb-handlers are responsible for calling the
    :class:`kuha_common.query.QueryController` and again routing
    the records to OAI-protocol. Verb-handlers also define the
    templates used to serialize XML, which is then sent as
    HTTP-response via :meth:`template_writer`.

    The oai protocol is defined in
    :mod:`kuha_oai_pmh_repo_handler.oai.protocol`.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Populated per request: the response and the query controller in
        # :meth:`prepare`, the request in :meth:`_router`.
        self._oai_request = None
        self._oai_response = None
        self._query_ctrl = None
        # Dispatch table: OAI verb value -> coroutine method serving it.
        self._verbs_routes = {
            OAIRequest.verb_value_identify: self._identify,
            OAIRequest.verb_value_list_sets: self._list_sets,
            OAIRequest.verb_value_list_metadata_formats: self._list_metadata_formats,
            OAIRequest.verb_value_list_identifiers: self._list_identifiers,
            OAIRequest.verb_value_list_records: self._list_records,
            OAIRequest.verb_value_get_record: self._get_record,
        }

    async def prepare(self):
        """Prepare each response.

        Initialize response. Load query controller.
        Set output content type.
        """
        await super().prepare()
        kuha_settings = self.application.settings['kuha_settings']
        if kuha_settings.oai_pmh_respond_with_requested_url:
            # Hand the response the URL the client actually requested,
            # with the query string cut off.
            http_request = self.request
            requested_url = '{}://{}{}'.format(
                http_request.protocol,
                http_request.host,
                http_request.uri.split('?')[0],
            )
            oai_response = OAIResponse(requested_url)
        else:
            oai_response = OAIResponse()
        self._oai_response = oai_response
        self._query_ctrl = kuha_common.QueryController(
            headers=self._correlation_id.as_header()
        )
        self.set_output_content_type(self.CONTENT_TYPE_XML)

    def template_writer(self, generator):
        """Writes the output from genshi template.

        :param generator: generator object containing the XML-serialization.
        :type generator: :obj:`generator`
        """
        for chunk in generator:
            self.write(chunk)

    @OAITemplate('error.xml')
    async def _oai_error(self):
        """Return the response data for the error template."""
        return self._oai_response.get_response()

    @OAITemplate('identify.xml')
    async def _identify(self):
        """Serve the Identify verb.

        Queries a single study sorted by its updated-attribute and
        uses its update time (or None when no study was returned) as
        the earliest datestamp of the response.
        """
        study = await self._query_ctrl.query_single(
            Study,
            fields=Study._metadata,
            sort_order=1,
            sort_by=Study._metadata.attr_updated,
        )
        earliest = study.get_updated() if study else None
        oai_response = self._oai_response
        oai_response.set_earliest_datestamp(earliest)
        oai_response.set_deleted_records_declaration(OAI_DEL_RECORDS_DECL_NO)
        oai_response.set_granularity('YYYY-MM-DDThh:mm:ssZ')
        return oai_response.get_response()

    @OAITemplate('get_record.xml')
    async def _get_record(self):
        """Serve the GetRecord verb.

        :raises: :exc:`oaierrors.IdDoesNotExist` if the query did not
                 add any record to the response.
        """
        requested_format = self._oai_request.get_metadata_format()
        header_fields = OAIHeaders.get_header_fields()
        format_fields = requested_format.get_record_fields(Study)
        # Header and format fields may overlap; query each field once.
        unique_fields = list(set(header_fields + format_fields))
        study_filter = {
            Study.study_number: self._oai_request.get_local_identifier()
        }
        await self._query_ctrl.query_single(
            Study,
            on_record=self._query_relative_records,
            _filter=study_filter,
            fields=unique_fields,
        )
        if self._oai_response.has_records():
            self._oai_response.assert_single_record()
            return self._oai_response.get_response()
        raise oaierrors.IdDoesNotExist(context=self._oai_request.get_identifier())

    @OAITemplate('list_sets.xml')
    async def _list_sets(self):
        """Serve the ListSets verb.

        First adds an element for every entry in ``SETS``, then extends
        the elements with the values found by a distinct-query per set.
        """
        for top_level_set in SETS:
            self._oai_response.add_sets_element(top_level_set.setspec,
                                                top_level_set.setname)
        for queried_set in SETS:
            distinct_values = await self._query_ctrl.query_distinct(
                Study, fieldname=queried_set.set_values_query_field
            )
            if distinct_values == {}:
                # Nothing stored for this set.
                continue
            self._oai_response.extend_sets_element(
                get_sets_list_from_query_result(queried_set, distinct_values)
            )
        return self._oai_response.get_response()

    @OAITemplate('list_metadata_formats.xml')
    async def _list_metadata_formats(self):
        """Serve the ListMetadataFormats verb.

        :raises: :exc:`oaierrors.IdDoesNotExist` if an identifier was
                 requested but no matching study was found.
        """
        identifier = self._oai_request.get_identifier()
        if identifier:
            # Formats were requested for one item; it has to exist.
            study = await self._query_ctrl.query_single(
                Study,
                _filter={
                    Study.study_number: self._oai_request.get_local_identifier()
                },
                fields=Study._metadata,
            )
            if not study:
                raise oaierrors.IdDoesNotExist(context=identifier)
        formats = []
        for format_cls in self._oai_request.iterate_supported_metadata_formats():
            formats.append(format_cls().as_dict())
        self._oai_response.set_metadata_formats(formats)
        return self._oai_response.get_response()

    @OAITemplate('list_identifiers.xml')
    async def _list_identifiers(self):
        """Serve the ListIdentifiers verb; studies are stored as-is."""
        await self._handle_list_request(self._store_record)
        return self._oai_response.get_response()

    @OAITemplate('list_records.xml')
    async def _list_records(self):
        """Serve the ListRecords verb; relative records are queried too."""
        await self._handle_list_request(self._query_relative_records)
        return self._oai_response.get_response()

    async def _store_record(self, study):
        """Wrap a study into an OAIRecord and add it to the response.

        :param study: study record from a query. Falsy values are ignored.
        """
        if study:
            self._oai_response.add_record(OAIRecord(study))

    async def _query_relative_records(self, study):
        """Add a study and its relative records to the response.

        For every relative record type declared by the requested
        metadata format, the variables or questions having the same
        study number are queried and attached to the record.

        :param study: study record from a query. Falsy values are ignored.
        """
        if not study:
            return
        record = OAIRecord(study)
        # (record class, callback collecting the results) per supported
        # relative collection; the first matching entry is used.
        relative_handlers = (
            (Variable, record.add_variable),
            (Question, record.add_question),
        )
        for relative in self._oai_request.metadata_format.get_relative_records():
            fields = self._oai_request.metadata_format.get_record_fields(relative)
            for record_cls, collect in relative_handlers:
                if relative.collection != record_cls.collection:
                    continue
                await self._query_ctrl.query_multiple(
                    record_cls,
                    on_record=collect,
                    _filter={
                        record_cls.study_number: record.study.study_number.get_value()
                    },
                    fields=fields,
                    sort_by=record_cls._metadata.attr_created,
                )
                break
        self._oai_response.add_record(record)

    async def _handle_list_request(self, on_record_callback):
        """Shared implementation of the list verbs.

        Builds the study filter from the requested set and from/until
        values, counts the matching studies for the resumption token and
        queries one page of studies.

        :param on_record_callback: coroutine called with each queried study.
        :raises: :exc:`oaierrors.NoRecordsMatch` if the requested set has
                 no query filter, or if a selective request produced no
                 records.
        """
        oai_request = self._oai_request
        fields = OAIHeaders.get_header_fields()
        if oai_request.get_verb() == oai_request.verb_value_list_records:
            fields.extend(oai_request.metadata_format.get_record_fields())
        skip = oai_request.get_cursor()
        from_ = oai_request.get_from()
        until = oai_request.get_query_param_until()

        study_filter = {}
        if oai_request.has_set():
            set_filter = get_query_filter_for_set(oai_request.get_set())
            if set_filter is None:
                # requested set does not exist
                raise oaierrors.NoRecordsMatch()
            study_filter.update(set_filter)
        if from_ or until:
            updated_attr = Study._metadata.attr_updated
            fk_constants = self._query_ctrl.fk_constants
            study_filter[updated_attr] = {
                fk_constants.from_: from_,
                fk_constants.until: until,
            }

        total = await self._query_ctrl.query_count(Study, _filter=study_filter)
        token = oai_request.get_resumption_token()
        token.set_complete_list_size(total)
        page_size = self.application.settings['kuha_settings'].oai_pmh_results_per_list
        await self._query_ctrl.query_multiple(
            Study,
            on_record=on_record_callback,
            _filter=study_filter,
            fields=fields,
            limit=page_size,
            skip=skip,
        )
        if not self._oai_response.has_records() and oai_request.is_selective():
            raise oaierrors.NoRecordsMatch()
        self._oai_response.set_resumption_token(token)

    async def _router(self, args):
        """Route based on oai request

        Any :exc:`oaierrors.OAIError` raised while building the request
        or running the verb-handler is logged, stored to the response
        and rendered with the error template.

        :param args: List of 2-item tuples [(key, value)]
        :type args: list
        """
        try:
            self._oai_request = OAIRequest(args)
            self._oai_response.set_request_params(self._oai_request)
            verb_handler = self._verbs_routes[self._oai_request.get_verb()]
            await verb_handler()
        except oaierrors.OAIError as oai_error:
            logging.warning("OAIError: %s", oai_error)
            self._oai_response.set_error(oai_error)
            await self._oai_error()

    async def get(self):
        """HTTP-GET handler

        Gathers request arguments. Calls router.
        Finishes the response.

        "URLs for GET requests have keyword arguments appended to the base URL"
        -- http://www.openarchives.org/OAI/openarchivesprotocol.html#ProtocolFeatures
        """
        # One (key, value) pair per submitted value, so repeated keys
        # stay visible to the router.
        args = [
            (key, decode_uri(raw_value.decode('utf-8')))
            for key, raw_values in self.request.arguments.items()
            for raw_value in raw_values
        ]
        await self._router(args)
        self.finish()

    async def post(self):
        """HTTP-POST handler

        Validates request content type. Gathers request arguments.
        Calls router. Finishes the response.

        "Keyword arguments are carried in the message body of the HTTP POST.
        The Content-Type of the request must be application/x-www-form-urlencoded."
        -- http://www.openarchives.org/OAI/openarchivesprotocol.html#ProtocolFeatures
        """
        self.assert_request_content_type('application/x-www-form-urlencoded')
        body = self.request.body.decode('utf-8')
        # Keys are taken from the raw body in submission order; empty
        # segments (e.g. from a trailing '&') are skipped.
        submitted_keys = (
            pair.partition('=')[0] for pair in body.split('&') if pair
        )
        args = [
            (key, decode_uri(self.get_body_argument(key)))
            for key in submitted_keys
        ]
        await self._router(args)
        self.finish()