Commit 26a46d51 authored by Carina Antunes's avatar Carina Antunes
Browse files

support copy to fields in indico

parent 2d41d857
......@@ -109,6 +109,9 @@ SEARCH_INSTANCE_IMMUTABLE = ast.literal_eval(os.getenv("CERN_SEARCH_INSTANCE_IMM
# File indexer capabilities enabled
SEARCH_FILE_INDEXER = ast.literal_eval(os.getenv("CERN_SEARCH_FILE_INDEXER", "True"))
# When enabled, stored (copy_to) fields returned by search are merged into the record metadata
SEARCH_COPY_TO_METADATA = ast.literal_eval(os.getenv("CERN_SEARCH_COPY_TO_METADATA", "False"))
# Records REST configuration
# ===========================
......
......@@ -97,6 +97,7 @@
},
"name": {
"type": "text",
"store" : true,
"analyzer": "english",
"fields": {
"french": {
......@@ -191,6 +192,7 @@
},
"authors": {
"type": "text",
"store" : true,
"fields": {
"exact_match": {
"type": "keyword",
......@@ -224,6 +226,7 @@
},
"collection": {
"type": "keyword",
"store" : true,
"eager_global_ordinals": true
},
"type_format": {
......
......@@ -97,6 +97,7 @@
},
"name": {
"type": "text",
"store" : true,
"analyzer": "english",
"fields": {
"french": {
......@@ -189,6 +190,7 @@
},
"authors": {
"type": "text",
"store" : true,
"fields": {
"exact_match": {
"type": "keyword",
......@@ -222,6 +224,7 @@
},
"collection": {
"type": "keyword",
"store" : true,
"eager_global_ordinals": true
},
"type_format": {
......
......@@ -189,6 +189,7 @@
},
"authors": {
"type": "text",
"store" : true,
"fields": {
"exact_match": {
"type": "keyword",
......@@ -232,6 +233,7 @@
},
"collection": {
"type": "keyword",
"store" : true,
"eager_global_ordinals": true
},
"type_format": {
......
......@@ -97,6 +97,7 @@
},
"name": {
"type": "text",
"store" : true,
"analyzer": "english",
"fields": {
"french": {
......@@ -147,6 +148,7 @@
},
"collection": {
"type": "keyword",
"store": true,
"eager_global_ordinals": true
},
"note_id": {
......
......@@ -97,6 +97,7 @@
},
"name": {
"type": "text",
"store" : true,
"analyzer": "english",
"fields": {
"french": {
......@@ -189,6 +190,7 @@
},
"authors": {
"type": "text",
"store" : true,
"fields": {
"exact_match": {
"type": "keyword",
......@@ -225,6 +227,7 @@
},
"collection": {
"type": "keyword",
"store" : true,
"eager_global_ordinals": true
},
"event_id": {
......
......@@ -32,7 +32,12 @@
"type": "object",
"properties": {
"title": {
"type": "keyword"
"type": "keyword",
"copy_to": "_data.name"
},
"name": {
"type": "keyword",
"store": true
},
"description": {
"type": "text",
......
......@@ -41,3 +41,4 @@ class CSASRecordSearchSchemaJSONV1(RecordSchemaJSONV1):
highlight = fields.Raw()
explanation = fields.Raw()
stored = fields.Raw()
......@@ -134,6 +134,8 @@ def search_factory(self, search: RecordCERNSearch, query_parser=None):
if explain:
search = search.extra(explain=explain)
search = search.extra(stored_fields=["*"], _source=True)
return search, urlkwargs
......
......@@ -10,6 +10,7 @@
from __future__ import absolute_import, print_function
from flask import current_app
from invenio_records_rest.serializers.base import PreprocessorMixin
......@@ -23,6 +24,22 @@ class CernPreprocessorMixin(PreprocessorMixin):
pid, record_hit, links_factory=None, **kwargs
)
if record_hit.get("fields"):
# Move attrs from fields to metadata.
current_app.logger.debug("SEARCH_COPY_TO_METADATA %s", current_app.config["SEARCH_COPY_TO_METADATA"])
if current_app.config["SEARCH_COPY_TO_METADATA"]:
for key, value in record_hit["fields"].items():
key_path = key.split(".")
curr_path = record["metadata"]
for path in key_path[:-1]:
curr_path[path] = curr_path[path] if curr_path.get(path) else {}
curr_path = curr_path[path]
curr_path[key_path[-1]] = value[0] if len(value) == 1 else value
del record["metadata"][key]
else:
record["stored"] = record_hit.get("fields", dict())
record["highlight"] = record_hit.get("highlight", dict())
record["explanation"] = record_hit.get("_explanation", dict())
......
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2021 CERN.
#
# Citadel Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Search tests."""
import json
from http import HTTPStatus
import pytest
from tests.api.helpers import get_headers, get_schemas_endpoint
@pytest.fixture(scope="module")
def app_config(app_config):
    """Override the application configuration for this test module.

    Turns ``SEARCH_COPY_TO_METADATA`` off so the test below can check the
    default handling of stored fields.
    """
    app_config.update({"SEARCH_COPY_TO_METADATA": False})
    return app_config
def test_testclient(app, appctx, client, user):
    """Test search over public documents with default stored-field handling.

    With ``SEARCH_COPY_TO_METADATA`` set to ``False`` the ``copy_to`` target
    ``_data.name`` must not show up nested inside ``_data``; the test expects
    it as a flat ``_data.name`` key in the hit metadata and again under the
    hit's ``stored`` section, in both cases as a one-element list.
    """
    expected_title = "Test default search field"
    expected_description = "This contains CernSearch and should appear"
    admins = "CernSearch-Administrators@cern.ch"

    payload = {
        "_access": {
            "owner": [admins],
            "update": [admins],
            "delete": [admins],
        },
        "_data": {
            "title": expected_title,
            "description": expected_description,
        },
        "$schema": get_schemas_endpoint("test/doc_v0.0.2.json"),
    }

    # Create the test record and check the echoed metadata.
    created = client.post("/records/", headers=get_headers(), data=json.dumps(payload))
    assert created.status_code == HTTPStatus.CREATED

    created_metadata = created.json["metadata"]
    assert created_metadata.get("control_number") is not None
    created_data = created_metadata.get("_data")
    assert created_data.get("title") == expected_title
    assert created_data.get("description") == expected_description

    # Give Elasticsearch time to index the new record before searching.
    import time

    time.sleep(2)

    found = client.get("/records/?q=CernSearch", headers=get_headers())
    assert found.status_code == HTTPStatus.OK

    search_hits = found.json["hits"]
    assert search_hits.get("total") == 1
    first_hit = search_hits["hits"][0]
    hit_metadata = first_hit["metadata"]

    title = hit_metadata.get("_data").get("title")
    assert title is not None
    assert title == expected_title

    # copy_to target: absent from the nested document, present as a flat key.
    assert hit_metadata.get("_data").get("name") is None
    flat_name = hit_metadata.get("_data.name")
    assert flat_name is not None
    assert flat_name == [expected_title]

    # The same value is also exposed through the separate "stored" section.
    stored = first_hit.get("stored")
    assert stored
    stored_name = first_hit.get("stored").get("_data.name")
    assert stored_name is not None
    assert stored_name == [expected_title]
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2021 CERN.
#
# Citadel Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Search tests."""
import json
from http import HTTPStatus
import pytest
from tests.api.helpers import get_headers, get_schemas_endpoint
@pytest.fixture(scope="module")
def app_config(app_config):
    """Override the application configuration for this test module.

    Turns ``SEARCH_COPY_TO_METADATA`` on so the test below can check that
    stored fields end up inside the record metadata.
    """
    app_config.update({"SEARCH_COPY_TO_METADATA": True})
    return app_config
def test_testclient(app, client, user):
    """Test search over public documents with stored fields merged into metadata.

    With ``SEARCH_COPY_TO_METADATA`` set to ``True`` the ``copy_to`` target
    ``_data.name`` is expected nested inside ``_data`` as a plain string,
    while the hit carries neither a ``stored`` section nor a flat
    ``_data.name`` metadata key.
    """
    expected_title = "Test default search field"
    expected_description = "This contains CernSearch and should appear"
    admins = "CernSearch-Administrators@cern.ch"

    payload = {
        "_access": {
            "owner": [admins],
            "update": [admins],
            "delete": [admins],
        },
        "_data": {
            "title": expected_title,
            "description": expected_description,
        },
        "$schema": get_schemas_endpoint("test/doc_v0.0.2.json"),
    }

    # Create the test record and check the echoed metadata.
    created = client.post("/records/", headers=get_headers(), data=json.dumps(payload))
    assert created.status_code == HTTPStatus.CREATED

    created_metadata = created.json["metadata"]
    assert created_metadata.get("control_number") is not None
    created_data = created_metadata.get("_data")
    assert created_data.get("title") == expected_title
    assert created_data.get("description") == expected_description

    # Give Elasticsearch time to index the new record before searching.
    import time

    time.sleep(2)

    found = client.get("/records/?q=CernSearch", headers=get_headers())
    assert found.status_code == HTTPStatus.OK

    search_hits = found.json["hits"]
    assert search_hits.get("total") == 1
    first_hit = search_hits["hits"][0]
    hit_metadata = first_hit["metadata"]

    title = hit_metadata.get("_data").get("title")
    name = hit_metadata.get("_data").get("name")
    assert title is not None
    assert title == expected_title

    # copy_to target: merged into the nested document as a scalar value.
    assert name is not None
    assert name == expected_title

    # Nothing is left in a separate "stored" section or as a flat dotted key.
    assert first_hit.get("stored") is None
    assert hit_metadata.get("_data.name") is None
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment