Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
webservices
cern-search
cern-search-rest-api
Commits
78646751
Commit
78646751
authored
Apr 06, 2020
by
Carina Antunes
Browse files
[
SEARCH-66
] Add highlight and explain params to search api
parent
056c747f
Changes
10
Hide whitespace changes
Inline
Side-by-side
cern_search_rest_api/config.py
View file @
78646751
...
...
@@ -14,6 +14,8 @@ import ast
import
copy
import
os
from
cern_search_rest_api.modules.cernsearch.api
import
CernSearchRecord
from
cern_search_rest_api.modules.cernsearch.indexer
import
CernSearchRecordIndexer
from
cern_search_rest_api.modules.cernsearch.permissions
import
(
record_create_permission_factory
,
record_delete_permission_factory
,
record_list_permission_factory
,
...
...
@@ -103,7 +105,8 @@ RECORDS_REST_ENDPOINTS = dict(
item_route
=
'/record/<{0}:pid_value>'
.
format
(
_Record_PID
),
list_route
=
'/records/'
,
links_factory_imp
=
'invenio_records_rest.links:default_links_factory'
,
record_class
=
'cern_search_rest_api.modules.cernsearch.api:CernSearchRecord'
,
record_class
=
CernSearchRecord
,
indexer_class
=
CernSearchRecordIndexer
,
record_serializers
=
{
'application/json'
:
(
'cern_search_rest_api.modules.cernsearch.serializers'
':json_v1_response'
),
...
...
cern_search_rest_api/modules/cernsearch/indexer.py
View file @
78646751
...
...
@@ -10,6 +10,7 @@
from
cern_search_rest_api.modules.cernsearch.api
import
CernSearchRecord
from
flask
import
current_app
from
invenio_files_rest.storage
import
FileStorage
from
invenio_indexer.api
import
RecordIndexer
READ_MODE_BINARY
=
'rb'
ATTACHMENT_KEY
=
'_attachment'
...
...
@@ -17,9 +18,18 @@ FILE_KEY = '_file'
DATA_KEY
=
'_data'
class CernSearchRecordIndexer(RecordIndexer):
    """Record Indexer.

    Subclass of ``RecordIndexer`` whose only customisation is setting
    ``record_cls`` to ``CernSearchRecord``.
    """

    # Record class associated with this indexer.
    record_cls = CernSearchRecord
def
index_file_content
(
sender
,
json
=
None
,
record
:
CernSearchRecord
=
None
,
index
=
None
,
doc_type
=
None
,
arguments
=
None
,
**
kwargs
):
"""Index file content in search."""
if
not
record
.
files_content
:
return
for
file_obj
in
record
.
files_content
:
current_app
.
logger
.
debug
(
f
"Index file content
{
file_obj
.
obj
.
basename
}
in
{
record
.
id
}
"
)
...
...
cern_search_rest_api/modules/cernsearch/mappings/test/v6/test/file_v0.0.2.json
deleted
100644 → 0
View file @
056c747f
{
"settings"
:
{
"index.percolator.map_unmapped_fields_as_string"
:
true
,
"index.mapping.total_fields.limit"
:
3000
},
"mappings"
:
{
"file_v0.0.2"
:
{
"dynamic"
:
"strict"
,
"numeric_detection"
:
true
,
"_meta"
:
{
"_owner"
:
"CernSearch-Administrators@cern.ch"
},
"properties"
:
{
"_access"
:
{
"type"
:
"object"
,
"properties"
:
{
"owner"
:
{
"type"
:
"keyword"
},
"read"
:
{
"type"
:
"keyword"
},
"update"
:
{
"type"
:
"keyword"
},
"delete"
:
{
"type"
:
"keyword"
}
}
},
"_data"
:
{
"type"
:
"object"
,
"properties"
:
{
"title"
:
{
"type"
:
"keyword"
}
}
},
"_bucket"
:
{
"type"
:
"keyword"
},
"_updated"
:
{
"type"
:
"date"
},
"_created"
:
{
"type"
:
"date"
},
"control_number"
:
{
"type"
:
"keyword"
},
"$schema"
:
{
"enabled"
:
false
}
}
}
}
}
cern_search_rest_api/modules/cernsearch/mappings/test/v6/test/file_v0.0.3.json
deleted
100644 → 0
View file @
056c747f
{
"settings"
:
{
"index.percolator.map_unmapped_fields_as_string"
:
true
,
"index.mapping.total_fields.limit"
:
3000
},
"mappings"
:
{
"file_v0.0.3"
:
{
"dynamic"
:
"strict"
,
"numeric_detection"
:
true
,
"_meta"
:
{
"_owner"
:
"CernSearch-Administrators@cern.ch"
},
"properties"
:
{
"_access"
:
{
"type"
:
"object"
,
"properties"
:
{
"owner"
:
{
"type"
:
"keyword"
},
"read"
:
{
"type"
:
"keyword"
},
"update"
:
{
"type"
:
"keyword"
},
"delete"
:
{
"type"
:
"keyword"
}
}
},
"_data"
:
{
"type"
:
"object"
,
"properties"
:
{
"title"
:
{
"type"
:
"keyword"
}
}
},
"_bucket"
:
{
"type"
:
"keyword"
},
"_bucket_content"
:
{
"type"
:
"keyword"
},
"_updated"
:
{
"type"
:
"date"
},
"_created"
:
{
"type"
:
"date"
},
"control_number"
:
{
"type"
:
"keyword"
},
"$schema"
:
{
"enabled"
:
false
}
}
}
}
}
cern_search_rest_api/modules/cernsearch/marshmallow/json.py
View file @
78646751
...
...
@@ -4,30 +4,22 @@
# This file is part of CERN Search.
# Copyright (C) 2018-2019 CERN.
#
# C
ERN
Search is free software; you can redistribute it and/or modify it
# C
itadel
Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Marshmallow for records and search results."""
from
cern_search_rest_api.modules.cernsearch.utils
import
record_from_index
from
flask
import
current_app
from
invenio_indexer.utils
import
default_record_to_index
from
invenio_records_rest.schemas
import
RecordMetadataSchemaJSONV1
from
invenio_records_rest.schemas.json
import
RecordSchemaJSONV1
from
marshmallow
import
ValidationError
,
post_dump
,
validates_schema
def
has_and_needs_binary
(
original_data
):
es_index
,
doc
=
default_record_to_index
(
original_data
)
binary_index_list
=
current_app
.
config
[
'SEARCH_DOC_PIPELINES'
]
if
doc
in
binary_index_list
and
not
original_data
.
get
(
"_data"
).
get
(
'b64'
):
return
False
return
True
from
marshmallow
import
ValidationError
,
fields
,
validates_schema
class
CSASRecordSchemaV1
(
RecordMetadataSchemaJSONV1
):
"""Record schema."""
@
validates_schema
(
pass_original
=
True
)
def
validate_record
(
self
,
data
,
original_data
):
"""Validate record."""
if
not
original_data
.
get
(
'_access'
):
raise
ValidationError
(
'Missing field _access'
)
delete
=
original_data
.
get
(
'_access'
).
get
(
'delete'
)
...
...
@@ -44,19 +36,11 @@ class CSASRecordSchemaV1(RecordMetadataSchemaJSONV1):
'field _access.owner'
)
if
not
original_data
.
get
(
'_data'
):
raise
ValidationError
(
'Missing field _data'
)
if
not
has_and_needs_binary
(
original_data
):
raise
ValidationError
(
'Record to be index belongs to binary index '
'but does not contain the [b64] field'
)
return
class
CSASRecordSearchSchemaJSONV1
(
RecordSchemaJSONV1
):
"""Record Search schema."""
@
post_dump
()
def
remove_base64
(
self
,
data
):
""" If needed remove the base64 data from the response """
es_index
,
doc
=
record_from_index
(
data
)
binary_index_list
=
current_app
.
config
[
'SEARCH_DOC_PIPELINES'
]
if
doc
in
binary_index_list
:
data
.
get
(
'metadata'
).
get
(
"_data"
).
pop
(
'b64'
)
return
data
highlight
=
fields
.
Raw
()
explanation
=
fields
.
Raw
()
cern_search_rest_api/modules/cernsearch/search.py
View file @
78646751
...
...
@@ -4,8 +4,9 @@
# This file is part of CERN Search.
# Copyright (C) 2018-2019 CERN.
#
# C
ERN
Search is free software; you can redistribute it and/or modify it
# C
itadel
Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Search utilities."""
from
cern_search_rest_api.modules.cernsearch.utils
import
get_user_provides
from
elasticsearch_dsl
import
Q
...
...
@@ -13,6 +14,7 @@ from flask import current_app, request
from
invenio_records_rest.query
import
default_search_factory
from
invenio_search
import
RecordsSearch
from
invenio_search.api
import
DefaultFilter
from
werkzeug.datastructures
import
MultiDict
"""
...
...
@@ -62,6 +64,7 @@ def cern_search_filter():
def
get_egroups
():
"""Get egroups from access param, config or authenticated user."""
egroups
=
request
.
args
.
get
(
'access'
,
None
)
# If access rights are sent or is a search query
if
egroups
or
(
request
.
path
==
'/records/'
and
request
.
method
==
'GET'
):
...
...
@@ -76,10 +79,26 @@ def get_egroups():
return
get_user_provides
()
def
search_factory
(
self
,
search
,
query_parser
=
None
):
class RecordCERNSearch(RecordsSearch):
    """CERN search class with Elasticsearch DSL.

    Extends ``RecordsSearch``; all customisation lives in the nested
    ``Meta`` class.
    """

    class Meta:
        """Configuration for ``Search`` and ``FacetedSearch`` classes."""

        # No document types are configured.
        doc_types = None
        # Default filter built by wrapping ``cern_search_filter``.
        default_filter = DefaultFilter(cern_search_filter)
def
search_factory
(
self
,
search
:
RecordCERNSearch
,
query_parser
=
None
):
"""Parse query using elasticsearch DSL query.
:param self: REST view.
:param search: Elastic search DSL search instance.
:returns: Tuple with search instance and URL arguments.
"""
def
_csas_query_parser
(
qstr
=
None
):
"""
Default parser that uses
th
e
Q() from elasticsearch_dsl."""
"""
Parse wi
th Q() from elasticsearch_dsl."""
if
qstr
:
return
Q
(
'query_string'
,
...
...
@@ -93,15 +112,15 @@ def search_factory(self, search, query_parser=None):
search
=
search
.
params
(
search_type
=
"dfs_query_then_fetch"
)
# search across all shards
return
search
,
urlkwargs
highlights
=
request
.
args
.
getlist
(
'highlight'
,
None
)
if
highlights
:
search
=
search
.
highlight
(
*
highlights
)
csas_search_factory
=
search_factory
explain
=
request
.
args
.
get
(
'explain'
,
None
)
if
explain
:
search
=
search
.
extra
(
explain
=
explain
)
return
search
,
urlkwargs
class
RecordCERNSearch
(
RecordsSearch
):
"""CERN search class."""
class
Meta
:
doc_types
=
None
default_filter
=
DefaultFilter
(
cern_search_filter
)
csas_search_factory
=
search_factory
cern_search_rest_api/modules/cernsearch/serializers/__init__.py
View file @
78646751
...
...
@@ -6,19 +6,20 @@
#
# CERN Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Serializers for records and search results."""
from
__future__
import
absolute_import
,
print_function
from
cern_search_rest_api.modules.cernsearch.marshmallow
import
CSASRecordSchemaV1
,
CSASRecordSearchSchemaJSONV1
from
invenio_records_rest
.serializers.json
import
JSONSerializer
from
cern_search_rest_api.modules.cernsearch
.serializers.json
import
Cern
JSONSerializer
from
invenio_records_rest.serializers.response
import
record_responsify
,
search_responsify
# Serializers
# ===========
#: JSON serializer definition.
json_v1
=
JSONSerializer
(
CSASRecordSchemaV1
,
replace_refs
=
True
)
json_v1_records
=
JSONSerializer
(
CSASRecordSearchSchemaJSONV1
)
json_v1
=
Cern
JSONSerializer
(
CSASRecordSchemaV1
,
replace_refs
=
True
)
json_v1_records
=
Cern
JSONSerializer
(
CSASRecordSearchSchemaJSONV1
)
# Records-REST serializers
# ========================
...
...
cern_search_rest_api/modules/cernsearch/serializers/base.py
0 → 100644
View file @
78646751
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2019 CERN.
#
# Citadel Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Mixin helper class for preprocessing records and search results."""
from
__future__
import
absolute_import
,
print_function
from
invenio_records_rest.serializers.base
import
PreprocessorMixin
class CernPreprocessorMixin(PreprocessorMixin):
    """Preprocessor mixin that adds highlight and explanation to search hits."""

    @staticmethod
    def preprocess_search_hit(pid, record_hit, links_factory=None, **kwargs):
        """Prepare a record hit from Elasticsearch for serialization.

        Delegates to ``PreprocessorMixin.preprocess_search_hit`` and then
        copies the hit's ``highlight`` and ``_explanation`` entries onto the
        resulting record under the ``highlight`` and ``explanation`` keys.
        Each key defaults to an empty dict when the hit does not carry it.

        :param pid: Persistent identifier, forwarded to the parent.
        :param record_hit: Search hit; must support ``.get``.
        :param links_factory: Links factory, forwarded to the parent.
        :param kwargs: Extra keyword arguments, forwarded to the parent.
        :returns: The record produced by the parent implementation, extended
            with the ``highlight`` and ``explanation`` keys.
        """
        # Zero-argument ``super()`` is unavailable inside a staticmethod, so
        # the class is named explicitly for both arguments.
        parent = super(CernPreprocessorMixin, CernPreprocessorMixin)
        # Forward the caller's links_factory; hard-coding ``None`` here would
        # silently discard it.
        record = parent.preprocess_search_hit(
            pid, record_hit, links_factory=links_factory, **kwargs
        )

        record["highlight"] = record_hit.get('highlight', {})
        record["explanation"] = record_hit.get('_explanation', {})

        return record
cern_search_rest_api/modules/cernsearch/serializers/json.py
0 → 100644
View file @
78646751
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2019 CERN.
#
# Citadel Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Marshmallow based JSON serializer for records."""
from
__future__
import
absolute_import
,
print_function
from
cern_search_rest_api.modules.cernsearch.serializers.base
import
CernPreprocessorMixin
from
invenio_records_rest.serializers
import
JSONSerializer
class CernJSONSerializer(JSONSerializer, CernPreprocessorMixin):
    """Marshmallow based JSON serializer for records.

    Combines ``JSONSerializer`` with ``CernPreprocessorMixin``; the class
    defines no members of its own.

    NOTE(review): ``CernPreprocessorMixin`` is listed after
    ``JSONSerializer`` -- confirm that its ``preprocess_search_hit`` is the
    one the method resolution order selects.
    """
tests/api/test_search.py
View file @
78646751
...
...
@@ -86,6 +86,22 @@ def test_testclient(app, client, user):
assert
description
is
not
None
assert
description
==
'This contains CernSearch and should appear'
# Test query params
resp
=
client
.
get
(
'/records/'
,
headers
=
get_headers
(),
query_string
=
{
'q'
:
'CernSearch'
,
'explain'
:
'true'
,
'highlight'
:
'*'
}
)
assert
resp
.
status_code
==
HTTPStatus
.
OK
resp_hits
=
resp
.
json
[
'hits'
]
explanation
=
resp_hits
[
'hits'
][
0
].
get
(
'explanation'
)
print
(
resp_hits
[
'hits'
][
0
])
assert
explanation
highlight
=
resp_hits
[
'hits'
][
0
].
get
(
'highlight'
)
assert
highlight
# Clean the instance. Delete record
resp
=
client
.
delete
(
'/record/{control_number}'
.
format
(
control_number
=
control_number_one
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment