Commit f5cd6512 authored by Pablo Panero's avatar Pablo Panero
Browse files

Search

parent 330a8e80
......@@ -23,7 +23,7 @@ secrets/
# Local env source
env.sh
env-*.sh
# Debug and other logs
......
......@@ -79,11 +79,20 @@ In order to upload a document we need to perform a *POST* operation. For example
curl -X POST -H 'Content-Type: application/json' -H 'Accept: application/json' \
-i 'http://<host:port>/api/records/' --data '
{
"_access": {
"delete": "test-egroup@cern.ch",
"owner": "test-egroup@cern.ch",
"read": "test-egroup@cern.ch",
"update": "test-egroup@cern.ch"
},
"description": "This is an awesome description for our first uploaded document",
"title": "Demo document",
"$schema": "http://0.0.0.0/schemas/test-doc_v0.0.1.json"
}
'
```
Note: The ``$schema`` field is not mandatory. If it is not set, the document will be inserted using the default schema
(defined upon instance creation).
The response should be a code 200 with a selflink to the new inserted document.
It should look something similar to the url of the next query. With it we can obtain the document:
......@@ -98,9 +107,29 @@ curl -X GET -H 'Content-Type: application/json' -H 'Accept: application/json' \
In order to query documents we need to perform a *GET* operation. We can specify the amount of
documents to be returned (in total and per page), among other options. For a full list check
[here](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html).
[here](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html). Note that all the indices of an
instance share one common alias, plus one additional alias per folder of the mappings tree. Therefore, the ``invenio-records-rest`` library can be configured with a single search index
(an alias that allows searching over multiple indices, but only the allowed ones).
An example query for the terms _awesome_ and _document_ looks like this:
```
/cernsearch-test/
|
'---> /type_one/
| |
| '---> mapping_one_a.json
| '---> mapping_one_b.json
'---> /type_two/
| |
| '---> mapping_two.json
|
'---> mapping_test.json
```
Indices ``mapping_one_a`` and ``mapping_one_b`` will have ``cernsearch-test`` and ``type_one`` aliases, ``mapping_two``
will have ``cernsearch-test`` and ``type_two`` aliases, and finally ``mapping_test`` will have ``cernsearch-test`` as
alias.
Concerning the queries, an example query for the terms _awesome_ and _document_ looks like this:
```bash
curl -X GET -H 'Content-Type: application/json' -H 'Accept: application/json' \
......@@ -230,6 +259,9 @@ An example mapping containing the permission fields is:
"_access": {
"type": "nested",
"properties": {
"owner":{
"type": "string"
},
"read":{
"type": "string"
},
......@@ -251,7 +283,10 @@ An example mapping containing the permission fields is:
}
```
Note that there is no _create_ permission, that is specified by the __owner_ field in the metadata.
Note that there is no _create_ permission; it is determined by the __owner_ field in the metadata. The owner field in
the document schema is still needed for querying purposes and should be the owner of the document (in most cases it
will be the same as the owner of the document collection or index, but it is specified at document indexing time
rather than upon index creation).
### Important note
......@@ -331,3 +366,19 @@ Starting command throw ssl:
```bash
gunicorn -b :5000 --certfile=ssl.crt --keyfile=ssl.key cern_search_rest.wsgi
```
## Configuration
CERN Search specific parameters:
-
-
-
The rest of the configuration parameters are configurable through the Invenio Framework. The full list of
the overwritten ones is shown below; nonetheless, if needed, others can be overwritten (check the documentation of the
corresponding project in the [invenio repository](https://www.github.com/inveniosoftware)):
-
-
-
\ No newline at end of file
......@@ -61,12 +61,12 @@ JSONSCHEMAS_REGISTER_ENDPOINTS_UI = False
# TODO use ES central service. Change INDEXER_RECORD_TO_INDEX = 'invenio_indexer.utils.default_record_to_index'
INDEXER_DEFAULT_DOC_TYPE = 'test-doc_v0.0.1'
INDEXER_DEFAULT_INDEX = 'cernsearch-test-doc_v0.0.1'
INDEXER_DEFAULT_INDEX = 'cernsearch-test-test-doc_v0.0.1'
# Search configuration
# =====================
SEARCH_MAPPINGS = ['cernsearch']
SEARCH_MAPPINGS = ['cernsearch-test']
# Records REST configuration
# ===========================
......@@ -86,28 +86,21 @@ RECORDS_REST_ENDPOINTS = dict(
list_route='/records/',
links_factory_imp='invenio_records_rest.links:default_links_factory',
record_class='cern_search_rest.modules.cernsearch.api:CernSearchRecord', # TODO
# record_loaders={ # TODO
# 'application/json': 'mypackage.loaders:json_loader'
# },
record_serializers={
'application/json': ('invenio_records_rest.serializers'
':json_v1_response'),
},
search_class='invenio_search.api.RecordsSearch',
# search_factory_imp=search_factory(), # Default TODO
search_index='cernsearch-test-doc_v0.0.1',
search_class='cern_search_rest.modules.cernsearch.search.RecordCERNSearch',
search_index='cernsearch-test', # TODO: Parametrize this, along with the rest of the config file
search_serializers={
'application/json': ('invenio_records_rest.serializers'
':json_v1_search'),
},
# suggesters= {}, # TODO
# use_options_view=True, # TODO
max_result_window=10000,
read_permission_factory_imp=record_read_permission_factory,
create_permission_factory_imp=record_create_permission_factory,
update_permission_factory_imp=record_update_permission_factory,
delete_permission_factory_imp=record_delete_permission_factory,
# error_handlers={}, # TODO
)
)
......
......@@ -112,7 +112,7 @@ def cern_authorized_signup_handler(resp, remote, *args, **kwargs):
def egroup_admin():
admin_access_groups = current_app.config['ADMIN_VIEW_ACCESS_GROUPS']
admin_access_groups = current_app.config['ADMIN_ACCESS_GROUPS']
# Allow based in the '_access' key
user_provides = get_user_provides()
# set.isdisjoint() is faster than set.intersection()
......
{
"title": "Custom record schema for collection v0.0.1",
"id": "http://localhost:5000/schemas/cernsearch-test/collection_v0.0.1.json",
"$schema": "http://localhost:5000/schemas/cernsearch-test/collection_v0.0.1.json",
"type": "object",
"properties": {
"_access": {
"type": "object",
"properties": {
"owner":{
"type": "string"
},
"read":{
"type": "string"
},
"update":{
"type": "string"
},
"delete":{
"type": "string"
}
}
},
"minutes": {
"type": "string",
"description": "Record title."
},
"content": {
"type": "string",
"description": "Description for record."
},
"authors": {
"type": "string",
"description": "Authors of the record."
},
"custom_pid": {
"type": "string"
},
"$schema": {
"type": "string"
}
}
}
\ No newline at end of file
{
"title": "Custom record schema v0.0.1",
"id": "http://localhost:5000/schemas/test-doc_v0.0.1.json",
"$schema": "http://json-schema.org/draft-04/schema#",
"id": "http://localhost:5000/schemas/cernsearch-test/test-doc_v0.0.1.json",
"$schema": "http://localhost:5000/schemas/cernsearch-test/test-doc_v0.0.1.json",
"type": "object",
"properties": {
"_access": {
"type": "object",
"properties": {
"owner":{
"type": "string"
},
"read":{
"type": "string"
},
......
{
"settings": {
"index.percolator.map_unmapped_fields_as_string": true,
"index.mapping.total_fields.limit": 3000
},
"mappings": {
"collection_v0.0.1": {
"numeric_detection": true,
"_meta": {
"_owner": "CernSearch-Administrators@cern.ch"
},
"_all": {
"analyzer": "english"
},
"properties": {
"_access": {
"type": "nested",
"properties": {
"owner":{
"type": "string"
},
"read": {
"type": "string"
},
"update": {
"type": "string"
},
"delete": {
"type": "string"
}
}
},
"minutes": {
"type": "string",
"analyzer": "english"
},
"content": {
"type": "string",
"analyzer": "english"
},
"authors": {
"type": "string",
"analyzer": "english"
},
"custom_pid": {
"type": "string",
"index": "not_analyzed"
},
"$schema": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
\ No newline at end of file
......@@ -16,6 +16,9 @@
"_access": {
"type": "nested",
"properties": {
"owner":{
"type": "string"
},
"read": {
"type": "string"
},
......
......@@ -3,6 +3,7 @@
from flask_security import current_user
from flask import request, g, current_app
from invenio_indexer.utils import default_record_to_index
from invenio_search import current_search_client
from cern_search_rest.modules.cernsearch.utils import get_user_provides
......@@ -79,26 +80,22 @@ def has_owner_permission(user, record=None):
if user.is_authenticated:
# Allow based in the '_access' key
user_provides = get_user_provides()
user_index = request.args.get("index")
index_exists, es_index = parse_index(user_index)
if index_exists and current_search_client.indices.exists([es_index]):
es_index, doc = get_index_from_request(record)
if current_search_client.indices.exists([es_index]):
mapping = current_search_client.indices.get_mapping([es_index])
if mapping is not None:
# set.isdisjoint() is faster than set.intersection()
create_access_groups = mapping[es_index]['mappings'][user_index]['_meta']['_owner'].split(',')
create_access_groups = mapping[es_index]['mappings'][doc]['_meta']['_owner'].split(',')
if user_provides and not set(user_provides).isdisjoint(set(create_access_groups)):
return True
return False
# Prefix prepended to every index name built by ``parse_index``.
INDEX_PREFIX = 'cernsearch'


def parse_index(index):
    """Build the prefixed index name for ``index``.

    :param index: Index name without the prefix, or ``None``.
    :returns: ``(True, '<INDEX_PREFIX>-<index>')`` when ``index`` is not
        ``None``; ``(False, None)`` otherwise.
    """
    if index is None:
        return False, None
    return True, '{0}-{1}'.format(INDEX_PREFIX, index)
def get_index_from_request(record=None):
    """Resolve the ``(index, doc_type)`` pair to use for ``record``.

    :param record: Record being handled, or ``None`` when no record is
        available.
    :returns: The result of ``default_record_to_index(record)`` when a
        record is given and its ``$schema`` is not explicitly ``None``;
        otherwise the ``INDEXER_DEFAULT_INDEX`` / ``INDEXER_DEFAULT_DOC_TYPE``
        pair from the application configuration.
    """
    # Without a record, or with an explicitly null ``$schema``, fall back
    # to the instance-wide defaults.
    if record is None or record.get('$schema', '') is None:
        app_config = current_app.config
        return (app_config['INDEXER_DEFAULT_INDEX'],
                app_config['INDEXER_DEFAULT_DOC_TYPE'])

    return default_record_to_index(record)
def has_update_permission(user, record):
......@@ -108,8 +105,9 @@ def has_update_permission(user, record):
user_provides = get_user_provides()
# set.isdisjoint() is faster than set.intersection()
update_access_groups = record['_access']['update'].split(',')
if (user_provides and not set(user_provides).isdisjoint(set(update_access_groups))) \
or has_owner_permission(user):
if check_elasticsearch(record) and (
(user_provides and not set(user_provides).isdisjoint(set(update_access_groups))) \
or has_owner_permission(user)):
return True
return False
......@@ -121,8 +119,9 @@ def has_read_record_permission(user, record):
user_provides = get_user_provides()
# set.isdisjoint() is faster than set.intersection()
read_access_groups = record['_access']['read'].split(',')
if (user_provides and not set(user_provides).isdisjoint(set(read_access_groups))) \
or has_owner_permission(user):
if check_elasticsearch(record) and (
(user_provides and not set(user_provides).isdisjoint(set(read_access_groups)))
or has_owner_permission(user)):
return True
return False
......@@ -198,3 +197,12 @@ def is_public(data, action):
the action is not inside access or is empty.
"""
return '_access' not in data or not data.get('_access', {}).get(action)
def check_elasticsearch(record=None):
    """Try to search for given record.

    The lookup goes through the search class of the view serving the
    current request, so any default filter that class applies also applies
    here.

    :param record: Record to look up; its ``id`` attribute is used for the
        lookup. ``None`` is accepted and treated as "not found".
    :returns: ``True`` if the search returns exactly one hit for the record,
        ``False`` otherwise (including when ``record`` is ``None``).
    """
    if record is None:
        return False

    # Instantiate the search class configured on the current method view.
    search = request._methodview.search_class()
    search = search.get_record(str(record.id))
    return search.count() == 1
#!/usr/bin/python
# -*- coding: utf-8 -*-
from elasticsearch_dsl import Q
from invenio_search import RecordsSearch
from invenio_search.api import DefaultFilter
from cern_search_rest.modules.cernsearch.utils import get_user_provides
def cern_search_filter():
    """Build the access filter applied to the list of results.

    A record matches when it has no ``_access.read`` value (public record),
    or when any of the values returned by ``get_user_provides()`` appears in
    its ``_access.read``, ``_access.update`` or ``_access.owner`` field.

    :returns: A ``bool`` query whose ``filter`` clause is the OR of the
        conditions above.
    """
    # TODO CHANGE THIS BY LIST PROVIDED BY SERVICE
    user_groups = get_user_provides()

    # Start with public records: those without any read restriction.
    access_filter = ~Q('exists', field='_access.read')

    # OR in, in this order, the records the user may read, may update,
    # or owns.
    for access_field in ('_access.read', '_access.update', '_access.owner'):
        access_filter = access_filter | Q('terms', **{access_field: user_groups})

    return Q('bool', filter=[access_filter])
class RecordCERNSearch(RecordsSearch):
    """CERN search class.

    ``RecordsSearch`` subclass that uses ``cern_search_filter`` as its
    default filter.
    """

    class Meta:
        # No document types are set on this search class.
        doc_types = None
        # ``cern_search_filter`` is wrapped in ``DefaultFilter``; presumably it
        # is evaluated when a search is built, so the filter reflects the
        # current user -- confirm against invenio-search.
        default_filter = DefaultFilter(cern_search_filter)
......@@ -88,10 +88,10 @@ setup(
'cern_search_rest = cern_search_rest.config'
],
'invenio_search.mappings': [
'cernsearch = cern_search_rest.modules.cernsearch.mappings',
'cernsearch-test = cern_search_rest.modules.cernsearch.mappings',
],
'invenio_jsonschemas.schemas': [
'cern_search_rest_schemas = cern_search_rest.modules.cernsearch.jsonschemas'
'cernsearch-test = cern_search_rest.modules.cernsearch.jsonschemas'
],
},
extras_require=extras_require,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment