search.py 3.3 KB
Newer Older
Pablo Panero's avatar
Search  
Pablo Panero committed
1
2
#!/usr/bin/python
# -*- coding: utf-8 -*-
3
4
5
6
7
8
#
# This file is part of CERN Search.
# Copyright (C) 2018-2019 CERN.
#
# CERN Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
9

Carina Antunes's avatar
Carina Antunes committed
10
from cern_search_rest_api.modules.cernsearch.utils import get_user_provides
Pablo Panero's avatar
Search  
Pablo Panero committed
11
from elasticsearch_dsl import Q
Carina Antunes's avatar
Carina Antunes committed
12
from flask import current_app, request
13
from invenio_records_rest.query import default_search_factory
Pablo Panero's avatar
Search  
Pablo Panero committed
14
15
16
from invenio_search import RecordsSearch
from invenio_search.api import DefaultFilter

17

18
19
20
21
22
23
24
25
26
"""
The Filter emulates the following query:
curl -X GET "localhost:9200/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "bool": {
      "filter": {
        "bool": {
          "should": [
27
28
29
30
31
32
33
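            {"terms": {"_access.read": ["egroup-read-one","egroup-read-two"]}},
            {"terms": {"_access.update": ["egroup-write-one"]}},
            {"terms": {"_access.delete": ["egroup-delete-one"]}},
            {"terms": {"_access.owner": ["egroup-owner-one"]}},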
            {"bool": { # Public document
              "must_not": {
                "exists": {"field": "_access.read"}
              } # End must_not
            }} # End bool
34
35
36
37
38
39
40
41
          ] # End should
        } # End bool
      } # End filter
    } # End bool
  } # End query
}
'
"""
Pablo Panero's avatar
Search  
Pablo Panero committed
42
43
44
45


def cern_search_filter():
    """Build the default access filter applied to searches.

    Records without an ``_access.read`` field are treated as public and always
    match. When ``get_egroups()`` returns something other than ``None``,
    records whose ``_access.read``, ``_access.update``, ``_access.delete`` or
    ``_access.owner`` field contains one of the returned values match as well.

    :returns: A ``bool`` query carrying the OR-ed clauses in its ``filter``.
    """
    groups = get_egroups()

    # Public records: no read restriction is declared on the document.
    allowed = ~Q('exists', field='_access.read')

    if groups is not None:
        # OR in one ``terms`` clause per access field: readers, updaters,
        # deleters and finally owners.
        access_fields = (
            '_access.read',
            '_access.update',
            '_access.delete',
            '_access.owner',
        )
        for access_field in access_fields:
            allowed = allowed | Q('terms', **{access_field: groups})

    return Q('bool', filter=allowed)
62
63
64
65


def get_egroups():
    """Return the access groups used to filter the current request's results.

    The groups are read from the comma-separated ``access`` query-string
    argument when it is sent, or when the request is a ``GET`` on
    ``/records/`` (a search query). If the ``SEARCH_USE_EGROUPS`` setting is
    truthy, every entry gets the ``@cern.ch`` suffix appended. For any other
    request the value of ``get_user_provides()`` is returned instead.

    :returns: A list of group names; ``None`` for a search query that carries
        no ``access`` argument; otherwise whatever ``get_user_provides()``
        returns.
    """
    egroups = request.args.get('access', None)
    is_search = request.path == '/records/' and request.method == 'GET'

    if not (egroups or is_search):
        # Neither explicit access rights nor a search: use the user's token ACLs
        return get_user_provides()

    if egroups is None:
        # Search query without an ``access`` argument. Checked explicitly
        # rather than by catching AttributeError from ``None.split``, so that
        # unrelated AttributeErrors are no longer silently swallowed.
        return None

    names = egroups.split(',')
    if current_app.config['SEARCH_USE_EGROUPS']:
        return ['{0}@cern.ch'.format(name) for name in names]
    return names
Pablo Panero's avatar
Search  
Pablo Panero committed
77
78


79
80
81
82
83
def search_factory(self, search, query_parser=None):
    """Build the search for a request using a ``query_string`` parser.

    Delegates to ``default_search_factory`` with a parser defined here, then
    sets the ``search_type`` parameter on the resulting search.

    :param self: Forwarded unchanged to ``default_search_factory``.
    :param search: Search object forwarded to ``default_search_factory``.
    :param query_parser: Accepted but not used; the local parser is always
        the one handed to ``default_search_factory``.
    :returns: Tuple of the search object and the URL keyword arguments
        produced by ``default_search_factory``.
    """

    def _parse_query(qstr=None):
        """Turn the raw query string into an elasticsearch_dsl query."""
        if not qstr:
            return Q()
        return Q(
            'query_string',
            query=qstr,
            default_field='_data.*',
            rewrite="scoring_boolean",  # calculates score for wildcard queries
        )

    # type: RecordCERNSearch, MultiDict
    search, urlkwargs = default_search_factory(self, search, _parse_query)

    # search across all shards
    return search.params(search_type="dfs_query_then_fetch"), urlkwargs
97
98
99
100
101


# Second name bound to the same factory function; presumably referenced by
# configuration elsewhere in the project -- confirm before removing.
csas_search_factory = search_factory


Pablo Panero's avatar
Search  
Pablo Panero committed
102
103
104
105
106
107
class RecordCERNSearch(RecordsSearch):
    """Records search class that installs the CERN access filter by default."""

    class Meta:
        """Search configuration for ``RecordCERNSearch``."""

        # The function is passed uncalled; presumably DefaultFilter evaluates
        # it per search so the filter reflects the current request -- confirm
        # against invenio_search.
        default_filter = DefaultFilter(cern_search_filter)
        # NOTE(review): None looks like "no document-type restriction" --
        # confirm against RecordsSearch.
        doc_types = None