#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2021 CERN.
#
# Citadel Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Custom Facets and factories for result filtering and aggregation.

See :data:`invenio_records_rest.config.RECORDS_REST_FACETS` for more
information on how to specify aggregations and filters.
"""

from __future__ import absolute_import, print_function

from elasticsearch_dsl import A, Q
from flask import current_app, request
from six import text_type
from werkzeug.datastructures import MultiDict


def regex_aggregation(field, query_param):
    """Build a factory for a ``terms`` aggregation with an optional regex filter.

    The returned callable reads ``query_param`` from the current request. When
    a non-empty value is present, the aggregation is restricted with
    ``include=".*<value>.*"``; otherwise a plain ``terms`` aggregation on
    ``field`` is produced.

    Note: the request value is interpolated into the pattern as-is (no
    escaping is applied here).

    :param field: Field name to aggregate on.
    :param query_param: Name of the request parameter holding the pattern.
    :returns: Zero-argument function that returns the ``A`` aggregation.
    """

    def inner():
        pattern = request.values.get(query_param, type=text_type)

        agg_kwargs = {"field": field}
        if pattern:
            # Wrap the value so it matches anywhere inside the term.
            agg_kwargs["include"] = f".*{pattern}.*"

        return A("terms", **agg_kwargs)

    return inner


def match_filter(field):
    """Build a factory for a ``match`` query on ``field``.

    :param field: Field name.
    :returns: Function taking a list of string values and returning a
        ``match`` query whose text is the values joined by single spaces.
    """

    def inner(values):
        joined_text = " ".join(values)
        # The field name is dynamic, so it has to be passed as a keyword dict.
        match_kwargs = {field: joined_text}
        return Q("match", **match_kwargs)

    return inner


55
56
57
58
59
60
61
62
63
def query_string(field):
    """Build a factory for a ``query_string`` query scoped to ``field``.

    :param field: Field name.
    :returns: Function taking a list of string values and returning a
        ``query_string`` query of the form ``<field>:(<v1> <v2> ...)``.
    """

    def inner(values):
        terms = " ".join(values)
        scoped_query = f"{field}:({terms})"
        return Q(
            "query_string",
            query=scoped_query,
            rewrite="top_terms_1000",  # calculates score for wildcards queries
        )

    return inner


def simple_query_string(field):
    """Build a factory for a ``simple_query_string`` query on ``field``.

    :param field: Field name.
    :returns: Function taking a list of string values and returning a
        ``simple_query_string`` query over ``[field]`` whose query text is the
        values joined by single spaces.
    """

    def inner(values):
        query_text = " ".join(values)
        target_fields = [field]
        return Q("simple_query_string", query=query_text, fields=target_fields)

    return inner


def match_phrase_filter(field):
    """Create a match_phrase or match query. [WIP: missing checking if inside value there's a string].

    Each incoming value is classified as follows:

    * values that do not start with a double quote are plain ``match`` terms;
    * values that start *and* end with a double quote (and are longer than a
      single character) are ``match_phrase`` terms;
    * values that open a quote but never close it are ignored.

    The terms of each group are joined with single spaces. When both groups
    are non-empty the two queries are combined in a ``bool`` query under
    ``must``; when only phrases are present the ``match_phrase`` query is
    returned; otherwise the ``match`` query is returned (also when no value
    qualified at all).

    :param field: Field name.
    :returns: Function that takes a list of string values and returns the
        resulting query.
    """

    def inner(values):
        logger = current_app.logger
        logger.warning("match_phrase_filter: %s", values)

        matches = []
        phrase_matches = []
        for value in values:
            logger.warning("value: %s", value)

            if not value.startswith('"'):
                matches.append(value)
            elif value.endswith('"') and len(value) > 1:
                phrase_matches.append(value)
            # else: opening quote without a closing one -> value is dropped.

        match_text = " ".join(matches)
        phrase_text = " ".join(phrase_matches)

        query_match = Q("match", **{field: match_text})
        query_match_phrase = Q("match_phrase", **{field: phrase_text})

        # The logger requires a positional message; the previous call passed
        # only ``**{field: ...}`` and raised TypeError on every invocation.
        logger.warning("%s: %s", field, match_text)

        if matches and phrase_matches:
            return Q("bool", must=[query_match, query_match_phrase])

        if phrase_matches:
            return query_match_phrase

        return query_match

    return inner


Carina Antunes's avatar
Carina Antunes committed
124
125
def _query_factory_dsl(urlkwargs, definitions):
    """Create a list with query definitions applied to url args."""
126
127
128
129
130
131
132
133
134
135
136
137
138
    filters = []
    for name, filter_factory in definitions.items():
        values = request.values.getlist(name, type=text_type)
        if values:
            filters.append(filter_factory(values))
            for v in values:
                urlkwargs.add(name, v)

    return (filters, urlkwargs)


def _match_filter(search, urlkwargs, definitions):
    """Apply every requested match definition to ``search`` as a query.

    :param search: Search object exposing ``query(...)``.
    :param urlkwargs: Collector of the request parameters that were used.
    :param definitions: Mapping of request parameter name to query factory.
    :returns: Tuple ``(search, urlkwargs)`` with the queries applied.
    """
    queries, urlkwargs = _query_factory_dsl(urlkwargs, definitions)

    # ``query`` returns a new search object, so rebind on each step.
    for single_query in queries:
        search = search.query(single_query)

    return search, urlkwargs


Carina Antunes's avatar
Carina Antunes committed
147
148
149
150
151
152
153
154
155
156
157
def _nested_filter(search, urlkwargs, definitions):
    """Ingest nested bool filter in query."""
    for path, definition in definitions.items():
        nested, urlkwargs = _query_factory_dsl(urlkwargs, definition)

        if nested:
            search = search.query(Q("nested", path=path, query=Q("bool", filter=nested)))

    return (search, urlkwargs)


158
159
160
161
162
163
164
165
166
167
168
169
def saas_facets_factory(search, index):
    """Add the configured match and nested filters for ``index`` to ``search``.

    The facet configuration is looked up in
    ``current_app.config["RECORDS_REST_FACETS"]`` by index name. When the
    index has no entry, the search object is returned untouched together
    with an empty ``MultiDict``. Otherwise the ``"matches"`` definitions are
    applied first, followed by the ``"nested"`` ones.

    :param search: Basic search object.
    :param index: Index name.
    :returns: A tuple containing the new search object and a ``MultiDict``
        with all fields and values used.
    """
    urlkwargs = MultiDict()

    index_facets = current_app.config["RECORDS_REST_FACETS"].get(index)
    if index_facets is None:
        return search, urlkwargs

    # Match filter
    match_definitions = index_facets.get("matches", {})
    search, urlkwargs = _match_filter(search, urlkwargs, match_definitions)

    # Nested filter
    nested_definitions = index_facets.get("nested", {})
    search, urlkwargs = _nested_filter(search, urlkwargs, nested_definitions)

    return search, urlkwargs