Commit 9f307718 authored by Pablo Panero
Browse files

tests: add suggest schema and tests

parent d4f39ce6
{
"title": "Custom record schema v0.0.2",
"id": "http://localhost:5000/schemas/test/suggest_v0.0.2.json",
"$schema": "http://localhost:5000/schemas/test/suggest_v0.0.2.json",
"type": "object",
"properties": {
"_access": {
"type": "object",
"properties": {
"owner":{
"type": "array",
"items": {
"type": "string"
}
},
"read":{
"type": "array",
"items": {
"type": "string"
}
},
"update":{
"type": "array",
"items": {
"type": "string"
}
},
"delete":{
"type": "array",
"items": {
"type": "string"
}
}
}
},
"_data": {
"type": "object",
"properties": {
"title": {
"type": "string",
"description": "Record title."
}
}
},
"suggest": {
"type": "string"
},
"control_number": {
"type": "string"
},
"$schema": {
"type": "string"
}
}
}
\ No newline at end of file
{
"settings": {
"index.percolator.map_unmapped_fields_as_string": true,
"index.mapping.total_fields.limit": 3000,
"analysis": {
"analyzer": {
"autocomplete": {
"tokenizer": "autocomplete",
"filter": [
"lowercase"
]
},
"autocomplete_search": {
"tokenizer": "lowercase"
}
},
"tokenizer": {
"autocomplete": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 20,
"token_chars": [
"letter"
]
}
}
}
},
"mappings": {
"suggest_v0.0.2": {
"numeric_detection": true,
"_meta": {
"_owner": "CernSearch-Administrators@cern.ch"
},
"properties": {
"_access": {
"type": "object",
"properties": {
"owner":{
"type": "keyword"
},
"read": {
"type": "keyword"
},
"update": {
"type": "keyword"
},
"delete": {
"type": "keyword"
}
}
},
"_data": {
"type": "object",
"properties": {
"title": {
"type": "keyword",
"copy_to": "suggest"
}
}
},
"suggest": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "autocomplete_search"
},
"control_number": {
"type": "keyword"
},
"$schema": {
"enabled": false
}
}
}
}
}
\ No newline at end of file
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2019 CERN.
#
# CERN Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
import json
import pytest
import requests
import time
# Headers sent with every request in this module. "Authorization" starts out
# empty; the functions below overwrite it with "Bearer <api_key>" before
# issuing any request.
HEADERS = {
    "Accept": "application/json",
    "Content-Type": "application/json; charset=utf-8",
    "Authorization": "",
}
def create_record(endpoint, api_key, title, timeout=30):
    """Create a ``suggest_v0.0.2`` test record and return its control number.

    POSTs a record whose ``_data.title`` is ``title`` to
    ``{endpoint}/api/records/`` and asserts that the service answers ``201``
    and echoes the title back in the response metadata.

    Side effect: overwrites the module-level ``HEADERS['Authorization']``
    entry with a bearer token built from ``api_key``.

    :param endpoint: Base URL of the service; request URLs are built as
        ``{endpoint}/api/...``.
    :param api_key: Token sent as ``Authorization: Bearer <api_key>``.
    :param title: Value stored in the record's ``_data.title`` field.
    :param timeout: Seconds to wait for the HTTP response, so that an
        unresponsive service fails the test instead of hanging it.
    :returns: The ``control_number`` reported for the new record.
    """
    HEADERS['Authorization'] = 'Bearer {credentials}'.format(
        credentials=api_key
    )
    body = {
        "_access": {
            "owner": ["CernSearch-Administrators@cern.ch"],
            "update": ["CernSearch-Administrators@cern.ch"],
            "delete": ["CernSearch-Administrators@cern.ch"]
        },
        "_data": {
            "title": title
        },
        "$schema": "{endpoint}/schemas/test/suggest_v0.0.2.json".format(
            endpoint=endpoint
        )
    }

    # Create test record
    resp = requests.post('{endpoint}/api/records/'.format(endpoint=endpoint),
                         headers=HEADERS, data=json.dumps(body),
                         timeout=timeout)
    assert resp.status_code == 201

    # The response metadata must carry an identifier and echo the title back.
    metadata = resp.json()['metadata']
    control_number = metadata.get('control_number')
    assert control_number is not None
    # Fall back to an empty dict so a missing ``_data`` fails the assertion
    # below instead of raising AttributeError on ``None``.
    assert (metadata.get('_data') or {}).get('title') == title

    return control_number
# @pytest.mark.unit
def test_suggester(endpoint, api_key):
    """Test queries against the ``suggest`` field.

    Creates five records with different titles, pauses, and then checks the
    number of hits returned for three ``suggest:`` queries. The created
    records are deleted afterwards, even when a check fails, so that a failed
    run does not leave records behind that would skew later hit counts.
    """
    HEADERS['Authorization'] = 'Bearer {credentials}'.format(
        credentials=api_key
    )
    records_url = '{endpoint}/api/records/'.format(endpoint=endpoint)
    # Bound every request so an unresponsive service fails the test instead
    # of hanging it.
    timeout = 30

    def count_hits(q):
        """Run search query ``q`` and return the reported total of hits."""
        resp = requests.get(records_url, params={"q": q}, headers=HEADERS,
                            timeout=timeout)
        assert resp.status_code == 200
        return resp.json()['hits'].get('total')

    titles = [
        'The First Suggestion',
        'Documentation site title',
        'CERN Search Documentation',
        'Invenio docs site',
        'The final suggester',
    ]

    control_numbers = []
    try:
        # Create records one by one so that those already created are still
        # cleaned up if a later creation fails.
        for title in titles:
            control_numbers.append(create_record(endpoint, api_key, title))

        # Presumably gives the search index time to pick up the new records.
        time.sleep(3)

        # 'the f' should return 1st and 5th record
        assert count_hits('suggest:the f') == 2
        # 'doc' should return 2nd, 3rd and 4th record
        assert count_hits('suggest:doc') == 3
        # 'f sugg' should return 1st and 5th record
        assert count_hits('suggest:f sugg') == 2
    finally:
        # delete records: attempt every deletion before checking the results
        # so that one failed deletion does not leave the remaining records.
        delete_statuses = [
            requests.delete(
                '{endpoint}/api/record/{control_number}'.format(
                    endpoint=endpoint, control_number=control_number
                ),
                headers=HEADERS,
                timeout=timeout
            ).status_code
            for control_number in control_numbers
        ]

    # Only reached when the checks above passed, so a cleanup failure never
    # masks the original assertion error.
    assert all(status == 204 for status in delete_statuses)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment