Commit 91eb2779 authored by Carina Antunes
Browse files

[EDMS] Wildcard improvements / Invenio-DB/SQLAlchemy Fix

parent f1d2002b
CERN_SEARCH_INSTANCE=indico
INVENIO_INDEXER_DEFAULT_DOC_TYPE=events_v1.0.0
INVENIO_INDEXER_DEFAULT_INDEX=indico-events_v1.0.0
INVENIO_SEARCH_INDEX_PREFIX=cernsearch-lcagenda_
[settings]
line_length=120
known_third_party = celery,click,elasticsearch,elasticsearch_dsl,flask,flask_login,flask_security,invenio_accounts,invenio_app,invenio_db,invenio_files_processor,invenio_files_rest,invenio_indexer,invenio_oauth2server,invenio_oauthclient,invenio_pidstore,invenio_records,invenio_records_files,invenio_records_rest,invenio_rest,invenio_search,kombu,marshmallow,pytest,setuptools,six,sqlalchemy,werkzeug
known_third_party = celery,click,elasticsearch,elasticsearch_dsl,flask,flask_login,flask_security,invenio_accounts,invenio_app,invenio_db,invenio_files_processor,invenio_files_rest,invenio_indexer,invenio_oauth2server,invenio_oauthclient,invenio_pidstore,invenio_records,invenio_records_files,invenio_records_rest,invenio_rest,invenio_search,kombu,marshmallow,pytest,setuptools,six,sqlalchemy,sqlalchemy_continuum,werkzeug
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
......
......@@ -7,7 +7,7 @@
# under the terms of the MIT License; see LICENSE file for more details.
# Use CentOS7:
FROM gitlab-registry.cern.ch/webservices/cern-search/cern-search-rest-api/cern-search-rest-api-base:6c8529ea9d6817aaab9f64f4c3c55870c5b28d4f
FROM gitlab-registry.cern.ch/webservices/cern-search/cern-search-rest-api/cern-search-rest-api-base:795c35db2e0302474454fde0ba453253c8916eef
ARG build_env
# CERN Search installation
......
......@@ -2,10 +2,38 @@
"settings": {
"index.percolator.map_unmapped_fields_as_text": true,
"index.mapping.total_fields.limit": 500,
"index.number_of_shards": 5,
"index.max_ngram_diff": 100,
"index.query.default_field": [
"_data.*"
],
"analysis": {
"tokenizer": {
"keyword_edge_ngram": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 100,
"token_chars": [
"letter",
"digit",
"whitespace",
"punctuation",
"symbol"
]
},
"keyword_ngram": {
"type": "ngram",
"min_gram": 3,
"max_gram": 50,
"token_chars": [
"letter",
"digit",
"whitespace",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"case_accent_analyzer": {
"tokenizer": "standard",
......@@ -13,6 +41,27 @@
"lowercase",
"asciifolding"
]
},
"wildcard_edge": {
"tokenizer": "keyword_edge_ngram",
"filter": [
"lowercase",
"asciifolding"
]
},
"wildcard": {
"tokenizer": "keyword_ngram",
"filter": [
"lowercase",
"asciifolding"
]
},
"lowercase_keyword_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase",
"asciifolding"
]
}
},
"normalizer": {
......@@ -61,8 +110,9 @@
"boost": 5
},
"cern_id": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 5
},
"cern_id_version": {
......@@ -105,6 +155,12 @@
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 1.5
},
"wildcard": {
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.5
}
}
},
......@@ -122,6 +178,12 @@
"type": "text",
"boost": 1.25,
"analyzer": "french"
},
"wildcard": {
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.25
}
}
},
......@@ -133,6 +195,12 @@
"analyzer": "case_accent_analyzer",
"boost": 1.2
},
"wildcard": {
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.2
},
"email": {
"type": "text",
"analyzer": "case_accent_analyzer",
......@@ -159,6 +227,12 @@
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 1.4
},
"wildcard": {
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.5
}
}
},
......
......@@ -2,10 +2,27 @@
"settings": {
"index.percolator.map_unmapped_fields_as_text": true,
"index.mapping.total_fields.limit": 500,
"index.highlight.max_analyzed_offset": 10000000,
"index.number_of_shards": 5,
"index.max_ngram_diff": 100,
"index.query.default_field": [
"_data.*"
],
"analysis": {
"tokenizer": {
"keyword_ngram": {
"type": "ngram",
"min_gram": 3,
"max_gram": 50,
"token_chars": [
"letter",
"digit",
"whitespace",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"case_accent_analyzer": {
"tokenizer": "standard",
......@@ -13,6 +30,20 @@
"lowercase",
"asciifolding"
]
},
"wildcard": {
"tokenizer": "keyword_ngram",
"filter": [
"lowercase",
"asciifolding"
]
},
"lowercase_keyword_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase",
"asciifolding"
]
}
},
"normalizer": {
......@@ -81,9 +112,17 @@
"boost": 1.9
},
"cern_id": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 1.9
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.9,
"fields": {
"exact_match": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 1.9
}
}
},
"cern_id_version": {
"type": "keyword",
......@@ -125,6 +164,12 @@
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 1.4
},
"wildcard": {
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.4
}
}
},
......@@ -142,6 +187,12 @@
"type": "text",
"boost": 1.2,
"analyzer": "french"
},
"wildcard": {
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.2
}
}
},
......@@ -157,6 +208,12 @@
"type": "text",
"analyzer": "case_accent_analyzer",
"boost": 1.15
},
"wildcard": {
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.15
}
}
},
......@@ -179,6 +236,12 @@
"type": "keyword",
"boost": 1.3,
"normalizer": "case_accent_normalizer"
},
"wildcard": {
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.3
}
}
},
......
......@@ -2,10 +2,25 @@
"settings": {
"index.percolator.map_unmapped_fields_as_text": true,
"index.mapping.total_fields.limit": 500,
"index.max_ngram_diff": 100,
"index.query.default_field": [
"_data.*"
],
"analysis": {
"tokenizer": {
"keyword_ngram": {
"type": "ngram",
"min_gram": 3,
"max_gram": 50,
"token_chars": [
"letter",
"digit",
"whitespace",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"case_accent_analyzer": {
"tokenizer": "standard",
......@@ -13,6 +28,20 @@
"lowercase",
"asciifolding"
]
},
"wildcard": {
"tokenizer": "keyword_ngram",
"filter": [
"lowercase",
"asciifolding"
]
},
"lowercase_keyword_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase",
"asciifolding"
]
}
},
"normalizer": {
......@@ -50,14 +79,30 @@
"type": "object",
"properties": {
"code": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 1.5
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.5,
"fields": {
"exact_match": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 1.5
}
}
},
"other_id": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 1.4
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 1.4,
"fields": {
"exact_match": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 1.4
}
}
},
"class": {
"type": "keyword",
......
......@@ -2,10 +2,25 @@
"settings": {
"index.percolator.map_unmapped_fields_as_text": true,
"index.mapping.total_fields.limit": 500,
"index.max_ngram_diff": 100,
"index.query.default_field": [
"_data.*"
],
"analysis": {
"tokenizer": {
"keyword_ngram": {
"type": "ngram",
"min_gram": 3,
"max_gram": 50,
"token_chars": [
"letter",
"digit",
"whitespace",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"case_accent_analyzer": {
"tokenizer": "standard",
......@@ -13,6 +28,20 @@
"lowercase",
"asciifolding"
]
},
"wildcard": {
"tokenizer": "keyword_ngram",
"filter": [
"lowercase",
"asciifolding"
]
},
"lowercase_keyword_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase",
"asciifolding"
]
}
},
"normalizer": {
......@@ -50,9 +79,17 @@
"type": "object",
"properties": {
"item_id": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 2
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 2,
"fields": {
"exact_match": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 2
}
}
},
"version": {
"type": "keyword",
......
......@@ -2,10 +2,25 @@
"settings": {
"index.percolator.map_unmapped_fields_as_text": true,
"index.mapping.total_fields.limit": 500,
"index.max_ngram_diff": 100,
"index.query.default_field": [
"_data.*"
],
"analysis": {
"tokenizer": {
"keyword_ngram": {
"type": "ngram",
"min_gram": 3,
"max_gram": 50,
"token_chars": [
"letter",
"digit",
"whitespace",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"case_accent_analyzer": {
"tokenizer": "standard",
......@@ -13,6 +28,20 @@
"lowercase",
"asciifolding"
]
},
"wildcard": {
"tokenizer": "keyword_ngram",
"filter": [
"lowercase",
"asciifolding"
]
},
"lowercase_keyword_analyzer": {
"tokenizer": "keyword",
"filter": [
"lowercase",
"asciifolding"
]
}
},
"normalizer": {
......@@ -50,9 +79,17 @@
"type": "object",
"properties": {
"project_id": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 2
"type": "text",
"analyzer": "wildcard",
"search_analyzer": "lowercase_keyword_analyzer",
"boost": 2,
"fields": {
"exact_match": {
"type": "keyword",
"normalizer": "case_accent_normalizer",
"boost": 2
}
}
},
"name": {
"type": "text",
......
......@@ -979,7 +979,7 @@ ua-parser = ">=0.7.3"
admin = ["Flask-Admin (>=1.3.0)"]
all = ["Flask-Admin (>=1.3.0)", "Sphinx (>=3)", "check-manifest (>=0.25)", "coverage (>=4.0)", "isort (>=4.3.0)", "mock (>=2.0.0)", "pydocstyle (>=1.0.0)", "pytest-invenio (>=1.4.0)", "selenium (>=3.0.1)"]
docs = ["Sphinx (>=3)"]
mysql = ["invenio-db[versioning,mysql] (>=1.0.0)"]
mysql = ["invenio-db[mysql,versioning] (>=1.0.0)"]
postgresql = ["invenio-db[postgresql,versioning] (>=1.0.0)"]
sqlite = ["invenio-db[versioning] (>=1.0.0)"]
tests = ["check-manifest (>=0.25)", "coverage (>=4.0)", "isort (>=4.3.0)", "mock (>=2.0.0)", "pydocstyle (>=1.0.0)", "pytest-invenio (>=1.4.0)", "selenium (>=3.0.1)"]
......@@ -1132,6 +1132,7 @@ description = "Database management for Invenio."
category = "main"
optional = false
python-versions = "*"
develop = false
[package.dependencies]
Flask-Alembic = ">=2.0.1"
......@@ -1150,6 +1151,12 @@ postgresql = ["psycopg2-binary (>=2.8.6)"]
tests = ["pytest-invenio (>=1.4.0)", "cryptography (>=2.1.4)", "mock (>=4.0.0)"]
versioning = ["SQLAlchemy-Continuum (>=1.3.11)"]
[package.source]
type = "git"
url = "https://github.com/carantunes/invenio-db.git"
reference = "1.0.10"
resolved_reference = "1f0901ae2eeaef1dc709fe1f6fc628f87a1866e4"
[[package]]
name = "invenio-files-processor"
version = "0.1.0"
......@@ -1404,7 +1411,7 @@ werkzeug = ">=0.14.1"
admin = ["Flask-Admin (>=1.3.0)"]
all = ["Sphinx (>=1.7.2)", "Flask-Admin (>=1.3.0)", "check-manifest (>=0.25)", "coverage (>=4.5.3)", "Flask-Menu (>=0.5.0)", "invenio-admin (>=1.0.0)", "isort (>=4.3.0)", "mock (>=1.3.0)", "pydocstyle (>=3.0.0)", "pytest-cov (>=2.7.1)", "pytest-pep8 (>=1.0.6)", "pytest (>=4.6.4,<5.0.0)"]
docs = ["Sphinx (>=1.7.2)"]
mysql = ["invenio-db[versioning,mysql] (>=1.0.0)"]
mysql = ["invenio-db[mysql,versioning] (>=1.0.0)"]
postgresql = ["invenio-db[postgresql,versioning] (>=1.0.0)"]
sqlite = ["invenio-db[versioning] (>=1.0.0)"]
tests = ["check-manifest (>=0.25)", "coverage (>=4.5.3)", "Flask-Menu (>=0.5.0)", "invenio-admin (>=1.0.0)", "isort (>=4.3.0)", "mock (>=1.3.0)", "pydocstyle (>=3.0.0)", "pytest-cov (>=2.7.1)", "pytest-pep8 (>=1.0.6)", "pytest (>=4.6.4,<5.0.0)"]
......@@ -1427,7 +1434,7 @@ invenio-records-rest = ">=1.6.3"
[package.extras]
all = ["Sphinx (>=3)", "invenio-indexer (>=1.1.0)", "invenio-search[elasticsearch6] (>=1.2.0)", "mock (>=1.3.0)", "pytest-invenio (>=1.4.0)"]
docs = ["Sphinx (>=3)"]
mysql = ["invenio-db[versioning,mysql] (>=1.0.0)"]
mysql = ["invenio-db[mysql,versioning] (>=1.0.0)"]
postgresql = ["invenio-db[postgresql,versioning] (>=1.0.0)"]
sqlite = ["invenio-db[versioning] (>=1.0.0)"]
tests = ["invenio-indexer (>=1.1.0)", "invenio-search[elasticsearch6] (>=1.2.0)", "mock (>=1.3.0)", "pytest-invenio (>=1.4.0)"]
......@@ -2709,7 +2716,7 @@ timezone = ["python-dateutil"]
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "764c24c7abbe19616fdc8f767edd05795cd2d37e850cd46ddb189fee773d094b"
content-hash = "1067a43b96dc601ba5bf7e51e653b4702d70499de4a145698b0a0c4646f5d50b"
[metadata.files]
alembic = [
......@@ -3102,10 +3109,7 @@ invenio-config = [
{file = "invenio-config-1.0.3.tar.gz", hash = "sha256:9d10492b49a46703f0ac028ce8ab78b5ff1c72b180ecb4ffcee5bf49682d1e6c"},
{file = "invenio_config-1.0.3-py2.py3-none-any.whl", hash = "sha256:238ab074991e7f0d6ee7ebc6eb2f5e41658749dd977ab6e86476e862c0efaf28"},
]
invenio-db = [
{file = "invenio-db-1.0.9.tar.gz", hash = "sha256:bc65e82c350a908c4802399d39ae01cde051195ef42e0bc396c6a63e0a75bf50"},
{file = "invenio_db-1.0.9-py2.py3-none-any.whl", hash = "sha256:c8861a665af5cbb3645d4fde5cc0e04354a108a3d5b4cf2c3eee5d252da5d534"},
]
invenio-db = []
invenio-files-processor = [
{file = "invenio-files-processor-0.1.0.tar.gz", hash = "sha256:c9345dc7de0a5fc3f981fb12baee04d5e19b95fcbcd06dabc4bd27206d4c30c4"},
{file = "invenio_files_processor-0.1.0-py2.py3-none-any.whl", hash = "sha256:e288258bf072fe5db7808dbc69a4fce55552dd14317edfaa68892941c2f6bc5d"},
......
......@@ -15,7 +15,7 @@ invenio-assets = ">=1.1.3,<1.2.0"
invenio-base = ">=1.2.3,<1.3.0"
invenio-celery = ">=1.2.0,<1.3.0"
invenio-config = ">=1.0.3,<1.1.0"
invenio-db = {version = ">=1.0.5,<1.1.0", extras = ["postgresql", "versioning"]}
invenio-db = {git = "https://github.com/carantunes/invenio-db.git", rev = "1.0.10", extras = ["versioning", "postgresql"]}
invenio-files-processor = {extras = ["tika"], version = "^0.1.0"}
invenio-files-rest = ">=1.2.0,<1.3.0"
invenio-i18n = "<1.3.0,>=1.2.0"
......
......@@ -20,6 +20,7 @@ import pytest
from flask import current_app
from invenio_accounts.models import Role, User
from invenio_oauth2server.models import Token
from sqlalchemy_continuum import versioning_manager
@pytest.fixture()
......@@ -75,3 +76,11 @@ def instance_path():
`os.path.join(sys.prefix, 'var/instance/static')`
"""
pass
@pytest.fixture(scope="function")
def db(db):
    """Override the ``db`` fixture so its session is tracked for versioning.

    Takes the ``db`` fixture of the same name from the outer scope, registers
    its session with the SQLAlchemy-Continuum ``versioning_manager``, and then
    yields the very same ``db`` object to the test.
    """
    session = db.session
    # Attach the versioning manager to this session before the test body runs.
    versioning_manager.track_session(session)
    yield db
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment