Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
webservices
cern-search
cern-search-rest-api
Commits
91eb2779
Commit
91eb2779
authored
Jun 14, 2021
by
Carina Antunes
Browse files
[EDMS] Wildcard improvements / Invenio-DB/SQLAlchemy Fix
parent
f1d2002b
Changes
11
Hide whitespace changes
Inline
Side-by-side
.env-lcagenda
0 → 100644
View file @
91eb2779
CERN_SEARCH_INSTANCE=indico
INVENIO_INDEXER_DEFAULT_DOC_TYPE=events_v1.0.0
INVENIO_INDEXER_DEFAULT_INDEX=indico-events_v1.0.0
INVENIO_SEARCH_INDEX_PREFIX=cernsearch-lcagenda_
.isort.cfg
View file @
91eb2779
[settings]
line_length=120
known_third_party = celery,click,elasticsearch,elasticsearch_dsl,flask,flask_login,flask_security,invenio_accounts,invenio_app,invenio_db,invenio_files_processor,invenio_files_rest,invenio_indexer,invenio_oauth2server,invenio_oauthclient,invenio_pidstore,invenio_records,invenio_records_files,invenio_records_rest,invenio_rest,invenio_search,kombu,marshmallow,pytest,setuptools,six,sqlalchemy,werkzeug
known_third_party = celery,click,elasticsearch,elasticsearch_dsl,flask,flask_login,flask_security,invenio_accounts,invenio_app,invenio_db,invenio_files_processor,invenio_files_rest,invenio_indexer,invenio_oauth2server,invenio_oauthclient,invenio_pidstore,invenio_records,invenio_records_files,invenio_records_rest,invenio_rest,invenio_search,kombu,marshmallow,pytest,setuptools,six,sqlalchemy,
sqlalchemy_continuum,
werkzeug
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
...
...
Dockerfile
View file @
91eb2779
...
...
@@ -7,7 +7,7 @@
# under the terms of the MIT License; see LICENSE file for more details.
# Use CentOS7:
FROM
gitlab-registry.cern.ch/webservices/cern-search/cern-search-rest-api/cern-search-rest-api-base:
6c8529ea9d6817aaab9f64f4c3c55870c5b28d4
f
FROM
gitlab-registry.cern.ch/webservices/cern-search/cern-search-rest-api/cern-search-rest-api-base:
795c35db2e0302474454fde0ba453253c8916ee
f
ARG
build_env
# CERN Search installation
...
...
cern_search_rest_api/modules/cernsearch/mappings/edms/v7/edms/document_v5.0.0.json
View file @
91eb2779
...
...
@@ -2,10 +2,38 @@
"settings"
:
{
"index.percolator.map_unmapped_fields_as_text"
:
true
,
"index.mapping.total_fields.limit"
:
500
,
"index.number_of_shards"
:
5
,
"index.max_ngram_diff"
:
100
,
"index.query.default_field"
:
[
"_data.*"
],
"analysis"
:
{
"tokenizer"
:
{
"keyword_edge_ngram"
:
{
"type"
:
"edge_ngram"
,
"min_gram"
:
2
,
"max_gram"
:
100
,
"token_chars"
:
[
"letter"
,
"digit"
,
"whitespace"
,
"punctuation"
,
"symbol"
]
},
"keyword_ngram"
:
{
"type"
:
"ngram"
,
"min_gram"
:
3
,
"max_gram"
:
50
,
"token_chars"
:
[
"letter"
,
"digit"
,
"whitespace"
,
"punctuation"
,
"symbol"
]
}
},
"analyzer"
:
{
"case_accent_analyzer"
:
{
"tokenizer"
:
"standard"
,
...
...
@@ -13,6 +41,27 @@
"lowercase"
,
"asciifolding"
]
},
"wildcard_edge"
:
{
"tokenizer"
:
"keyword_edge_ngram"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
},
"wildcard"
:
{
"tokenizer"
:
"keyword_ngram"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
},
"lowercase_keyword_analyzer"
:
{
"tokenizer"
:
"keyword"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
}
},
"normalizer"
:
{
...
...
@@ -61,8 +110,9 @@
"boost"
:
5
},
"cern_id"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
5
},
"cern_id_version"
:
{
...
...
@@ -105,6 +155,12 @@
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
1.5
},
"wildcard"
:
{
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.5
}
}
},
...
...
@@ -122,6 +178,12 @@
"type"
:
"text"
,
"boost"
:
1.25
,
"analyzer"
:
"french"
},
"wildcard"
:
{
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.25
}
}
},
...
...
@@ -133,6 +195,12 @@
"analyzer"
:
"case_accent_analyzer"
,
"boost"
:
1.2
},
"wildcard"
:
{
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.2
},
"email"
:
{
"type"
:
"text"
,
"analyzer"
:
"case_accent_analyzer"
,
...
...
@@ -159,6 +227,12 @@
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
1.4
},
"wildcard"
:
{
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.5
}
}
},
...
...
cern_search_rest_api/modules/cernsearch/mappings/edms/v7/edms/file_v6.0.0.json
View file @
91eb2779
...
...
@@ -2,10 +2,27 @@
"settings"
:
{
"index.percolator.map_unmapped_fields_as_text"
:
true
,
"index.mapping.total_fields.limit"
:
500
,
"index.highlight.max_analyzed_offset"
:
10000000
,
"index.number_of_shards"
:
5
,
"index.max_ngram_diff"
:
100
,
"index.query.default_field"
:
[
"_data.*"
],
"analysis"
:
{
"tokenizer"
:
{
"keyword_ngram"
:
{
"type"
:
"ngram"
,
"min_gram"
:
3
,
"max_gram"
:
50
,
"token_chars"
:
[
"letter"
,
"digit"
,
"whitespace"
,
"punctuation"
,
"symbol"
]
}
},
"analyzer"
:
{
"case_accent_analyzer"
:
{
"tokenizer"
:
"standard"
,
...
...
@@ -13,6 +30,20 @@
"lowercase"
,
"asciifolding"
]
},
"wildcard"
:
{
"tokenizer"
:
"keyword_ngram"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
},
"lowercase_keyword_analyzer"
:
{
"tokenizer"
:
"keyword"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
}
},
"normalizer"
:
{
...
...
@@ -81,9 +112,17 @@
"boost"
:
1.9
},
"cern_id"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
1.9
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.9
,
"fields"
:
{
"exact_match"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
1.9
}
}
},
"cern_id_version"
:
{
"type"
:
"keyword"
,
...
...
@@ -125,6 +164,12 @@
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
1.4
},
"wildcard"
:
{
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.4
}
}
},
...
...
@@ -142,6 +187,12 @@
"type"
:
"text"
,
"boost"
:
1.2
,
"analyzer"
:
"french"
},
"wildcard"
:
{
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.2
}
}
},
...
...
@@ -157,6 +208,12 @@
"type"
:
"text"
,
"analyzer"
:
"case_accent_analyzer"
,
"boost"
:
1.15
},
"wildcard"
:
{
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.15
}
}
},
...
...
@@ -179,6 +236,12 @@
"type"
:
"keyword"
,
"boost"
:
1.3
,
"normalizer"
:
"case_accent_normalizer"
},
"wildcard"
:
{
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.3
}
}
},
...
...
cern_search_rest_api/modules/cernsearch/mappings/edms/v7/edms/inforeamobject_v1.0.0.json
View file @
91eb2779
...
...
@@ -2,10 +2,25 @@
"settings"
:
{
"index.percolator.map_unmapped_fields_as_text"
:
true
,
"index.mapping.total_fields.limit"
:
500
,
"index.max_ngram_diff"
:
100
,
"index.query.default_field"
:
[
"_data.*"
],
"analysis"
:
{
"tokenizer"
:
{
"keyword_ngram"
:
{
"type"
:
"ngram"
,
"min_gram"
:
3
,
"max_gram"
:
50
,
"token_chars"
:
[
"letter"
,
"digit"
,
"whitespace"
,
"punctuation"
,
"symbol"
]
}
},
"analyzer"
:
{
"case_accent_analyzer"
:
{
"tokenizer"
:
"standard"
,
...
...
@@ -13,6 +28,20 @@
"lowercase"
,
"asciifolding"
]
},
"wildcard"
:
{
"tokenizer"
:
"keyword_ngram"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
},
"lowercase_keyword_analyzer"
:
{
"tokenizer"
:
"keyword"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
}
},
"normalizer"
:
{
...
...
@@ -50,14 +79,30 @@
"type"
:
"object"
,
"properties"
:
{
"code"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
1.5
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.5
,
"fields"
:
{
"exact_match"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
1.5
}
}
},
"other_id"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
1.4
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
1.4
,
"fields"
:
{
"exact_match"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
1.4
}
}
},
"class"
:
{
"type"
:
"keyword"
,
...
...
cern_search_rest_api/modules/cernsearch/mappings/edms/v7/edms/item_v1.0.0.json
View file @
91eb2779
...
...
@@ -2,10 +2,25 @@
"settings"
:
{
"index.percolator.map_unmapped_fields_as_text"
:
true
,
"index.mapping.total_fields.limit"
:
500
,
"index.max_ngram_diff"
:
100
,
"index.query.default_field"
:
[
"_data.*"
],
"analysis"
:
{
"tokenizer"
:
{
"keyword_ngram"
:
{
"type"
:
"ngram"
,
"min_gram"
:
3
,
"max_gram"
:
50
,
"token_chars"
:
[
"letter"
,
"digit"
,
"whitespace"
,
"punctuation"
,
"symbol"
]
}
},
"analyzer"
:
{
"case_accent_analyzer"
:
{
"tokenizer"
:
"standard"
,
...
...
@@ -13,6 +28,20 @@
"lowercase"
,
"asciifolding"
]
},
"wildcard"
:
{
"tokenizer"
:
"keyword_ngram"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
},
"lowercase_keyword_analyzer"
:
{
"tokenizer"
:
"keyword"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
}
},
"normalizer"
:
{
...
...
@@ -50,9 +79,17 @@
"type"
:
"object"
,
"properties"
:
{
"item_id"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
2
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
2
,
"fields"
:
{
"exact_match"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
2
}
}
},
"version"
:
{
"type"
:
"keyword"
,
...
...
cern_search_rest_api/modules/cernsearch/mappings/edms/v7/edms/project_v1.0.0.json
View file @
91eb2779
...
...
@@ -2,10 +2,25 @@
"settings"
:
{
"index.percolator.map_unmapped_fields_as_text"
:
true
,
"index.mapping.total_fields.limit"
:
500
,
"index.max_ngram_diff"
:
100
,
"index.query.default_field"
:
[
"_data.*"
],
"analysis"
:
{
"tokenizer"
:
{
"keyword_ngram"
:
{
"type"
:
"ngram"
,
"min_gram"
:
3
,
"max_gram"
:
50
,
"token_chars"
:
[
"letter"
,
"digit"
,
"whitespace"
,
"punctuation"
,
"symbol"
]
}
},
"analyzer"
:
{
"case_accent_analyzer"
:
{
"tokenizer"
:
"standard"
,
...
...
@@ -13,6 +28,20 @@
"lowercase"
,
"asciifolding"
]
},
"wildcard"
:
{
"tokenizer"
:
"keyword_ngram"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
},
"lowercase_keyword_analyzer"
:
{
"tokenizer"
:
"keyword"
,
"filter"
:
[
"lowercase"
,
"asciifolding"
]
}
},
"normalizer"
:
{
...
...
@@ -50,9 +79,17 @@
"type"
:
"object"
,
"properties"
:
{
"project_id"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
2
"type"
:
"text"
,
"analyzer"
:
"wildcard"
,
"search_analyzer"
:
"lowercase_keyword_analyzer"
,
"boost"
:
2
,
"fields"
:
{
"exact_match"
:
{
"type"
:
"keyword"
,
"normalizer"
:
"case_accent_normalizer"
,
"boost"
:
2
}
}
},
"name"
:
{
"type"
:
"text"
,
...
...
poetry.lock
View file @
91eb2779
...
...
@@ -979,7 +979,7 @@ ua-parser = ">=0.7.3"
admin = ["Flask-Admin (>=1.3.0)"]
all = ["Flask-Admin (>=1.3.0)", "Sphinx (>=3)", "check-manifest (>=0.25)", "coverage (>=4.0)", "isort (>=4.3.0)", "mock (>=2.0.0)", "pydocstyle (>=1.0.0)", "pytest-invenio (>=1.4.0)", "selenium (>=3.0.1)"]
docs = ["Sphinx (>=3)"]
mysql = ["invenio-db[versioning
,mysql
] (>=1.0.0)"]
mysql = ["invenio-db[
mysql,
versioning] (>=1.0.0)"]
postgresql = ["invenio-db[postgresql,versioning] (>=1.0.0)"]
sqlite = ["invenio-db[versioning] (>=1.0.0)"]
tests = ["check-manifest (>=0.25)", "coverage (>=4.0)", "isort (>=4.3.0)", "mock (>=2.0.0)", "pydocstyle (>=1.0.0)", "pytest-invenio (>=1.4.0)", "selenium (>=3.0.1)"]
...
...
@@ -1132,6 +1132,7 @@ description = "Database management for Invenio."
category = "main"
optional = false
python-versions = "*"
develop = false
[package.dependencies]
Flask-Alembic = ">=2.0.1"
...
...
@@ -1150,6 +1151,12 @@ postgresql = ["psycopg2-binary (>=2.8.6)"]
tests = ["pytest-invenio (>=1.4.0)", "cryptography (>=2.1.4)", "mock (>=4.0.0)"]
versioning = ["SQLAlchemy-Continuum (>=1.3.11)"]
[package.source]
type = "git"
url = "https://github.com/carantunes/invenio-db.git"
reference = "1.0.10"
resolved_reference = "1f0901ae2eeaef1dc709fe1f6fc628f87a1866e4"
[[package]]
name = "invenio-files-processor"
version = "0.1.0"
...
...
@@ -1404,7 +1411,7 @@ werkzeug = ">=0.14.1"
admin = ["Flask-Admin (>=1.3.0)"]
all = ["Sphinx (>=1.7.2)", "Flask-Admin (>=1.3.0)", "check-manifest (>=0.25)", "coverage (>=4.5.3)", "Flask-Menu (>=0.5.0)", "invenio-admin (>=1.0.0)", "isort (>=4.3.0)", "mock (>=1.3.0)", "pydocstyle (>=3.0.0)", "pytest-cov (>=2.7.1)", "pytest-pep8 (>=1.0.6)", "pytest (>=4.6.4,<5.0.0)"]
docs = ["Sphinx (>=1.7.2)"]
mysql = ["invenio-db[versioning
,mysql
] (>=1.0.0)"]
mysql = ["invenio-db[
mysql,
versioning] (>=1.0.0)"]
postgresql = ["invenio-db[postgresql,versioning] (>=1.0.0)"]
sqlite = ["invenio-db[versioning] (>=1.0.0)"]
tests = ["check-manifest (>=0.25)", "coverage (>=4.5.3)", "Flask-Menu (>=0.5.0)", "invenio-admin (>=1.0.0)", "isort (>=4.3.0)", "mock (>=1.3.0)", "pydocstyle (>=3.0.0)", "pytest-cov (>=2.7.1)", "pytest-pep8 (>=1.0.6)", "pytest (>=4.6.4,<5.0.0)"]
...
...
@@ -1427,7 +1434,7 @@ invenio-records-rest = ">=1.6.3"
[package.extras]
all = ["Sphinx (>=3)", "invenio-indexer (>=1.1.0)", "invenio-search[elasticsearch6] (>=1.2.0)", "mock (>=1.3.0)", "pytest-invenio (>=1.4.0)"]
docs = ["Sphinx (>=3)"]
mysql = ["invenio-db[versioning
,mysql
] (>=1.0.0)"]
mysql = ["invenio-db[
mysql,
versioning] (>=1.0.0)"]
postgresql = ["invenio-db[postgresql,versioning] (>=1.0.0)"]
sqlite = ["invenio-db[versioning] (>=1.0.0)"]
tests = ["invenio-indexer (>=1.1.0)", "invenio-search[elasticsearch6] (>=1.2.0)", "mock (>=1.3.0)", "pytest-invenio (>=1.4.0)"]
...
...
@@ -2709,7 +2716,7 @@ timezone = ["python-dateutil"]
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "
764c24c7abbe19616fdc8f767edd05795cd2d37e85
0c
d
46
ddb189fee773d094
b"
content-hash = "
1067a43b96dc601ba5bf7e51e653b4702d70499de4a145698b0a
0c46
46f5d50
b"
[metadata.files]
alembic = [
...
...
@@ -3102,10 +3109,7 @@ invenio-config = [
{file = "invenio-config-1.0.3.tar.gz", hash = "sha256:9d10492b49a46703f0ac028ce8ab78b5ff1c72b180ecb4ffcee5bf49682d1e6c"},
{file = "invenio_config-1.0.3-py2.py3-none-any.whl", hash = "sha256:238ab074991e7f0d6ee7ebc6eb2f5e41658749dd977ab6e86476e862c0efaf28"},
]
invenio-db = [
{file = "invenio-db-1.0.9.tar.gz", hash = "sha256:bc65e82c350a908c4802399d39ae01cde051195ef42e0bc396c6a63e0a75bf50"},
{file = "invenio_db-1.0.9-py2.py3-none-any.whl", hash = "sha256:c8861a665af5cbb3645d4fde5cc0e04354a108a3d5b4cf2c3eee5d252da5d534"},
]
invenio-db = []
invenio-files-processor = [
{file = "invenio-files-processor-0.1.0.tar.gz", hash = "sha256:c9345dc7de0a5fc3f981fb12baee04d5e19b95fcbcd06dabc4bd27206d4c30c4"},
{file = "invenio_files_processor-0.1.0-py2.py3-none-any.whl", hash = "sha256:e288258bf072fe5db7808dbc69a4fce55552dd14317edfaa68892941c2f6bc5d"},
...
...
pyproject.toml
View file @
91eb2779
...
...
@@ -15,7 +15,7 @@ invenio-assets = ">=1.1.3,<1.2.0"
invenio-base
=
">
=
1.2
.
3
,
<
1.3
.
0
"
invenio-celery
=
">
=
1.2
.
0
,
<
1.3
.
0
"
invenio-config
=
">
=
1.0
.
3
,
<
1.1
.
0
"
invenio-db
=
{
version
=
"
>
=
1.0
.
5
,
<
1.1
.
0
", extras = ["
postgresql
", "
versioning
"]}
invenio-db
=
{
git
=
"https://github.com/carantunes/invenio-db.git"
,
rev
=
"1.0.
1
0"
,
extras
=
[
"
versioning"
,
"postgresql
"
]}
invenio-files-processor
=
{
extras
=
["tika"]
,
version
=
"^0.1.0"
}
invenio-files-rest
=
">
=
1.2
.
0
,
<
1.3
.
0
"
invenio-i18n
=
"<1.3.0,>
=
1.2
.
0
"
...
...
tests/conftest.py
View file @
91eb2779
...
...
@@ -20,6 +20,7 @@ import pytest
from
flask
import
current_app
from
invenio_accounts.models
import
Role
,
User
from
invenio_oauth2server.models
import
Token
from
sqlalchemy_continuum
import
versioning_manager
@
pytest
.
fixture
()
...
...
@@ -75,3 +76,11 @@ def instance_path():
`os.path.join(sys.prefix, 'var/instance/static')`
"""
pass
@
pytest
.
fixture
(
scope
=
"function"
)
def
db
(
db
):
"""Re-initialize versioning."""
versioning_manager
.
track_session
(
db
.
session
)
yield
db
Write
Preview