Commit 2ea7a272 authored by Carina Antunes's avatar Carina Antunes
Browse files

Fix linting

parent a0cec983
.env*
.flake8
.gitignore
.gitlab-ci.yml
.isort.cfg
.pre-commit-config.yaml
docker-compose*
.poetry.env
.python-version
AUTHORS.md
CONTRIBUTING.md
LICENSE
.logging.yaml
docker
with_env.sh
# flake8 settings for the project's lint run.
[flake8]
# Allow lines up to 120 characters.
max-line-length = 120
# Report code whose complexity score exceeds 10.
max-complexity = 10
# Turn on the off-by-default "G" checks; presumably those of flake8-logging-format — TODO confirm.
enable-extensions=G
......@@ -80,35 +80,20 @@ validate-base-image-updated:
- pyproject.toml
- poetry.lock
script:
- git diff --name-only origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME
- git diff --name-only origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME | grep -q -E "(^|[^-])\bDockerfile\b([^-]|$)"
# Lint
lint:
services:
- docker:dind
variables:
# As of GitLab 12.5, privileged runners at CERN mount a /certs/client docker volume that enables use of TLS to
# communicate with the docker daemon. This avoids a warning about the docker service possibly not starting
# successfully.
# See https://docs.gitlab.com/ee/ci/docker/using_docker_build.html#tls-enabled
DOCKER_TLS_CERTDIR: "/certs"
# Note that we do not need to set DOCKER_HOST when using the official docker client image: it automatically
# defaults to tcp://docker:2376 upon seeing the TLS certificate directory.
#DOCKER_HOST: tcp://docker:2376/
image: python:3.6
stage: test
only:
refs:
- merge_requests
image: tmaier/docker-compose:latest
script: make build-env lint MODE=test
script:
- make lint
before_script:
- docker info
- docker login -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY
- docker-compose --version
- apk add make
allow_failure: true
tags:
- docker-privileged-xl
- pip install pre-commit
# Test
test:
......
[settings]
line_length=120
known_third_party = celery,click,elasticsearch,elasticsearch_dsl,flask,flask_login,flask_security,invenio_accounts,invenio_app,invenio_db,invenio_files_processor,invenio_files_rest,invenio_indexer,invenio_oauth2server,invenio_oauthclient,invenio_pidstore,invenio_records,invenio_records_files,invenio_records_rest,invenio_rest,invenio_search,kombu,marshmallow,pytest,setuptools,six,werkzeug
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
use_parentheses = True
ensure_newline_before_comments = True
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.9
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/asottile/seed-isort-config
rev: v1.9.3
hooks:
- id: flake8
additional_dependencies: [flake8-docstrings]
- repo: https://github.com/timothycrosley/isort
rev: 4.3.21-2
- id: seed-isort-config
- repo: https://github.com/pre-commit/mirrors-isort
rev: v4.3.21
hooks:
- id: isort
- id: isort
- repo: https://github.com/psf/black
rev: stable
hooks:
- id: black
language_version: python3.6
args: [--line-length=120]
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4
hooks:
- id: flake8
additional_dependencies: [flake8-docstrings, flake8-logging-format]
......@@ -5,4 +5,3 @@ CERN Search as a Service REST API.
- Pablo Panero <pablo.panero@cern.ch>
- Carina Antunes <carina.oliveira.antunes@cern.ch>
......@@ -16,7 +16,7 @@ Changes:
Changes:
- [SEARCH-86] Bump to Invenio 3.3
- [SEARCH-86] Bump to Invenio 3.3
- [NI] Generate certificate with a Certificate Authority
----
......@@ -28,21 +28,21 @@ Changes:
- [SEARCH-102] EDMS schemas updates
- Limit content of files to ~100MB
----
----
**Version 1.3.2-beta (released 2020-08-20)**
Changes:
- [NI] Add sentry-sdk dependency
----
----
**Version 1.3.1-beta (released 2020-08-18)**
Changes:
- [SEARCH-86] Upgrade to invenio 3.3
----
----
**Version 1.3.0-beta (released 2020-07-06)**
Changes:
......@@ -56,7 +56,7 @@ Changes:
- [SEARCH-92] Add codimd schemas
- [NI] Add openshift-dev CI
----
----
**Version 1.2.1-beta (released 2020-05-28)**
......@@ -71,7 +71,7 @@ Changes:
- Make public records available without login
- Only view admin accounts can use the access query param
----
----
**Version 1.2-beta (released 2020-05-14)**
......@@ -81,7 +81,7 @@ Changes:
- Instead of `invenio index run` now should use `invenio utils runindex`
- [NI] fix indico jsonschemas
----
----
**Version 1.1.2-beta (released 2020-05-14)**
......@@ -89,7 +89,7 @@ Changes:
- Update full compose
- [SEARCH-67] Remove binary mappings
----
----
**Version 1.1.1-beta (released 2020-04-28)**
......@@ -160,7 +160,7 @@ Changes:
**Version 1.0.2-beta (released 2020-03-23)**
Changes:
- [SEARCH-47] Bump tika to 1.24
- [SEARCH-47] Bump tika to 1.24
----
......@@ -239,4 +239,4 @@ Features:
Fixes:
- Document creation should just check if the user is authenticated in the first iteration. Permissions over the schema are checked on the second iteration.
- Document creation should just check if the user is authenticated in the first iteration. Permissions over the schema are checked on the second iteration.
# How to contribute
Guidelines on how to contribute are explained in the official documentation, they can be found [here](http://cern-search.docs.cern.ch/cernsearchdocs/contributing/).
\ No newline at end of file
Guidelines on how to contribute are explained in the official documentation, which can be found [here](http://cern-search.docs.cern.ch/cernsearchdocs/contributing/).
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2019 CERN.
# Copyright (C) 2018-2021 CERN.
#
# Citadel Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
# Use CentOS7:
FROM gitlab-registry.cern.ch/webservices/cern-search/cern-search-rest-api/cern-search-rest-api-base:3b6147ba87cd87992329c2dfaf4ba5f5d42ecf27
FROM gitlab-registry.cern.ch/webservices/cern-search/cern-search-rest-api/cern-search-rest-api-base:1baab88940e876b2bea33d551bf5ed424d0607de
ARG build_env
# CERN Search installation
WORKDIR /${WORKING_DIR}/src
ADD . /${WORKING_DIR}/src
RUN python -V && pip -V
# If env is development, install development dependencies
RUN if [ "$build_env" != "prod" ]; then poetry install --no-root; fi
RUN pip freeze
RUN if [ "$build_env" != "prod" ]; then poetry install --no-root --no-interaction --no-ansi; fi
# Install CSaS
RUN pip install -e .
......
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2019 CERN.
# Copyright (C) 2018-2021 CERN.
#
# Citadel Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
......@@ -16,14 +16,17 @@ RUN yum update -y && \
openssl \
openldap-devel \
https://linuxsoft.cern.ch/cern/centos/7/cern/x86_64/Packages/CERN-CA-certs-20180516-1.el7.cern.noarch.rpm \
mailcap
mailcap \
https://packages.endpoint.com/rhel/7/os/x86_64/endpoint-repo-1.7-1.x86_64.rpm && \
yum upgrade -y git
# CERN Search installation
WORKDIR /${WORKING_DIR}/src
ADD ./poetry.lock /${WORKING_DIR}/src
ADD ./pyproject.toml /${WORKING_DIR}/src
COPY poetry.lock pyproject.toml /${WORKING_DIR}/src/
# Install dependencies globally
RUN pip install poetry && \
poetry config virtualenvs.create false && \
poetry install --no-dev --no-root
RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
ENV PATH="${PATH}:/root/.poetry/bin"
RUN poetry config virtualenvs.create false -vvv && \
poetry install --no-root --no-dev --no-interaction --no-ansi
......@@ -17,4 +17,4 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
......@@ -101,13 +101,10 @@ test: stop-env build-env pytest
.PHONY: test
lint:
docker-compose -f $(DOCKER_FILE) exec -T $(SERVICE_NAME) /bin/bash -c \
"echo running isort...; \
isort -rc -c -df; \
echo running flake8...; \
flake8 --max-complexity 10 --ignore E501,D401"
pre-commit run --all-files --show-diff-on-failure
.PHONY: lint
################### Local development helpful directives ####################
################### (poetry + docker) ####################
#
......@@ -176,8 +173,5 @@ local-test:
.PHONY: local-test
local-lint:
@echo running isort...;
sh with_env.sh $(POETRY_DOTENV) poetry run isort -rc -c -df .;
@echo running flake8...;
sh with_env.sh $(POETRY_DOTENV) poetry run flake8 --max-complexity 10 --ignore E501,D401
poetry run pre-commit run --all-files --show-diff-on-failure;
.PHONY: local-lint
......@@ -2,13 +2,13 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2019 CERN.
# Copyright (C) 2018-2021 CERN.
#
# CERN Search is free software; you can redistribute it and/or modify it
# Citadel Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Citadel Search."""
from __future__ import absolute_import, print_function
from .version import __version__
__all__ = ('__version__', )
__all__ = ("__version__",)
......@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2019 CERN.
# Copyright (C) 2018-2021 CERN.
#
# Citadel Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
......@@ -14,14 +14,6 @@ import ast
import copy
import os
from cern_search_rest_api.modules.cernsearch.api import CernSearchRecord
from cern_search_rest_api.modules.cernsearch.facets import regex_aggregation, simple_query_string
from cern_search_rest_api.modules.cernsearch.indexer import CernSearchRecordIndexer
from cern_search_rest_api.modules.cernsearch.permissions import (record_create_permission_factory,
record_delete_permission_factory,
record_list_permission_factory,
record_read_permission_factory,
record_update_permission_factory)
from elasticsearch_dsl import A
from flask import request
from invenio_oauthclient.contrib import cern_openid
......@@ -29,6 +21,17 @@ from invenio_records_rest import config as irr_config
from invenio_records_rest.facets import terms_filter
from kombu import Exchange, Queue
from cern_search_rest_api.modules.cernsearch.api import CernSearchRecord
from cern_search_rest_api.modules.cernsearch.facets import regex_aggregation, simple_query_string
from cern_search_rest_api.modules.cernsearch.indexer import CernSearchRecordIndexer
from cern_search_rest_api.modules.cernsearch.permissions import (
record_create_permission_factory,
record_delete_permission_factory,
record_list_permission_factory,
record_read_permission_factory,
record_update_permission_factory,
)
def _(x):
"""Identity function used to trigger string extraction."""
......@@ -45,9 +48,13 @@ THEME_SEARCHBAR = False
OAUTHCLIENT_CERN_OPENID_ALLOWED_ROLES = ["search-user", "search-admin"]
CERN_REMOTE_APP = copy.deepcopy(cern_openid.REMOTE_APP)
CERN_REMOTE_APP["params"].update(dict(request_token_params={
"scope": "openid",
}))
CERN_REMOTE_APP["params"].update(
dict(
request_token_params={
"scope": "openid",
}
)
)
OAUTHCLIENT_REMOTE_APPS = dict(
cern_openid=CERN_REMOTE_APP,
......@@ -57,9 +64,13 @@ OAUTHCLIENT_REMOTE_APPS = dict(
# ============
OAUTH_REMOTE_REST_APP = copy.deepcopy(cern_openid.REMOTE_REST_APP)
OAUTH_REMOTE_REST_APP["params"].update(dict(request_token_params={
"scope": "openid",
}))
OAUTH_REMOTE_REST_APP["params"].update(
dict(
request_token_params={
"scope": "openid",
}
)
)
OAUTHCLIENT_REST_REMOTE_APPS = dict(
cern_openid=OAUTH_REMOTE_REST_APP,
......@@ -75,28 +86,28 @@ SERVER_NAME = os.getenv("CERN_SEARCH_SERVER_NAME")
# Admin
# =====
ADMIN_PERMISSION_FACTORY = 'cern_search_rest_api.modules.cernsearch.permissions:admin_permission_factory'
ADMIN_PERMISSION_FACTORY = "cern_search_rest_api.modules.cernsearch.permissions:admin_permission_factory"
# JSON Schemas configuration
# ==========================
JSONSCHEMAS_ENDPOINT = '/schemas'
JSONSCHEMAS_HOST = '0.0.0.0'
JSONSCHEMAS_ENDPOINT = "/schemas"
JSONSCHEMAS_HOST = "0.0.0.0"
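# Do not register the endpoints on the UI app.
# NOTE(review): the flag below is set to True, which reads as enabling registration — confirm the intended wording.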
JSONSCHEMAS_REGISTER_ENDPOINTS_UI = True
# Search configuration
# =====================
SEARCH_MAPPINGS = [os.getenv('CERN_SEARCH_INSTANCE', 'test')]
SEARCH_USE_EGROUPS = ast.literal_eval(os.getenv('CERN_SEARCH_USE_EGROUPS', 'True'))
SEARCH_DOC_PIPELINES = ast.literal_eval(os.getenv('CERN_SEARCH_DOC_PIPELINES', '{}'))
SEARCH_MAPPINGS = [os.getenv("CERN_SEARCH_INSTANCE", "test")]
SEARCH_USE_EGROUPS = ast.literal_eval(os.getenv("CERN_SEARCH_USE_EGROUPS", "True"))
SEARCH_DOC_PIPELINES = ast.literal_eval(os.getenv("CERN_SEARCH_DOC_PIPELINES", "{}"))
# Alias instance - don't allow updates, allow only search
SEARCH_INSTANCE_IMMUTABLE = ast.literal_eval(os.getenv('CERN_SEARCH_INSTANCE_IMMUTABLE', 'False'))
SEARCH_INSTANCE_IMMUTABLE = ast.literal_eval(os.getenv("CERN_SEARCH_INSTANCE_IMMUTABLE", "False"))
# File indexer capabilities enabled
SEARCH_FILE_INDEXER = ast.literal_eval(os.getenv('CERN_SEARCH_FILE_INDEXER', 'True'))
SEARCH_FILE_INDEXER = ast.literal_eval(os.getenv("CERN_SEARCH_FILE_INDEXER", "True"))
# Records REST configuration
# ===========================
......@@ -106,41 +117,38 @@ SEARCH_FILE_INDEXER = ast.literal_eval(os.getenv('CERN_SEARCH_FILE_INDEXER', 'Tr
_Record_PID = 'pid(recid, record_class="cern_search_rest_api.modules.cernsearch.api:CernSearchRecord")' # TODO
RECORDS_FILES_REST_ENDPOINTS = {
'RECORDS_REST_ENDPOINTS': {
'docid': '/files',
"RECORDS_REST_ENDPOINTS": {
"docid": "/files",
}
}
FILES_REST_PERMISSION_FACTORY = 'cern_search_rest_api.modules.cernsearch.permissions:files_permission_factory'
FILES_REST_PERMISSION_FACTORY = "cern_search_rest_api.modules.cernsearch.permissions:files_permission_factory"
RECORDS_REST_ENDPOINTS = dict(
docid=dict(
pid_type='recid',
pid_fetcher='recid',
pid_minter='recid',
pid_type="recid",
pid_fetcher="recid",
pid_minter="recid",
default_endpoint_prefix=True,
default_media_type='application/json',
item_route='/record/<{0}:pid_value>'.format(_Record_PID),
list_route='/records/',
links_factory_imp='invenio_records_rest.links:default_links_factory',
default_media_type="application/json",
item_route="/record/<{0}:pid_value>".format(_Record_PID),
list_route="/records/",
links_factory_imp="invenio_records_rest.links:default_links_factory",
record_class=CernSearchRecord,
indexer_class=CernSearchRecordIndexer,
record_serializers={
'application/json': ('cern_search_rest_api.modules.cernsearch.serializers'
':json_v1_response'),
"application/json": ("cern_search_rest_api.modules.cernsearch.serializers" ":json_v1_response"),
},
record_loaders={
'application/json': ('cern_search_rest_api.modules.cernsearch.loaders:'
'csas_loader'),
'application/json-patch+json': lambda: request.get_json(force=True)
"application/json": ("cern_search_rest_api.modules.cernsearch.loaders:" "csas_loader"),
"application/json-patch+json": lambda: request.get_json(force=True),
},
search_class='cern_search_rest_api.modules.cernsearch.search.RecordCERNSearch',
search_index=os.getenv('CERN_SEARCH_INSTANCE', 'test'),
search_class="cern_search_rest_api.modules.cernsearch.search.RecordCERNSearch",
search_index=os.getenv("CERN_SEARCH_INSTANCE", "test"),
search_serializers={
'application/json': ('cern_search_rest_api.modules.cernsearch.serializers'
':json_v1_search'),
"application/json": ("cern_search_rest_api.modules.cernsearch.serializers" ":json_v1_search"),
},
search_factory_imp='cern_search_rest_api.modules.cernsearch.search.csas_search_factory',
search_factory_imp="cern_search_rest_api.modules.cernsearch.search.csas_search_factory",
max_result_window=10000,
read_permission_factory_imp=record_read_permission_factory,
list_permission_factory_imp=record_list_permission_factory,
......@@ -148,9 +156,9 @@ RECORDS_REST_ENDPOINTS = dict(
update_permission_factory_imp=record_update_permission_factory,
delete_permission_factory_imp=record_delete_permission_factory,
suggesters={
'phrase': {
'completion': {
'field': 'suggest_keywords',
"phrase": {
"completion": {
"field": "suggest_keywords",
}
},
},
......@@ -164,90 +172,82 @@ def aggs_filter(field):
:param field: Field name.
:returns: Function that returns the Terms query.
"""
def inner(values):
return A('terms', field=field, include=f'.*{values[0]}.*')
return A("terms", field=field, include=f".*{values[0]}.*")
return inner
cern_rest_facets = {
'aggs': {
'collection': {
'terms': {'field': 'collection'}
},
'type_format': {
'terms': {'field': 'type_format'}
},
'author': regex_aggregation('_data.authors.exact_match', 'authors_suggest'),
'site': regex_aggregation('_data.site.exact_match', 'sites_suggest'),
'keyword': regex_aggregation('_data.keywords.exact_match', 'keywords_suggest')
"aggs": {
"collection": {"terms": {"field": "collection"}},
"type_format": {"terms": {"field": "type_format"}},
"author": regex_aggregation("_data.authors.exact_match", "authors_suggest"),
"site": regex_aggregation("_data.site.exact_match", "sites_suggest"),
"keyword": regex_aggregation("_data.keywords.exact_match", "keywords_suggest"),
},
'filters': {
'collection': terms_filter("collection"),
'type_format': terms_filter("type_format"),
'author': terms_filter("_data.authors.exact_match"),
'site': terms_filter("_data.site.exact_match"),
'keyword': terms_filter("_data.keywords.exact_match"),
"filters": {
"collection": terms_filter("collection"),
"type_format": terms_filter("type_format"),
"author": terms_filter("_data.authors.exact_match"),
"site": terms_filter("_data.site.exact_match"),
"keyword": terms_filter("_data.keywords.exact_match"),
},
"matches": {
"author_match": simple_query_string("_data.authors"),
"keyword_match": simple_query_string("_data.keywords"),
"site_match": simple_query_string("_data.site"),
"name_match": simple_query_string("_data.name"),
"url_match": simple_query_string("url"),
},
'matches': {
'author_match': simple_query_string("_data.authors"),
'keyword_match': simple_query_string("_data.keywords"),
'site_match': simple_query_string("_data.site"),
'name_match': simple_query_string("_data.name"),
'url_match': simple_query_string("url"),
}
}
RECORDS_REST_FACETS = {
'cernsearchqa-*': cern_rest_facets,
'webservices': cern_rest_facets,
'indico': {
'aggs': {
'event_type': {
'terms': {'field': '_data.event_type'}
},
'speakers_chairs': {
'terms': {'field': '_data.speakers_chairs.exact_match'}
},
'list_of_persons': {
'terms': {'field': '_data.list_of_persons.exact_match'}
}
"cernsearchqa-*": cern_rest_facets,
"webservices": cern_rest_facets,
"indico": {
"aggs": {
"event_type": {"terms": {"field": "_data.event_type"}},
"speakers_chairs": {"terms": {"field": "_data.speakers_chairs.exact_match"}},
"list_of_persons": {"terms": {"field": "_data.list_of_persons.exact_match"}},
}
}
},
}
cern_sort_options = {
'bestmatch': {
'fields': ['-_score'],
'title': 'Best match',
'default_order': 'asc',
"bestmatch": {
"fields": ["-_score"],
"title": "Best match",
"default_order": "asc",
},
"mostrecent": {
"fields": ["_updated"],
"title": "Newest",
"default_order": "asc",
},
'mostrecent': {
'fields': ['_updated'],
'title': 'Newest',
'default_order': 'asc',
}
}
RECORDS_REST_SORT_OPTIONS = {
'webservices': cern_sort_options,
'cernsearchqa-*': cern_sort_options,
'edms': {
'bestmatch': {
'fields': ['-_score'],
'title': 'Best match',
'default_order': 'asc',
"webservices": cern_sort_options,
"cernsearchqa-*": cern_sort_options,
"edms": {
"bestmatch": {
"fields": ["-_score"],
"title": "Best match",
"default_order": "asc",
},
'mostrecent': {
'fields': ['_updated'],
'title': 'Newest',
'default_order': 'asc',