Commit bafa3973 authored by Carina Antunes's avatar Carina Antunes
Browse files

[REVERT] Bump Invenio 1.3.2

parent b67460bb
......@@ -2,6 +2,7 @@ CELERY_LOG_LEVEL=error
CERN_SEARCH_INSTANCE=test
CERN_SEARCH_REMOTE_APP_RESOURCE=localhost
CERN_SEARCH_SERVER_NAME=localhost
CERN_SEARCH_USE_EGROUPS='False'
CERN_SEARCH_FILES_PROCESSOR_QUEUE=files_processor
CERN_SEARCH_FILES_PROCESSOR_QUEUE_DLX=files_processor_dlx
......@@ -17,6 +18,7 @@ ENV=dev
FLASK_SKIP_DOTENV=1
FLASK_DEBUG='False'
INVENIO_DEBUG=0
FLOWER_PASS=password
......@@ -36,8 +38,9 @@ INVENIO_COLLECT_STORAGE=flask_collect.storage.file
INVENIO_INDEXER_BULK_REQUEST_TIMEOUT=10
INVENIO_INDEXER_DEFAULT_DOC_TYPE=doc_v0.0.2
INVENIO_INDEXER_DEFAULT_INDEX=test-doc_v0.0.2
INVENIO_JSONSCHEMAS_HOST=0.0.0.0
INVENIO_LOGGING_CONSOLE='True'
INVENIO_LOGGING_CONSOLE_LEVEL=WARNING
INVENIO_LOGGING_CONSOLE_LEVEL=DEBUG
INVENIO_RATELIMIT_STORAGE_URL='redis://redis:6379/3'
INVENIO_RATELIMIT_AUTHENTICATED_USER=100000/hour
INVENIO_SEARCH_ELASTIC_HOSTS=elasticsearch
......
CERN_SEARCH_REMOTE_APP_RESOURCE=dev-cern-search.web.cern.ch
CERN_SEARCH_SERVER_NAME=dev-cern-search.web.cern.ch
INVENIO_APP_ALLOWED_HOSTS=['localhost', 'nginx','dev-cern-search.web.cern.ch']
INVENIO_CERN_APP_CREDENTIALS={'consumer_key':'dev-cern-search','consumer_secret':'H55huuLaDZKT20vfHZZLg1gOYl2wn7018uWOqoxNhks1'}
INVENIO_JSONSCHEMAS_HOST=dev-cern-search.web.cern.ch
......@@ -17,6 +17,9 @@ secrets/
*.key
*.crt
*.csr
*.pem
*.srl
# Dump files
......
......@@ -7,21 +7,20 @@
# under the terms of the MIT License; see LICENSE file for more details.
# Use CentOS7:
FROM gitlab-registry.cern.ch/webservices/cern-search/cern-search-rest-api/cern-search-rest-api-base:bfdd86117598a031f427328c9d276f7f1b782520
FROM gitlab-registry.cern.ch/webservices/cern-search/cern-search-rest-api/cern-search-rest-api-base:d7964b30051811d72629dd35a6175e28224589c8
ARG build_env
# Switch to base once issues with pipenv are fixed
RUN yum update -y && \
yum install -y mailcap
# CERN Search installation
WORKDIR /${WORKING_DIR}/src
ADD . /${WORKING_DIR}/src
RUN pip freeze
# If env is development, install development dependencies
RUN if [ "$build_env" != "prod" ]; then pipenv install --system --ignore-pipfile --deploy --dev; fi
RUN pip freeze
# Install CSaS
RUN pip install -e .
......
......@@ -31,6 +31,10 @@ build-env:
docker-compose -f $(DOCKER_FILE) up -d --remove-orphans
.PHONY: build-env
# Same as build-env, but passes --build so the images are rebuilt before the
# containers are started.
rebuild-env:
	docker-compose -f $(DOCKER_FILE) up -d --build --remove-orphans
# Declare this target (not build-env) as phony so a file or directory named
# "rebuild-env" can never make the rule appear up to date.
.PHONY: rebuild-env
es-setup:
curl -XPUT "http://localhost:9200/_settings" -H 'Content-Type: application/json' -d' \
{\
......@@ -64,7 +68,7 @@ stop-env:
docker-compose -f $(DOCKER_FILE) down --volumes
.PHONY: stop-env
reload-env: destroy-env env
reload-env: destroy-env generate-certificates rebuild-env populate-instance es-setup load-fixtures shell-env
.PHONY: reload-env
shell-env:
......@@ -75,6 +79,9 @@ shell-worker:
docker-compose -f $(DOCKER_FILE) exec $(WORKER_NAME) /bin/bash
.PHONY: shell-worker
env-staging: generate-certificates build-env populate-instance es-setup shell-env
.PHONY: env-staging
env: generate-certificates build-env populate-instance es-setup load-fixtures shell-env
.PHONY: env
......
......@@ -7,46 +7,46 @@ verify_ssl = true
flake8 = ">=3.7.9"
flake8-docstrings = ">=1.5.0"
isort = "==4.3.21"
pytest-dotenv = "*"
pytest-invenio = ">=1.2.0"
pytest-mock = ">=1.6.0"
pytest-dotenv = "*"
[packages]
invenio-access = ">=1.0.0,<1.1.0"
invenio-admin = ">=1.0.0,<1.1.0"
invenio-accounts = ">=1.0.0,<1.1.0"
invenio-app = "<1.3.0,>=1.2.0"
invenio-base = "<1.3.0,>=1.2.0"
invenio-config = "<1.1.0,>=1.0.2"
invenio-db = {version = ">=1.0.0,<1.1.0",extras = ["postgresql", "versioning"]}
invenio-indexer = ">=1.1.2,<1.2.0"
invenio-jsonschemas = ">=1.0.0,<1.1.0"
invenio-logging = ">=1.0.0,<1.1.0"
invenio-records-rest = "<=1.7.0,>1.6.5"
invenio-records = {extras = ["postgresql"],version = ">=1.0.0,<1.3.0"}
invenio-rest = "<1.3,>=1.2.0"
invenio-oauthclient = ">=1.0.0,<1.1.0"
invenio-oauth2server = ">=1.0.0,<1.1.0"
invenio-search = {extras = ["elasticsearch7"],version = ">=1.2.0,<1.3.0"}
invenio-theme = ">=1.1.0,<1.2.0"
Flask = "*"
eventlet = "*"
gevent = ">=1.4.0"
invenio-access = ">=1.4.1,<1.5.0"
invenio-accounts = ">=1.3.0,<1.4.0"
invenio-admin = ">=1.2.1,<1.3.0"
invenio-app = ">=1.2.6,<1.3.0"
invenio-assets = ">=1.1.3,<1.2.0"
invenio-base = ">=1.2.3,<1.3.0"
invenio-celery = ">=1.2.0,<1.3.0"
invenio-config = ">=1.0.3,<1.1.0"
invenio-db = {version = ">=1.0.5,<1.1.0",extras = ["postgresql", "versioning"]}
invenio-files-processor = {extras = ["tika"],git = "https://github.com/carantunes/invenio-files-processor.git",ref = "1.0.2-alpha"}
invenio-files-rest = ">=1.2.0,<1.3.0"
invenio-indexer = ">=1.1.1,<1.2.0"
invenio-jsonschemas = ">=1.1.0,<1.2.0"
invenio-logging = {extras = ["sentry-sdk"],version = ">=1.3.0,<1.4.0"}
invenio-oauth2server = ">=1.2.0,<1.3.0"
invenio-oauthclient = ">=1.3.0,<1.4.0"
invenio-records = {extras = ["postgresql"],version = ">=1.3.1,<1.4.0"}
invenio-records-files = ">=1.2.1,<1.3.0"
invenio-records-rest = ">=1.7.1,<1.8.0"
invenio-rest = ">=1.2.1,<1.3.0"
invenio-search = {extras = ["elasticsearch7"],version = ">=1.3.1,<1.4.0"}
invenio-theme = ">=1.1.4,<1.2.0"
ldap = "<1.1.0,>=1.0.2"
npm = ">=0.1.1"
pip = "*"
python-ldap = "<3.2.0,>=3.1.0"
raven = ">=6.9.0,<6.10.0"
redis = ">=2.10.0,<3.0.0"
uwsgi-tools = ">=1.1.1,<1.2.0"
Flask = "*"
uWSGI = ">=2.0.16"
marshmallow = "<3"
invenio-records-files = "<1.3.0,>=1.2.0"
invenio-celery = "<1.2.0,>=1.1.0"
invenio-files-rest = ">=1.0.5,<1.1.0"
gevent = ">=1.4.0"
invenio-files-processor = {extras = ["tika"],git = "https://github.com/carantunes/invenio-files-processor.git",ref = "1.0.2-alpha"}
Werkzeug = ">=0.15,<1.0.0"
pip = "*"
eventlet = "*"
uwsgi-tools = ">=1.1.1,<1.2.0"
wtforms = "<2.3.0"
tika = "==1.24"
[requires]
python_version = "3.6"
This diff is collapsed.
# CERN Search as a Service
# Citadel Search as a Service
CERN Search provides enterprise search capabilities on demand. You can set up your own search instance, submit your
Citadel Search provides enterprise search capabilities on demand. You can set up your own search instance, submit your
documents and search among them when needed!
- User documentation [here](http://cern-search.docs.cern.ch/cernsearchdocs/).
- Administration documentation [here](https://cern-search-admin.docs.cern.ch/cernsearch-admin-docs/).
\ No newline at end of file
- Administration documentation [here](https://cern-search-admin.docs.cern.ch/cernsearch-admin-docs/).
# Local Development options
## Docker (recommended)
1. Run `make env MODE=test`
2. Follow [instructions](#tls---how-to-install-certificate) to install certificate.
3. Open https://localhost in Chrome.
Read more in the Makefile.
## Docker (connected to cern sso)
1. Edit /etc/hosts and add line:
`127.0.0.1 dev-cern-search.web.cern.ch`
2. Edit docker-compose.test.yml and add `- .env-staging` under:
```
env_file:
- .env
```
3. Run `make env-staging MODE=test`
4. Follow [instructions](#tls---how-to-install-certificate) to install certificate.
5. Open https://dev-cern-search.web.cern.ch/ in Chrome (with the proxy to CERN turned off).
## docker + pipenv: Read more on the makefile
1. Run `make local-env MODE=test`
2. Follow [instructions](#tls---how-to-install-certificate) to install certificate.
## TLS - How to install certificate
Install generated certificate `nginx/tls/cern.ch.crt` locally.
For mac:
`sudo security add-trusted-cert -d -r trustRoot -k /Library/Keychains/System.keychain nginx/tls/cern.ch.crt`
......@@ -55,6 +55,20 @@ OAUTHCLIENT_REMOTE_APPS = dict(
cern=CERN_REMOTE_APP,
)
# OAuth REST Client
# =================
# Work on a private deep copy so the defaults exposed by the `cern` module are
# never modified in place.
OAUTH_REMOTE_APP = copy.deepcopy(cern.REMOTE_REST_APP)
# Resource and scope sent with the token request; the resource can be
# overridden through the CERN_SEARCH_REMOTE_APP_RESOURCE environment variable.
OAUTH_REMOTE_APP["params"]["request_token_params"] = {
    "resource": os.getenv('CERN_SEARCH_REMOTE_APP_RESOURCE', 'test-cern-search.cern.ch'),
    "scope": "Name Email Bio Groups Group",
}
# Route the "authorized" callback to this project's own signup handler.
OAUTH_REMOTE_APP["authorized_handler"] = (
    'cern_search_rest_api.modules.cernsearch.handlers:cern_authorized_signup_handler'
)

# Register the customised application under the "cern" key.
OAUTHCLIENT_REST_REMOTE_APPS = {"cern": OAUTH_REMOTE_APP}
# Accounts
# ========
# FIXME: Needs to be disabled for role-based auth in SSO. Otherwise invenio_account/sessions:login_listener will crash
......
......@@ -38,7 +38,7 @@ def cern_authorized_signup_handler(resp, remote, *args, **kwargs):
# Store token in session
# ----------------------
# Set token in session - token object only returned if
# current_user.is_autenticated().
# current_user.is_authenticated().
token = response_token_setter(remote, resp)
handlers = current_oauthclient.signup_handlers[remote.name]
......
......@@ -18,7 +18,7 @@ class CSASRecordSchemaV1(RecordMetadataSchemaJSONV1):
"""Record schema."""
@validates_schema(pass_original=True)
def validate_record(self, data, original_data):
def validate_record(self, data, original_data, **kwargs):
"""Validate record."""
if not original_data.get('_access'):
raise ValidationError('Missing field _access')
......
......@@ -112,6 +112,7 @@ services:
image: nginx:1.17.4-alpine
ports:
- "8080:8080"
- "443:443"
depends_on:
- cern-search-api
volumes:
......
......@@ -17,9 +17,9 @@ http {
server {
# Running port
listen 8080 ssl;
ssl_certificate /etc/nginx/tls/tls.crt;
ssl_certificate_key /etc/nginx/tls/tls.key;
listen 443 ssl;
ssl_certificate /etc/nginx/tls/cern.ch.crt;
ssl_certificate_key /etc/nginx/tls/cern.ch.key;
rewrite ^/$ /account/settings/applications/;
client_max_body_size 0; # Disable body size limits for testing purposes
......
# OpenSSL request configuration for the locally generated development
# certificate.
# NOTE(review): presumably this is the nginx/ssl.conf file passed to
# `openssl req -config` and `openssl x509 -extfile ... -extensions req_ext`
# by the certificate generation script - confirm.

[req]
default_bits = 4096
distinguished_name = req_distinguished_name
# Extensions requested in the CSR; see [req_ext] below.
req_extensions = req_ext

# Prompts and defaults for the subject fields.
[req_distinguished_name]
countryName = Country Name (2 letter code)
countryName_default = CH
stateOrProvinceName = State or Province Name (full name)
stateOrProvinceName_default = Geneve
localityName = Locality Name (eg, city)
localityName_default = Geneve
organizationName = Organization Name (eg, company)
organizationName_default = CERN
commonName = Common Name (e.g. server FQDN or YOUR name)
commonName_max = 64
commonName_default = cern.ch

[req_ext]
# End-entity certificate: it must not be usable as a CA.
basicConstraints=CA:FALSE
extendedKeyUsage=serverAuth,clientAuth
subjectAltName = @alt_names

# Host names the certificate is valid for. Browsers match the requested host
# against this list, so every name used to reach the service must be present.
[alt_names]
DNS.1 = cern.ch
DNS.2 = *.web.cern.ch
# The default local setup is browsed at https://localhost; without this entry
# the host name check fails even after the certificate has been trusted.
DNS.3 = localhost
......@@ -9,11 +9,52 @@
readonly SCRIPT_PATH=$(dirname $0)
readonly TLS_DIR="$SCRIPT_PATH/../nginx/tls"
readonly SSL_DIR="$SCRIPT_PATH/../nginx"
readonly KEY="tls.key"
readonly CRT="tls.crt"
mkdir -p $TLS_DIR
openssl req -x509 -nodes -newkey rsa:4096 \
-subj '/C=CH/ST=Geneve/L=Geneve/O=CERN/OU=IT Department/CN=Search as a Service' \
-keyout "$TLS_DIR/$KEY" -out "$TLS_DIR/$CRT"
readonly NAME="cern.ch" # Use your own domain name
######################
# Check certificate already exists
######################
if test -f "$TLS_DIR/$NAME.crt"; then
echo "Skipping... $NAME.crt already exists."
exit 0;
fi
######################
# Become a Certificate Authority
######################
# Generate private key
openssl genrsa -out "$TLS_DIR/myCernRootCA.key" 2048
# Generate root certificate
openssl req -x509 -new -nodes -key "$TLS_DIR/myCernRootCA.key" -sha256 -days 825 -out "$TLS_DIR/myCernRootCA.pem" \
-subj "/C=CH/ST=Geneve/O=CERN/CN=cern.ch"
######################
# Create CA-signed certs
######################
# Generate a private key
openssl genrsa -out "$TLS_DIR/$NAME.key" 2048
# Create a certificate-signing request
openssl req -new -sha256 \
-key "$TLS_DIR/$NAME.key" \
-subj "/C=CH/ST=Geneve/O=CERN/CN=cern.ch" \
-config "$SSL_DIR/ssl.conf" \
-out "$TLS_DIR/$NAME.csr"
# Create the signed certificate
openssl x509 -req -in "$TLS_DIR/$NAME.csr" -CA "$TLS_DIR/myCernRootCA.pem" -CAkey "$TLS_DIR/myCernRootCA.key" \
-CAserial "$TLS_DIR/$NAME.srl" -CAcreateserial \
-extfile "$SSL_DIR/ssl.conf" -extensions req_ext \
-out "$TLS_DIR/$NAME.crt" -days 3650 -sha256
......@@ -6,7 +6,10 @@
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Pre-configured remote application for enabling sign in/up with CERN.
r"""Pre-configured remote application for enabling sign in/up with CERN.
CERN (remote app).
^^^^^^^^^^^^^^^^^^
1. Edit your configuration and add:
......@@ -70,16 +73,77 @@ In templates you can add a sign in/up link:
</a>
For more details you can play with a :doc:`working example <examplesapp>`.
CERN (remote REST app)
^^^^^^^^^^^^^^^^^^^^^^
This configuration is appropriate for e.g. a SPA application which communicates
with Invenio via REST calls.
1. Edit your configuration and add:
.. code-block:: python
import copy
from invenio_oauthclient.contrib import cern
OAUTH_REMOTE_APP = copy.deepcopy(cern.REMOTE_REST_APP)
# Path where you want your SPA to be redirected after a
# successful login.
OAUTH_REMOTE_APP["authorized_redirect_url"] = \
'https://<my_SPA_site>/login'
# Path where you want your SPA to be redirected after a
# login error.
OAUTH_REMOTE_APP["error_redirect_url"] = 'https://<my_SPA_site>/error'
OAUTHCLIENT_REST_REMOTE_APPS = dict(
cern=OAUTH_REMOTE_APP,
)
2. Register a new application with CERN. When registering the
application ensure that the *Redirect URI* points to:
``https://<my_invenio_site>:5000/oauth/authorized/cern/`` (note, CERN does
not allow localhost to be used, thus testing on development machines is
somewhat complicated by this).
3. Grab the *Client ID* and *Client Secret* after registering the application
and add them to your instance configuration (``config.py``):
.. code-block:: python
CERN_APP_CREDENTIALS = dict(
consumer_key='<client_id>',
consumer_secret='<secret>',
)
4. Now access the login page from your SPA using CERN OAuth:
.. code-block:: javascript
window.location =
"https://<my_invenio_site>:5000/api/oauth/login/cern?next=<my_next_page>";
By default the CERN module will first check whether a link already exists
between a CERN account and a user. If no link is found, the user is asked
to provide an email address to sign up.
For more details you can play with a :doc:`working example <examplesapp>`.
"""
import copy
import re
from datetime import datetime, timedelta
from flask import current_app, g, redirect, session, url_for
from flask import Blueprint, current_app, flash, g, redirect, session, url_for
from flask_babelex import gettext as _
from flask_login import current_user
from flask_principal import AnonymousIdentity, RoleNeed, UserNeed, identity_changed, identity_loaded
from invenio_db import db
from invenio_oauthclient.errors import OAuthCERNRejectedAccountError
from invenio_oauthclient.handlers.rest import response_handler
from invenio_oauthclient.models import RemoteAccount
from invenio_oauthclient.proxies import current_oauthclient
from invenio_oauthclient.utils import oauth_link_external_id, oauth_unlink_external_id
......@@ -121,19 +185,17 @@ OAUTHCLIENT_CERN_REFRESH_TIMEDELTA = timedelta(minutes=-5)
OAUTHCLIENT_CERN_SESSION_KEY = 'identity.cern_provides'
"""Name of session key where CERN roles are stored."""
REMOTE_APP = dict(
OAUTHCLIENT_CERN_ALLOWED_IDENTITY_CLASSES = [
'CERN Registered',
'CERN Shared'
]
"""Cern oauth identityClass values that are allowed to be used."""
BASE_APP = dict(
title='CERN',
description='Connecting to CERN Organization.',
icon='',
authorized_handler='invenio_oauthclient.handlers'
':authorized_signup_handler',
disconnect_handler='invenio_oauthclient.contrib.cern'
':disconnect_handler',
signup_handler=dict(
info='invenio_oauthclient.contrib.cern:account_info',
setup='invenio_oauthclient.contrib.cern:account_setup',
view='invenio_oauthclient.handlers:signup_handler',
),
logout_url='https://login.cern.ch/adfs/ls/?wa=wsignout1.0',
params=dict(
base_url='https://oauth.web.cern.ch/',
request_token_url=None,
......@@ -146,8 +208,43 @@ REMOTE_APP = dict(
'show_login': 'true'}
)
)
# Session-based flavour of the CERN application: every BASE_APP entry plus the
# non-REST handlers. This is a shallow copy, so nested values such as
# ``params`` are the same objects as in BASE_APP.
REMOTE_APP = {
    **BASE_APP,
    'authorized_handler': 'invenio_oauthclient.handlers:authorized_signup_handler',
    'disconnect_handler': 'invenio_oauthclient.contrib.cern:disconnect_handler',
    'signup_handler': {
        'info': 'invenio_oauthclient.contrib.cern:account_info',
        'setup': 'invenio_oauthclient.contrib.cern:account_setup',
        'view': 'invenio_oauthclient.handlers:signup_handler',
    },
}
"""CERN Remote Application."""
# REST flavour of the CERN application: every BASE_APP entry plus the REST
# handlers, a response handler and the redirect targets (all defaulting to
# '/'). This is a shallow copy, so nested values such as ``params`` are the
# same objects as in BASE_APP.
REMOTE_REST_APP = {
    **BASE_APP,
    'authorized_handler': 'invenio_oauthclient.handlers.rest:authorized_signup_handler',
    'disconnect_handler': 'invenio_oauthclient.contrib.cern:disconnect_rest_handler',
    'signup_handler': {
        'info': 'invenio_oauthclient.contrib.cern:account_info_rest',
        'setup': 'invenio_oauthclient.contrib.cern:account_setup',
        'view': 'invenio_oauthclient.handlers.rest:signup_handler',
    },
    'response_handler': 'invenio_oauthclient.handlers.rest:default_remote_response_handler',
    'authorized_redirect_url': '/',
    'disconnect_redirect_url': '/',
    'signup_redirect_url': '/',
    'error_redirect_url': '/',
}
"""CERN Remote REST Application."""
REMOTE_SANDBOX_APP = copy.deepcopy(REMOTE_APP)
"""CERN Sandbox Remote Application."""
......@@ -160,10 +257,29 @@ REMOTE_SANDBOX_APP['params'].update(dict(
REMOTE_APP_RESOURCE_API_URL = 'https://oauthresource.web.cern.ch/api/Me'
REMOTE_APP_RESOURCE_SCHEMA = 'http://schemas.xmlsoap.org/claims/'
cern_oauth_blueprint = Blueprint('cern_oauth', __name__)
@cern_oauth_blueprint.route('/cern/logout')
def logout():
    """Redirect the client to the CERN logout URL.

    The URL comes from the ``cern`` entry of ``OAUTHCLIENT_REMOTE_APPS`` when
    that setting is non-empty; ``REMOTE_APP`` is used when the setting is
    empty or has no ``cern`` entry.

    :returns: A 302 redirect response to the resolved logout URL.
    """
    # Resolved up front: this is the answer when no remote apps are configured.
    target = REMOTE_APP['logout_url']
    configured = current_app.config.get('OAUTHCLIENT_REMOTE_APPS')
    if configured:
        target = configured.get('cern', REMOTE_APP)['logout_url']
    return redirect(target, code=302)
def find_remote_by_client_id(client_id):
"""Return a remote application based with given client ID."""
current_app.logger.debug(f"find_remote_by_client_id: client_id={client_id}")
for remote in current_oauthclient.oauth.remote_apps.values():
current_app.logger.debug(f"Remote: consumer_key={remote.consumer_key} name={remote.name}")
if remote.name == 'cern' and remote.consumer_key == client_id:
return remote
......@@ -190,6 +306,19 @@ def fetch_groups(groups):
return groups
def fetch_extra_data(resource):
    """Build the extra-data dict stored alongside a CERN account.

    :param resource: Mapping of claim name to a list of values.
    :returns: A dict with ``person_id``, ``identity_class`` and ``department``,
        each holding the first value of the matching claim, or ``None`` when
        the claim is absent from ``resource``.
    """
    def first_value(claim):
        # A missing claim falls back to a one-element list holding None.
        return resource.get(claim, [None])[0]

    return {
        'person_id': first_value('PersonID'),
        'identity_class': first_value('IdentityClass'),
        'department': first_value('Department'),
    }
def should_refresh_groups(extra_data_updated=None, refresh_timedelta=None):
"""Check if updating the groups is needed."""
updated = datetime.utcnow()
......@@ -207,18 +336,30 @@ def should_refresh_groups(extra_data_updated=None, refresh_timedelta=None):
return True
def account_groups(account, resource, refresh_timedelta=None):
"""Fetch account groups from resource if necessary."""
def account_groups_and_extra_data(account, resource,
refresh_timedelta=None):
"""Fetch account groups and extra data from resource if necessary."""
updated = datetime.utcnow()
modified_since = updated
if refresh_timedelta is not None:
modified_since += refresh_timedelta
modified_since = modified_since.isoformat()
last_update = account.extra_data.get('updated', modified_since)
if last_update > modified_since:
return account.extra_data.get('groups', [])
groups = fetch_groups(resource['Group'])
extra_data = current_app.config.get(
'OAUTHCLIENT_CERN_EXTRA_DATA_SERIALIZER',
fetch_extra_data
)(resource)
account.extra_data.update(
groups=groups,
updated=updated.isoformat(),
**extra_data
)
db.session.commit()
return groups
......@@ -252,6 +393,8 @@ def get_dict_from_response(response):
def get_user_resources_ldap(user):
current_app.logger.debug(f"get_user_resources_ldap")
import ldap
from flask import jsonify
# assert not isinstance(user, AnonymousUser)
......@@ -276,9 +419,8 @@ def get_user_resources_ldap(user):
res = res[0][1]
groups = []
if res['mail'][0].decode("utf-8") == user.email:
if res['mail'][0] == user.email:
for group in res['memberOf']: