Commit 1980a259 authored by Pablo Panero
Browse files

Merge branch 'dev' into 'master'

Release 0.5.0

See merge request webservices/cern-search/cern-search-rest-api!41
parents 7ffa132e 4a59fe2c
......@@ -24,7 +24,9 @@ secrets/
# Local env source
env-*.sh
*.ini
# Debug and other logs
*.log
*.pid
......@@ -9,8 +9,8 @@ variables:
### also the variable name
RESOURCE: cern-search-rest-api
### OpenShift namespace and server values
NAMESPACE_DEV: test-cern-search-master
OPENSHIFT_SERVER_DEV: https://openshift-dev.cern.ch
NAMESPACE_DEV: cern-search-master
OPENSHIFT_SERVER_DEV: https://openshift.cern.ch
NAMESPACE_PROD: cern-search-master
OPENSHIFT_SERVER_PROD: https://openshift.cern.ch
......@@ -70,7 +70,7 @@ tag_image_dev: &tag_image_openshift
script:
- oc tag --source=docker ${CI_REGISTRY_IMAGE}:${CI_COMMIT_TAG} ${RESOURCE}:${CI_COMMIT_TAG} --token=${TOKEN} --server=${OPENSHIFT_SERVER} -n ${NAMESPACE}
variables:
TOKEN: ${SERVICE_ACCOUNT_TOKEN_DEV}
TOKEN: ${SERVICE_ACCOUNT_TOKEN_PROD}
NAMESPACE: ${NAMESPACE_DEV}
OPENSHIFT_SERVER: ${OPENSHIFT_SERVER_DEV}
......@@ -92,7 +92,7 @@ import_image_dev:
script:
- oc import-image ${RESOURCE}:${CI_COMMIT_TAG:-latest} --token=${TOKEN} --server=${OPENSHIFT_SERVER} -n ${NAMESPACE}
variables:
TOKEN: ${SERVICE_ACCOUNT_TOKEN_DEV}
TOKEN: ${SERVICE_ACCOUNT_TOKEN_PROD}
OPENSHIFT_SERVER: ${OPENSHIFT_SERVER_DEV}
NAMESPACE: ${NAMESPACE_DEV}
......
......@@ -11,7 +11,8 @@ RUN yum update -y && \
python-pip \
gcc \
openssl \
npm && \
npm \
openldap-devel && \
pip install --upgrade pip setuptools wheel
ADD requirements.txt /tmp
......
......@@ -62,6 +62,7 @@ JSONSCHEMAS_REGISTER_ENDPOINTS_UI = True
# TODO use ES central service. Change INDEXER_RECORD_TO_INDEX = 'invenio_indexer.utils.default_record_to_index'
INDEX_PREFIX = os.getenv('CERN_SEARCH_INDEX_PREFIX', 'cernsearch')
INDEXER_DEFAULT_DOC_TYPE = os.getenv('CERN_SEARCH_DEFAULT_DOC_TYPE', 'doc_v0.0.1')
INDEXER_DEFAULT_INDEX = os.getenv('CERN_SEARCH_DEFAULT_INDEX', 'cernsearch-test-doc_v0.0.1')
......@@ -107,6 +108,11 @@ RECORDS_REST_ENDPOINTS = dict(
)
)
# App
# ===
RATELIMIT_DEFAULT = os.getenv('CERN_SEARCH_INSTANCE_RATELIMIT', '5000/hour')
# Flask Security
# ==============
# Avoid error upon registration with email sending
......
......@@ -117,6 +117,4 @@ def egroup_admin():
user_provides = get_user_provides()
# set.isdisjoint() is faster than set.intersection()
admin_access_groups = admin_access_groups.split(',')
if user_provides and not set(user_provides).isdisjoint(set(admin_access_groups)):
return True
return False
return user_provides and not set(user_provides).isdisjoint(set(admin_access_groups))
......@@ -2,11 +2,10 @@
# -*- coding: utf-8 -*-
from flask_security import current_user
from flask import request, g, current_app
from invenio_indexer.utils import default_record_to_index
from flask import request, current_app
from invenio_search import current_search_client
from cern_search_rest_api.modules.cernsearch.utils import get_user_provides
from cern_search_rest_api.modules.cernsearch.utils import get_user_provides, cern_search_record_to_index
"""Access control for CERN Search."""
......@@ -85,76 +84,96 @@ class RecordPermission(object):
def has_owner_permission(user, record=None):
    """Tell whether ``user`` belongs to an owner group of the target index.

    The index and document type are resolved from ``record`` through
    ``get_index_from_request``. The allowed groups are the comma separated
    value stored under ``_meta._owner`` in that index's Elasticsearch mapping.

    :param user: User being checked; it is passed to ``log_action`` and its
        ``is_authenticated`` attribute decides whether any lookup happens.
    :param record: Optional record used to resolve the index.
    :returns: ``True`` when the user is authenticated and shares at least one
        group with the owner list, ``False`` otherwise.
    """
    log_action(user, 'CREATE/OWNER')

    if user.is_authenticated:
        provided_groups = get_user_provides()
        index_name, doc_type = get_index_from_request(record)
        current_app.logger.debug(
            'Using index {idx} and doc {doc}'.format(idx=index_name, doc=doc_type)
        )

        # ``None`` covers both "index does not exist" and "no mapping returned".
        index_mapping = None
        if current_search_client.indices.exists([index_name]):
            index_mapping = current_search_client.indices.get_mapping([index_name])

        if index_mapping is not None:
            current_app.logger.debug('Using mapping for {idx}'.format(idx=index_name))
            current_app.logger.debug('Mapping {mapping}'.format(mapping=index_mapping))
            meta = index_mapping[index_name]['mappings'][doc_type]['_meta']
            owner_groups = meta['_owner'].split(',')
            # A non-empty intersection is the same test as "not disjoint".
            if provided_groups and set(provided_groups) & set(owner_groups):
                current_app.logger.debug('User authenticated correctly')
                return True

    # Reached on every path that did not grant access.
    current_app.logger.debug('Could not authenticate user, group sets are disjoint')
    return False
def get_index_from_request(record=None):
if record is not None and record.get('$schema', '') is not None:
return default_record_to_index(record)
return cern_search_record_to_index(record)
current_app.logger.debug('get_index_from_schema() No record or no $schema in it, using defaults')
return (current_app.config['INDEXER_DEFAULT_INDEX'],
current_app.config['INDEXER_DEFAULT_DOC_TYPE'])
def has_list_permission(user, record=None):
"""Check if user is authenticated and has create access"""
return user.is_authenticated
if user:
log_action(user, 'LIST')
return user.is_authenticated
else:
return False
def has_update_permission(user, record):
"""Check if user is authenticated and has update access"""
log_action(user, 'UPDATE')
if user.is_authenticated:
# Allow based on the '_access' key
user_provides = get_user_provides()
# set.isdisjoint() is faster than set.intersection()
update_access_groups = record['_access']['update']
if check_elasticsearch(record) and user_provides and has_owner_permission(user) and \
(
not set(user_provides).isdisjoint(set(update_access_groups))
or is_admin(user)
):
(
not set(user_provides).isdisjoint(set(update_access_groups))
or is_admin(user)
):
current_app.logger.debug('Group sets not disjoint, user allowed')
return True
return False
def has_read_record_permission(user, record):
"""Check if user is authenticated and has read access. This implies reading one document"""
log_action(user, 'READ')
if user.is_authenticated:
# Allow based on the '_access' key
user_provides = get_user_provides()
# set.isdisjoint() is faster than set.intersection()
read_access_groups = record['_access']['read']
if check_elasticsearch(record) and user_provides and has_owner_permission(user) and \
(
not set(user_provides).isdisjoint(set(read_access_groups))
or is_admin(user)
):
try:
read_access_groups = record['_access']['read']
if check_elasticsearch(record) and user_provides and has_owner_permission(user) and \
(
not set(user_provides).isdisjoint(set(read_access_groups))
or is_admin(user)
):
current_app.logger.debug('Group sets not disjoint, user allowed')
return True
except KeyError:
return True
return False
def has_delete_permission(user, record):
"""Check if user is authenticated and has delete access"""
log_action(user, 'DELETE')
if user.is_authenticated:
# Allow based on the '_access' key
user_provides = get_user_provides()
# set.isdisjoint() is faster than set.intersection()
delete_access_groups = record['_access']['delete']
if check_elasticsearch(record) and user_provides and has_owner_permission(user) and \
(
not set(user_provides).isdisjoint(set(delete_access_groups))
or is_admin(user)
):
(
not set(user_provides).isdisjoint(set(delete_access_groups))
or is_admin(user)
):
current_app.logger.debug('Group sets not disjoint, user allowed')
return True
return False
......@@ -194,6 +213,7 @@ def has_admin_view_permission(user):
# set.isdisjoint() is faster than set.intersection()
admin_access_groups = admin_access_groups.split(',')
if user_provides and not set(user_provides).isdisjoint(set(admin_access_groups)):
current_app.logger.debug('User has admin view access')
return True
return False
......@@ -215,6 +235,7 @@ def is_admin(user):
"""Check if the user is administrator"""
admin_user = current_app.config['ADMIN_USER']
if user.email == admin_user or user.email.replace('@cern.ch', '') == admin_user:
current_app.logger.debug('User {user} is admin'.format(user=user.email))
return True
return False
......@@ -234,3 +255,15 @@ def check_elasticsearch(record=None):
search = search.get_record(str(record.id))
return search.count() == 1
return False
def log_action(user, action):
    """Emit a debug log line naming ``action`` and the user attempting it.

    :param user: User object; its ``email`` is logged when present and its
        ``is_authenticated`` value is always logged.
    :param action: Label of the operation being attempted (e.g. ``'READ'``).
    """
    # Users without an ``email`` attribute are reported as 'Anonymous'.
    who = getattr(user, 'email', 'Anonymous')
    emit = current_app.logger.debug
    emit('Action {action} - user {usr} authenticated: {status}'.format(
        action=action,
        usr=who,
        status=user.is_authenticated,
    ))
......@@ -6,7 +6,7 @@
from flask import g
from flask import current_app
from invenio_search import current_search
from invenio_search.utils import schema_to_index
from invenio_search.utils import schema_to_index, build_index_name
def get_user_provides():
......@@ -23,21 +23,28 @@ def cern_search_record_to_index(record):
:param record: The record object.
:returns: Tuple (index, doc_type).
"""
INDEX_PREFIX = current_app.config['CERN_SEARCH_DEFAULT_INDEX_PREFIX']
prefix = current_app.config['INDEX_PREFIX']
index_names = current_search.mappings.keys()
schema = record.get('$schema', '')
if isinstance(schema, dict):
schema = schema.get('$ref', '')
aux = current_app.config['CERN_SEARCH_INDEX_PREFIX']
if aux:
INDEX_PREFIX = aux
index, doc_type = schema_to_index(schema, index_names=index_names)
if index and doc_type:
return '{0}{1}'.format(INDEX_PREFIX, index), doc_type
else:
return ('{0}{1}'.format(current_app.config['CERN_SEARCH_DEFAULT_INDEX_PREFIX'],
current_app.config['INDEXER_DEFAULT_INDEX']),
current_app.config['INDEXER_DEFAULT_DOC_TYPE'])
parts = schema.split('/')
if index_names:
for start in range(len(parts)):
index_name = build_index_name(*parts[start:])
if index_name in index_names:
if index_name.startswith(prefix) and len(index_name) > len(prefix) + 2:
return index_name, index_name[len(prefix) + 1:]
current_app.logger.debug('Index {0}{1} - Doc {2}'.format(
current_app.config['CERN_SEARCH_INDEX_PREFIX'],
current_app.config['INDEXER_DEFAULT_INDEX'],
current_app.config['INDEXER_DEFAULT_DOC_TYPE'])
)
return ('{0}{1}'.format(current_app.config['CERN_SEARCH_INDEX_PREFIX'],
current_app.config['INDEXER_DEFAULT_INDEX']),
current_app.config['INDEXER_DEFAULT_DOC_TYPE'])
\ No newline at end of file
......@@ -54,10 +54,7 @@
"description": "Website content."
},
"extras": {
"type": "array",
"items": {
"type": "string"
},
"type": "object",
"description": "Attachments and followed links present in the website"
},
"custom_pid": {
......
......@@ -60,15 +60,13 @@
}
},
"extras": {
"type": "text",
"fields": {
"english": {
"type": "text",
"analyzer": "english"
"type": "nested",
"properties": {
"title": {
"type": "keyword"
},
"french": {
"type": "text",
"analyzer": "french"
"content": {
"type": "text"
}
}
},
......
......@@ -8,6 +8,7 @@ invenio-config>=1.0.0,<1.2.0
invenio-db[postgresql,versioning]>=1.0.0,<1.2.0
invenio-indexer[elasticsearch5]>=1.0.0,<1.2.0
invenio-jsonschemas>=1.0.0,<1.2.0
invenio-logging>=1.0.0,<1.1.0
invenio-records-rest[elasticsearch5]>=1.1.1,<1.2.0
invenio-records[postgresql]>=1.0.0,<1.2.0
invenio-rest[cors]>=1.0.0,<1.2.0
......@@ -15,6 +16,10 @@ invenio-oauthclient>=1.0.0,<1.2.0
invenio_oauth2server>=1.0.0,<1.2.0
invenio-search[elasticsearch5]>=1.0.0,<1.2.0
invenio-theme>=1.0.0,<1.2.0
ldap>=1.0.2,<1.1.0
npm>=0.1.1
python-ldap>=3.1.0,<3.2.0
raven>=6.9.0,<6.10.0
redis>=2.10.0
uWSGI>=2.0.16
\ No newline at end of file
uWSGI>=2.0.16
uwsgi-tools>=1.1.1,<1.2.0
......@@ -16,3 +16,7 @@ npm install
invenio collect -v
invenio assets build
mv /code/static/${LOGO_PATH} ${INVENIO_INSTANCE_PATH}/static/${LOGO_PATH}
# PID File for uWSGI
touch /code/uwsgi.pid
chmod 666 /code/uwsgi.pid
......@@ -6,8 +6,6 @@
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
# NOTE: This is a patch to fix the Oauth refresh token (More can be found in the README.md file)
"""Pre-configured remote application for enabling sign in/up with CERN.
1. Edit your configuration and add:
......@@ -195,25 +193,35 @@ def fetch_groups(groups):
return groups
def account_groups(account, resource, refresh_timedelta=None):
"""Fetch account groups from resource if necessary."""
def should_refresh_groups(extra_data_updated=None, refresh_timedelta=None):
"""Check if updating the groups is needed."""
updated = datetime.utcnow()
modified_since = updated
if refresh_timedelta is not None:
modified_since += refresh_timedelta
modified_since = modified_since.isoformat()
last_update = account.extra_data.get('updated', modified_since)
if updated is None:
updated = modified_since
last_update = extra_data_updated
if last_update > modified_since:
return False
#if last_update > modified_since:
groups_db = account.extra_data.get('groups', [])
if groups_db is not None and groups_db:
return account.extra_data.get('groups', [])
return True
def account_groups(account, resource, refresh_timedelta=None):
"""Fetch account groups from resource if necessary."""
updated = datetime.utcnow()
groups = fetch_groups(resource['Group'])
account.extra_data.update(
groups=groups,
updated=updated.isoformat(),
)
db.session.commit()
return groups
......@@ -246,6 +254,45 @@ def get_dict_from_response(response):
return result
def get_user_resources_ldap(user):
    """Fetch a user's CERN identity data and group names from LDAP.

    Searches ``ldap://xldap.cern.ch`` (one level below
    ``OU=Users,OU=Organic Units,DC=cern,DC=ch``) for the entry whose ``mail``
    attribute equals ``user.email`` and reads ``memberOf``, ``mail``,
    ``uidNumber`` and ``displayName`` from it.

    :param user: Object exposing an ``email`` attribute.
    :returns: ``dict`` with the keys ``groups`` (list of group names taken
        from the first RDN value of each ``memberOf`` DN), ``email``,
        ``cern_uid`` and ``name``. When ``user.email`` is empty the result of
        ``jsonify([])`` is returned instead.
    :raises IndexError: If the directory returns no entry for the e-mail.
    """
    import ldap
    import ldap.filter
    from flask import jsonify

    query = user.email
    if not query:
        # NOTE(review): this returns a Flask response rather than a dict, so
        # callers indexing the result by key will fail - confirm intent.
        return jsonify([])

    lc = ldap.initialize('ldap://xldap.cern.ch')
    try:
        msgid = lc.search_ext(
            'OU=Users,OU=Organic Units,DC=cern,DC=ch',
            ldap.SCOPE_ONELEVEL,
            # Exact, escaped match: a substring filter built from the raw
            # e-mail allows filter injection and can return another account
            # whose address merely contains this one.
            'mail={}'.format(ldap.filter.escape_filter_chars(query)),
            ['memberOf', 'mail', 'uidNumber', 'displayName'],
            serverctrls=[ldap.controls.SimplePagedResultsControl(
                True, size=20, cookie='')]
        )
        # Wait for the results of this specific search.
        entries = lc.result(msgid)[1]
    finally:
        # Always release the connection, even when the search fails.
        lc.unbind_s()

    if not entries:
        # Same exception type as the previous unchecked ``res[0]`` lookup,
        # but with a message that explains what went wrong.
        raise IndexError('No LDAP entry found for e-mail {}'.format(query))
    attrs = entries[0][1]

    # NOTE(review): python-ldap returns attribute values as bytes on
    # Python 3; the comparison and the splits below assume str values -
    # confirm the target interpreter.
    groups = []
    if attrs['mail'][0] == user.email:
        # Each memberOf value is a DN such as "CN=<group>,OU=..."; keep only
        # the value of its first component. An entry without memberOf simply
        # yields no groups.
        groups = [
            group_dn.split(',')[0].split('=')[1]
            for group_dn in attrs.get('memberOf', [])
        ]

    return {
        'groups': groups,
        'email': user.email,
        'cern_uid': attrs['uidNumber'][0],
        'name': attrs['displayName'][0]
    }
def get_resource(remote):
"""Query CERN Resources to get user info and groups."""
cached_resource = session.pop('cern_resource', None)
......@@ -253,9 +300,21 @@ def get_resource(remote):
return cached_resource
response = remote.get(REMOTE_APP_RESOURCE_API_URL)
dict_response = get_dict_from_response(response)
session['cern_resource'] = dict_response
return dict_response
if response.status == 200:
dict_response = get_dict_from_response(response)
session['cern_resource'] = dict_response
return dict_response
else:
response = get_user_resources_ldap(current_user)
r = {}
r['EmailAddress'] = [response['email']]
r['uidNumber'] = [response['cern_uid']]
r['CommonName'] = [response['name']]
r['DisplayName'] = [response['name']]
r['Group'] = response['groups']
return r
def account_info(remote, resp):
......@@ -301,21 +360,20 @@ def account_setup(remote, token, resp):
"""Perform additional setup after user have been logged in."""
resource = get_resource(remote)
with db.session.begin_nested():
external_id = resource['uidNumber'][0]
external_id = resource['uidNumber'][0]
# Set CERN person ID in extra_data.
token.remote_account.extra_data = {
'external_id': external_id,
}
groups = account_groups(token.remote_account, resource)
assert not isinstance(g.identity, AnonymousIdentity)
extend_identity(g.identity, groups)
# Set CERN person ID in extra_data.
token.remote_account.extra_data = {
'external_id': external_id,
}
groups = account_groups(token.remote_account, resource)
assert not isinstance(g.identity, AnonymousIdentity)
extend_identity(g.identity, groups)
user = token.remote_account.user
user = token.remote_account.user
# Create user <-> external id link.
oauth_link_external_id(user, dict(id=external_id, method='cern'))
# Create user <-> external id link.
oauth_link_external_id(user, dict(id=external_id, method='cern'))
@identity_changed.connect
......@@ -332,19 +390,25 @@ def on_identity_changed(sender, identity):
user_id=current_user.get_id(),
client_id=client_id,
)
groups = []
if account:
remote = find_remote_by_client_id(client_id)
resource = get_resource(remote)
refresh = current_app.config.get(
groups = account.extra_data.get('groups', [])
resources_last_updated = account.extra_data.get('updated', None)
refresh_timedelta = current_app.config.get(
'OAUTHCLIENT_CERN_REFRESH_TIMEDELTA',
OAUTHCLIENT_CERN_REFRESH_TIMEDELTA
)
groups.extend(
account_groups(account, resource, refresh_timedelta=refresh)
)
extend_identity(identity, groups)
if should_refresh_groups(resources_last_updated, refresh_timedelta):
remote = find_remote_by_client_id(client_id)
resource = get_resource(remote)
groups.extend(
account_groups(account, resource, refresh_timedelta=refresh_timedelta)
)
extend_identity(identity, groups)
@identity_loaded.connect
......
......@@ -50,6 +50,7 @@ install_requires = [
'invenio-db[postgresql,versioning]>=1.0.2,<1.2.0',
'invenio-indexer[elasticsearch5]>=1.0.0,<1.2.0',
'invenio-jsonschemas>=1.0.0,<1.2.0',
'invenio-logging>=1.0.0,<1.1.0',
'invenio-records-rest[elasticsearch5]>=1.1.1,<1.2.0',
'invenio-records[postgresql]>=1.0.0,<1.2.0',
'invenio-rest[cors]>=1.0.0,<1.2.0',
......@@ -57,9 +58,12 @@ install_requires = [
'invenio_oauth2server>=1.0.0,<1.2.0',
'invenio-search[elasticsearch5]>=1.0.0,<1.2.0',
'invenio-theme>=1.0.0,<1.2.0',
'python-ldap>=3.1.0,<3.2.0',
'raven>=6.9.0,<6.10.0',
'redis>=2.10.0',
'npm>=0.1.1',
'uWSGI>=2.0.16',
'uwsgi-tools>=1.1.1,<1.2.0',
'idna>=2.5,<2.7',
'urllib3<1.23', # Needed until invenio-search[elasticsearch] is updated to 6 (depends on central service version)
]
......
......@@ -74,6 +74,11 @@ objects:
secretKeyRef:
name: oauth
key: oauth_credentials
- name: INDEXER_SENTRY_DSN
valueFrom:
secretKeyRef:
name: sentry
key: dsn
image: gitlab-registry.cern.ch/webservices/cern-search/cern-search-rest-api:latest
imagePullPolicy: Always
name: cern-search-rest-api
......@@ -319,6 +324,8 @@ objects:
data:
# Invenio
INVENIO_INSTANCE_PATH: ${INSTANCE_PATH}
# Invenio Logging
INVENIO_LOGGING_SENTRY_LEVEL: ${LOGGING_LEVEL}
# App to allow hosts
INVENIO_APP_ALLOWED_HOSTS: ${ALLOWED_HOSTS}
# Invenio Theme
......@@ -363,3 +370,6 @@ parameters:
- name: ADMIN_UI_ACCESS_LIST
description: "List of comma separated egroups that have access to the ADMIN UI (e.g. 'egroup_one,egroup_two')"
value: "CernSearch-Administrators@cern.ch"
- name: LOGGING_LEVEL
description: "Logging level of the application"
value: 'WARNING'
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment