Commit 60508ac3, authored and committed by Carina Antunes
Browse files

add support for files in indico

parent 19a6ffcd
......@@ -63,3 +63,5 @@ INVENIO_FILES_PROCESSOR_TIKA_SERVER_ENDPOINT=http://tika:9998
SQLALCHEMY_POOL_SIZE=10
SQLALCHEMY_MAX_OVERFLOW=15
CERN_SEARCH_COPY_TO_METADATA='True'
CERN_SEARCH_INSTANCE=indico
INVENIO_INDEXER_DEFAULT_DOC_TYPE=events_v1.0.0
INVENIO_INDEXER_DEFAULT_INDEX=indico-events_v1.0.0
CERN_SEARCH_PROCESS_FILE_META='["collection"]'
......@@ -74,18 +74,28 @@ def index_metadata(file_content, json, file_name):
"""Extract metadata from file to be indexed."""
metadata = extract_metadata_from_processor(file_content["metadata"])
if metadata.get("authors"):
index_specific_meta = isinstance(current_app.config.get("PROCESS_FILE_META"), list)
indexable_meta = current_app.config.get("PROCESS_FILE_META")
def should_index(field):
    """Tell whether ``field`` may be copied from the file metadata.

    Every field is accepted when ``index_specific_meta`` is false;
    otherwise only fields contained in ``indexable_meta`` are accepted.
    """
    if not index_specific_meta:
        return True
    return field in indexable_meta
if metadata.get("authors") and should_index(AUTHORS_KEY):
json[DATA_KEY][AUTHORS_KEY] = metadata.get("authors")
if metadata.get("content_type"):
if metadata.get("content_type") and should_index(COLLECTION_KEY):
json[COLLECTION_KEY] = metadata["content_type"]
if metadata.get("title"):
if metadata.get("title") and should_index(NAME_KEY):
json[DATA_KEY][NAME_KEY] = metadata["title"]
if metadata.get("keywords"):
if metadata.get("keywords") and should_index(KEYWORDS_KEY):
json[DATA_KEY][KEYWORDS_KEY] = metadata["keywords"]
if metadata.get("creation_date"):
if metadata.get("creation_date") and should_index(CREATION_KEY):
json[CREATION_KEY] = metadata["creation_date"]
if "." in file_name:
if "." in file_name and should_index(FILE_FORMAT_KEY):
json[FILE_FORMAT_KEY] = file_name.split(".")[-1]
......
......@@ -221,12 +221,10 @@
},
"type": {
"type": "keyword",
"copy_to": "collection",
"eager_global_ordinals": true
},
"collection": {
"type": "keyword",
"store" : true,
"eager_global_ordinals": true
},
"type_format": {
......@@ -266,6 +264,15 @@
"site": {
"type": "keyword"
},
"_bucket": {
"type": "keyword"
},
"_bucket_content": {
"type": "keyword"
},
"file": {
"type": "keyword"
},
"url": {
"type": "keyword"
},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment