Commit 3b6150b1 authored by Pablo Panero
Browse files

mappings/jsonschemas[webservices]: refactor to comply with _data field and improve analysis

parent 7b29992a
{
"title": "Webservice Generic Website schema v1.0.0",
"id": "http://0.0.0.0:5000/schemas/webservices/generic_website_v1.0.0.json",
"$schema": "http://0.0.0.0:5000/schemas/webservices/generic_website_v1.0.0.json",
"id": "http://0.0.0.0:5000/schemas/webservices/generic_website_v2.0.0.json",
"$schema": "http://0.0.0.0:5000/schemas/webservices/generic_website_v2.0.0.json",
"type": "object",
"properties": {
"_access": {
......@@ -33,31 +33,61 @@
}
}
},
"name": {
"type": "string",
"description": "Website name or title."
},
"url": {
"type": "string",
"description": "Website URL."
},
"origin": {
"type": "string",
"description": "Website origin. Meaning FL or ATT, or SRC if its equals than url."
},
"last_updated": {
"type": "string",
"description": "Datetime of when the web page content was last updated."
},
"content": {
"type": "string",
"description": "Website content."
},
"extras": {
"_data": {
"type": "object",
"description": "Attachments and followed links present in the website"
"properties": {
"name": {
"type": "string",
"description": "Website name or title"
},
"url": {
"type": "string",
"description": "Full path website URL"
},
"website": {
"type": "string",
"description": "Base website URL"
},
"analytics_relevance": {
"type": "number",
"description": "Matomo analytics based relevance of the site"
},
"origin": {
"type": "string",
"description": "Website origin. Meaning FL or ATT, or SRC if it is the same as url"
},
"content": {
"type": "string",
"description": "Website content"
},
"extras": {
"type": "array",
"description": "Attachments and followed links present in the website",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Linked website name or title"
},
"url": {
"type": "string",
"description": "Full path URL of the linked website"
},
"content": {
"type": "string",
"description": "Linked website content"
}
}
}
},
"last_updated": {
"type": "string",
"description": "Datetime of when the web page content was last updated"
}
}
},
"custom_pid": {
"control_number": {
"type": "string"
},
"$schema": {
......
{
  "settings": {
    "index.percolator.map_unmapped_fields_as_string": true,
    "index.mapping.total_fields.limit": 50
  },
  "mappings": {
    "generic_website_v1.0.0": {
      "numeric_detection": true,
      "_meta": { "_owner": "CernSearch-Administrators@cern.ch" },
      "properties": {
        "_access": {
          "type": "object",
          "properties": {
            "owner": { "type": "keyword" },
            "read": { "type": "keyword" },
            "update": { "type": "keyword" },
            "delete": { "type": "keyword" }
          }
        },
        "_search_all": {
          "type": "text",
          "fields": {
            "english": {
              "type": "text",
              "analyzer": "english"
            },
            "french": {
              "type": "text",
              "analyzer": "french"
            }
          }
        },
        "name": {
          "type": "text",
          "copy_to": "_search_all"
        },
        "url": {
          "type": "keyword",
          "copy_to": "_search_all"
        },
        "origin": { "type": "keyword" },
        "content": {
          "type": "text",
          "copy_to": "_search_all"
        },
        "extras": {
          "type": "nested",
          "properties": {
            "title": {
              "type": "keyword",
              "copy_to": "_search_all"
            },
            "content": {
              "type": "text",
              "copy_to": "_search_all"
            }
          }
        },
        "last_updated": {
          "type": "date",
          "format": "yyyy-MM-ddZHH:mm"
        },
        "custom_pid": { "type": "keyword" },
        "$schema": { "enabled": false }
      }
    }
  }
}
\ No newline at end of file
{
  "settings": {
    "index.percolator.map_unmapped_fields_as_string": true,
    "index.mapping.total_fields.limit": 50,
    "analysis": {
      "analyzer": {
        "url_analyzer": {
          "type": "standard",
          "stopwords": [
            "http",
            "https",
            "ftp",
            "www",
            "web",
            "cern",
            "ch"
          ]
        }
      }
    }
  },
  "mappings": {
    "generic_website_v2.0.0": {
      "numeric_detection": true,
      "_meta": { "_owner": "CernSearch-Administrators@cern.ch" },
      "properties": {
        "_access": {
          "type": "object",
          "properties": {
            "owner": { "type": "keyword" },
            "read": { "type": "keyword" },
            "update": { "type": "keyword" },
            "delete": { "type": "keyword" }
          }
        },
        "_data": {
          "type": "object",
          "properties": {
            "name": {
              "type": "keyword",
              "fields": {
                "french": {
                  "type": "text",
                  "analyzer": "french"
                },
                "english": {
                  "type": "text",
                  "analyzer": "english"
                }
              }
            },
            "url": {
              "type": "keyword",
              "fields": {
                "full_text": {
                  "type": "text",
                  "analyzer": "url_analyzer"
                }
              }
            },
            "website": {
              "type": "keyword",
              "fields": {
                "full_text": {
                  "type": "text",
                  "analyzer": "url_analyzer"
                }
              }
            },
            "analytics_relevance": { "type": "double" },
            "origin": {
              "type": "keyword",
              "fields": {
                "full_text": {
                  "type": "text",
                  "analyzer": "url_analyzer"
                }
              }
            },
            "content": {
              "type": "text",
              "analyzer": "english",
              "fields": {
                "french": {
                  "type": "text",
                  "analyzer": "french"
                }
              }
            },
            "extras": {
              "type": "object",
              "properties": {
                "name": {
                  "type": "keyword",
                  "fields": {
                    "french": {
                      "type": "text",
                      "analyzer": "french"
                    },
                    "english": {
                      "type": "text",
                      "analyzer": "english"
                    }
                  }
                },
                "url": {
                  "type": "keyword",
                  "fields": {
                    "full_text": {
                      "type": "text",
                      "analyzer": "url_analyzer"
                    }
                  }
                },
                "content": {
                  "type": "text",
                  "analyzer": "english",
                  "fields": {
                    "french": {
                      "type": "text",
                      "analyzer": "french"
                    }
                  }
                }
              }
            },
            "last_updated": {
              "type": "date",
              "format": "yyyy-MM-ddZHH:mm"
            }
          }
        },
        "control_number": { "type": "keyword" },
        "$schema": { "enabled": false }
      }
    }
  }
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment