Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
webservices
cern-search
cern-search-rest-api
Commits
3b6150b1
Commit
3b6150b1
authored
Mar 20, 2019
by
Pablo Panero
Browse files
mappings/jsonschemas[webservices]: refactor to comply with _data field and improve analysis
parent
7b29992a
Changes
3
Hide whitespace changes
Inline
Side-by-side
cern_search_rest_api/modules/cernsearch/jsonschemas/webservices/webservices/generic_website_v1.0.0.json
→
cern_search_rest_api/modules/cernsearch/jsonschemas/webservices/webservices/generic_website_v2.0.0.json
View file @
3b6150b1
{
"title"
:
"Webservice Generic Website schema v1.0.0"
,
"id"
:
"http://0.0.0.0:5000/schemas/webservices/generic_website_v
1
.0.0.json"
,
"$schema"
:
"http://0.0.0.0:5000/schemas/webservices/generic_website_v
1
.0.0.json"
,
"id"
:
"http://0.0.0.0:5000/schemas/webservices/generic_website_v
2
.0.0.json"
,
"$schema"
:
"http://0.0.0.0:5000/schemas/webservices/generic_website_v
2
.0.0.json"
,
"type"
:
"object"
,
"properties"
:
{
"_access"
:
{
...
...
@@ -33,31 +33,61 @@
}
}
},
"name"
:
{
"type"
:
"string"
,
"description"
:
"Website name or title."
},
"url"
:
{
"type"
:
"string"
,
"description"
:
"Website URL."
},
"origin"
:
{
"type"
:
"string"
,
"description"
:
"Website origin. Meaning FL or ATT, or SRC if it equals the url."
},
"last_updated"
:
{
"type"
:
"string"
,
"description"
:
"Datetime of when the web page content was last updated."
},
"content"
:
{
"type"
:
"string"
,
"description"
:
"Website content."
},
"extras"
:
{
"_data"
:
{
"type"
:
"object"
,
"description"
:
"Attachments and followed links present in the website"
"properties"
:
{
"name"
:
{
"type"
:
"string"
,
"description"
:
"Website name or title"
},
"url"
:
{
"type"
:
"string"
,
"description"
:
"Full path website URL"
},
"website"
:
{
"type"
:
"string"
,
"description"
:
"Base website URL"
},
"analytics_relevance"
:
{
"type"
:
"number"
,
"description"
:
"Matomo analytics based relevance of the site"
},
"origin"
:
{
"type"
:
"string"
,
"description"
:
"Website origin. Meaning FL or ATT, or SRC if it equals the url"
},
"content"
:
{
"type"
:
"string"
,
"description"
:
"Website content"
},
"extras"
:
{
"type"
:
"array"
,
"description"
:
"Attachments and followed links present in the website"
,
"items"
:
{
"type"
:
"object"
,
"properties"
:
{
"name"
:
{
"type"
:
"string"
,
"description"
:
"Linked website name or title"
},
"url"
:
{
"type"
:
"string"
,
"description"
:
"Linked website full path website URL"
},
"content"
:
{
"type"
:
"string"
,
"description"
:
"Linked website content"
}
}
}
},
"last_updated"
:
{
"type"
:
"string"
,
"description"
:
"Datetime of when the web page content was last updated"
}
}
},
"custom_pid"
:
{
"control_number"
:
{
"type"
:
"string"
},
"$schema"
:
{
...
...
cern_search_rest_api/modules/cernsearch/mappings/webservices/v6/webservices/generic_website_v1.0.0.json
deleted
100644 → 0
View file @
7b29992a
{
"settings"
:
{
"index.percolator.map_unmapped_fields_as_string"
:
true
,
"index.mapping.total_fields.limit"
:
50
},
"mappings"
:
{
"generic_website_v1.0.0"
:
{
"numeric_detection"
:
true
,
"_meta"
:
{
"_owner"
:
"CernSearch-Administrators@cern.ch"
},
"properties"
:
{
"_access"
:
{
"type"
:
"object"
,
"properties"
:
{
"owner"
:{
"type"
:
"keyword"
},
"read"
:
{
"type"
:
"keyword"
},
"update"
:
{
"type"
:
"keyword"
},
"delete"
:
{
"type"
:
"keyword"
}
}
},
"_search_all"
:
{
"type"
:
"text"
,
"fields"
:
{
"english"
:
{
"type"
:
"text"
,
"analyzer"
:
"english"
},
"french"
:
{
"type"
:
"text"
,
"analyzer"
:
"french"
}
}
},
"name"
:
{
"type"
:
"text"
,
"copy_to"
:
"_search_all"
},
"url"
:
{
"type"
:
"keyword"
,
"copy_to"
:
"_search_all"
},
"origin"
:
{
"type"
:
"keyword"
},
"content"
:
{
"type"
:
"text"
,
"copy_to"
:
"_search_all"
},
"extras"
:
{
"type"
:
"nested"
,
"properties"
:
{
"title"
:
{
"type"
:
"keyword"
,
"copy_to"
:
"_search_all"
},
"content"
:
{
"type"
:
"text"
,
"copy_to"
:
"_search_all"
}
}
},
"last_updated"
:
{
"type"
:
"date"
,
"format"
:
"yyyy-MM-ddZHH:mm"
},
"custom_pid"
:
{
"type"
:
"keyword"
},
"$schema"
:
{
"enabled"
:
false
}
}
}
}
}
\ No newline at end of file
cern_search_rest_api/modules/cernsearch/mappings/webservices/v6/webservices/generic_website_v2.0.0.json
0 → 100644
View file @
3b6150b1
{
"settings"
:
{
"index.percolator.map_unmapped_fields_as_string"
:
true
,
"index.mapping.total_fields.limit"
:
50
,
"analysis"
:
{
"analyzer"
:
{
"url_analyzer"
:
{
"type"
:
"standard"
,
"stopwords"
:
[
"http"
,
"https"
,
"ftp"
,
"www"
,
"web"
,
"cern"
,
"ch"
]
}
}
}
},
"mappings"
:
{
"generic_website_v2.0.0"
:
{
"numeric_detection"
:
true
,
"_meta"
:
{
"_owner"
:
"CernSearch-Administrators@cern.ch"
},
"properties"
:
{
"_access"
:
{
"type"
:
"object"
,
"properties"
:
{
"owner"
:{
"type"
:
"keyword"
},
"read"
:
{
"type"
:
"keyword"
},
"update"
:
{
"type"
:
"keyword"
},
"delete"
:
{
"type"
:
"keyword"
}
}
},
"_data"
:
{
"type"
:
"object"
,
"properties"
:
{
"name"
:
{
"type"
:
"keyword"
,
"fields"
:
{
"french"
:
{
"type"
:
"text"
,
"analyzer"
:
"french"
},
"english"
:
{
"type"
:
"text"
,
"analyzer"
:
"english"
}
}
},
"url"
:
{
"type"
:
"keyword"
,
"fields"
:
{
"full_text"
:
{
"type"
:
"text"
,
"analyzer"
:
"url_analyzer"
}
}
},
"website"
:
{
"type"
:
"keyword"
,
"fields"
:
{
"full_text"
:
{
"type"
:
"text"
,
"analyzer"
:
"url_analyzer"
}
}
},
"analytics_relevance"
:
{
"type"
:
"double"
},
"origin"
:
{
"type"
:
"keyword"
,
"fields"
:
{
"full_text"
:
{
"type"
:
"text"
,
"analyzer"
:
"url_analyzer"
}
}
},
"content"
:
{
"type"
:
"text"
,
"analyzer"
:
"english"
,
"fields"
:
{
"french"
:
{
"type"
:
"text"
,
"analyzer"
:
"french"
}
}
},
"extras"
:
{
"type"
:
"object"
,
"properties"
:
{
"name"
:
{
"type"
:
"keyword"
,
"fields"
:
{
"french"
:
{
"type"
:
"text"
,
"analyzer"
:
"french"
},
"english"
:
{
"type"
:
"text"
,
"analyzer"
:
"english"
}
}
},
"url"
:
{
"type"
:
"keyword"
,
"fields"
:
{
"full_text"
:
{
"type"
:
"text"
,
"analyzer"
:
"url_analyzer"
}
}
},
"content"
:
{
"type"
:
"text"
,
"analyzer"
:
"english"
,
"fields"
:
{
"french"
:
{
"type"
:
"text"
,
"analyzer"
:
"french"
}
}
}
}
},
"last_updated"
:
{
"type"
:
"date"
,
"format"
:
"yyyy-MM-ddZHH:mm"
}
}
},
"control_number"
:
{
"type"
:
"keyword"
},
"$schema"
:
{
"enabled"
:
false
}
}
}
}
}
\ No newline at end of file
Pablo Panero
@ppanero
mentioned in issue
web-indexer#1 (closed)
·
Mar 20, 2019
mentioned in issue
web-indexer#1 (closed)
mentioned in issue web-indexer#1
Toggle commit list
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment