Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
CroRIS
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
CMS-IRB
CroRIS
Compare revisions
b705e97124f9402396cd051ce0d44fc44942f908 to b50964002516349fc1c86b57942a791a23636dc5
Compare revisions
Changes are shown as if the
source
revision was being merged into the
target
revision.
Learn more about comparing revisions.
Source
CMS-IRB/CroRIS
Select target project
No results found
b50964002516349fc1c86b57942a791a23636dc5
Select Git revision
Swap
Target
CMS-IRB/CroRIS
Select target project
CMS-IRB/CroRIS
1 result
b705e97124f9402396cd051ce0d44fc44942f908
Select Git revision
Show changes
Only incoming changes from source
Include changes to target since source was created
Compare
Commits on Source (3)
added new journal
· 8d65a8ca
Dinko Ferencek
authored
6 months ago
8d65a8ca
updated description of the exclusion list format
· 9a2c5ee9
Dinko Ferencek
authored
6 months ago
9a2c5ee9
improved summary report
· b5096400
Dinko Ferencek
authored
6 months ago
b5096400
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
configuration.py
+4
-2
4 additions, 2 deletions
configuration.py
prepare_input.py
+66
-35
66 additions, 35 deletions
prepare_input.py
with
70 additions
and
37 deletions
configuration.py
View file @
b5096400
...
...
@@ -113,7 +113,8 @@ journals = {
'
Nature
'
:
'
Nature
'
,
'
Nature Phys.
'
:
'
Nature physics
'
,
'
Comput. Softw. Big Sci.
'
:
'
Computing and software for big science
'
,
'
Nucl. Instrum. Meth. A
'
:
'
Nuclear instruments & methods in physics research. Section A, Accelerators, spectrometers, detectors and associated equipment
'
'
Nucl. Instrum. Meth. A
'
:
'
Nuclear instruments & methods in physics research. Section A, Accelerators, spectrometers, detectors and associated equipment
'
,
'
Rept. Prog. Phys.
'
:
'
Reports on progress in physics
'
}
# ISSNs for known journals
...
...
@@ -129,7 +130,8 @@ issn = {
'
Nature
'
:
[
'
0028-0836
'
,
'
1476-4687
'
],
'
Nature Phys.
'
:
[
'
1745-2473
'
,
'
1745-2481
'
],
'
Comput. Softw. Big Sci.
'
:
[
'
2510-2036
'
,
'
2510-2044
'
],
'
Nucl. Instrum. Meth. A
'
:
[
'
0168-9002
'
,
'
1872-9576
'
]
'
Nucl. Instrum. Meth. A
'
:
[
'
0168-9002
'
,
'
1872-9576
'
],
'
Rept. Prog. Phys.
'
:
[
'
0034-4885
'
,
'
1361-6633
'
]
}
# --------------------------------------------------
This diff is collapsed.
Click to expand it.
prepare_input.py
View file @
b5096400
...
...
@@ -77,7 +77,7 @@ def get_volume(name, volume):
def
get_journal
(
name
):
if
name
not
in
journals
.
keys
():
return
"
*** Unknown journal
'"
+
name
+
"'
encountered! Please put it in the list of known journals or remove this article from the input list. ***
"
return
None
return
journals
[
name
]
...
...
@@ -102,11 +102,13 @@ def prepare_input(list_of_papers, output_file, configuration, exclusion_list):
dois
=
[]
data
=
[]
error
=
[]
counter
=
0
counter_skip
=
0
counter_error
=
0
unknownJournals
=
set
()
unknown_counter
=
0
skip_counter
=
0
excluded
=
[]
duplicates
=
[]
noAuthor
=
[]
invalidPage
=
[]
# Loop over all papers which are stored in bib
for
n
,
p
in
enumerate
(
list_of_papers
.
entries
,
1
):
...
...
@@ -119,12 +121,14 @@ def prepare_input(list_of_papers, output_file, configuration, exclusion_list):
doi_lower
=
doi
.
lower
()
# Skip excluded DOIs
if
doi_lower
in
exclusion_list
:
counter_skip
+=
1
skip_counter
+=
1
excluded
.
append
(
doi
)
print
(
'
\n
INFO: This paper with DOI:{} is excluded and will be skipped.
'
.
format
(
doi
))
continue
# Skip any duplicates
if
doi_lower
in
dois
:
counter_skip
+=
1
skip_counter
+=
1
duplicates
.
append
(
doi
)
print
(
'
\n
WARNING: This paper with DOI:{} is a duplicate and will be skipped.
'
.
format
(
doi
))
continue
else
:
...
...
@@ -132,7 +136,20 @@ def prepare_input(list_of_papers, output_file, configuration, exclusion_list):
# Get the arXiv paper id (if defined)
eprint
=
(
p
[
'
eprint
'
]
if
'
eprint
'
in
p
else
''
)
# Get fixed journal name (see more detailed description above)
journal_name
=
get_name
(
p
[
'
journal
'
],
p
[
'
volume
'
])
# Journal (according to CroRIS nomenclature)
journal
=
get_journal
(
journal_name
)
# Catch articles from unknown journals
if
journal
is
None
:
unknown_counter
+=
1
unknownJournals
.
add
(
journal_name
)
print
(
'
\n
WARNING: This paper with DOI:{} was published in an unknown journal {}. Skipping.
'
.
format
(
doi
,
journal_name
))
continue
# Fetch paper data from Inspire HEP in JSON format
# More info at: https://github.com/inspirehep/rest-api-doc
url
=
'
https://inspirehep.net/api/doi/{}
'
.
format
(
doi
)
...
...
@@ -206,7 +223,8 @@ def prepare_input(list_of_papers, output_file, configuration, exclusion_list):
# Check if any authors are found
if
len
(
authors_pretty
)
==
0
:
counter_skip
+=
1
skip_counter
+=
1
noAuthor
.
append
(
doi
)
print
(
'
\n
WARNING: No authors found for this paper with DOI:{}. Skipping.
'
.
format
(
doi
))
continue
...
...
@@ -307,15 +325,9 @@ def prepare_input(list_of_papers, output_file, configuration, exclusion_list):
# Year
year
=
p
[
'
year
'
]
# Get fixed journal name (see more detailed description above)
journal_name
=
get_name
(
p
[
'
journal
'
],
p
[
'
volume
'
])
# Volume
volume
=
get_volume
(
p
[
'
journal
'
],
p
[
'
volume
'
])
# Journal (according to CroRIS nomenclature)
journal
=
get_journal
(
journal_name
)
# ISSN
issn
=
get_issn
(
journal_name
)
...
...
@@ -398,6 +410,13 @@ def prepare_input(list_of_papers, output_file, configuration, exclusion_list):
_temp
[
'
ukupno_stranica
'
]
=
page_tot
validity_counter
[
1
]
+=
1
# Check page info status
if
validity_counter
[
0
]
<
2
and
validity_counter
[
1
]
<
2
:
skip_counter
+=
1
invalidPage
.
append
(
doi
)
print
(
'
\n
WARNING: This paper with DOI:{} has invalid page info. Skipping.
'
.
format
(
doi
))
continue
ml
=
[
{
"
jezik
"
:
"
en
"
,
...
...
@@ -424,13 +443,9 @@ def prepare_input(list_of_papers, output_file, configuration, exclusion_list):
projekti
.
append
(
p_dict
)
_temp
[
'
projekti
'
]
=
projekti
# Catch articles with unknown journal or invalid page info status
if
'
Unknown journal
'
in
journal
or
(
validity_counter
[
0
]
<
2
and
validity_counter
[
1
]
<
2
):
error
.
append
(
_temp
)
counter_error
+=
1
else
:
data
.
append
(
_temp
)
counter
+=
1
# Append paper info
data
.
append
(
_temp
)
print
(
'
\n
DOI:
'
,
doi
)
print
(
'
arXiv:
'
,
(
eprint
if
eprint
!=
''
else
'
N/A
'
))
...
...
@@ -452,22 +467,38 @@ def prepare_input(list_of_papers, output_file, configuration, exclusion_list):
print
(
'
------------------------------------------------
'
)
print
(
'
\n
%i paper(s) prepared for upload
\n
'
%
counter
)
if
counter_skip
>
0
:
print
(
'
%i paper(s) skipped
\n
'
%
counter_skip
)
if
counter_error
>
0
:
print
(
'
%i paper(s) with unknown journal(s)
\n
'
%
counter_error
)
print
(
'
\n
%i paper(s) prepared for upload
'
%
len
(
data
))
if
skip_counter
>
0
:
print
(
'
\n
%i paper(s) skipped:
'
%
skip_counter
)
if
len
(
excluded
)
>
0
:
print
(
'
\n
%i excluded DOIs:
\n
'
%
len
(
excluded
))
for
doi
in
excluded
:
print
(
'
{}
'
.
format
(
doi
))
if
len
(
duplicates
)
>
0
:
print
(
'
\n
%i duplicate DOIs:
\n
'
%
len
(
duplicates
))
for
doi
in
duplicates
:
print
(
'
{}
'
.
format
(
doi
))
if
len
(
noAuthor
)
>
0
:
print
(
'
\n
%i DOIs with missing author info:
\n
'
%
len
(
noAuthor
))
for
doi
in
noAuthor
:
print
(
'
{}
'
.
format
(
doi
))
if
len
(
invalidPage
)
>
0
:
print
(
'
\n
%i DOIs with invalid page info:
\n
'
%
len
(
invalidPage
))
for
doi
in
invalidPage
:
print
(
'
{}
'
.
format
(
doi
))
if
unknown_counter
==
0
:
print
(
''
)
if
unknown_counter
>
0
:
print
(
'
\n
%i paper(s) from the following unknown journal(s):
\n
'
%
unknown_counter
)
for
j
in
sorted
(
unknownJournals
):
print
(
j
)
print
(
'
\n
Please add the unknown journal info to configuration.py
\n
'
)
# Output file, i.e. input for CroRIS
if
counter
>
0
:
if
len
(
data
)
>
0
:
with
open
(
output_file
,
'
w
'
,
encoding
=
'
utf8
'
)
as
outfile
:
json
.
dump
(
data
,
outfile
,
ensure_ascii
=
False
,
indent
=
2
)
# Output file for papers with unknown journals
if
counter_error
>
0
:
with
open
(
output_file
.
rstrip
(
'
.json
'
)
+
'
_error.json
'
,
'
w
'
,
encoding
=
'
utf8
'
)
as
outfile
:
json
.
dump
(
error
,
outfile
,
ensure_ascii
=
False
,
indent
=
2
)
# --------------------------------------------------
if
__name__
==
'
__main__
'
:
...
...
@@ -493,7 +524,7 @@ if __name__ == '__main__':
required
=
True
)
parser
.
add_argument
(
"
-e
"
,
"
--exclude
"
,
dest
=
"
exclude
"
,
help
=
"
L
ist of DOIs to exclude
"
,
help
=
"
Text file containing a l
ist of DOIs to exclude
(one per line)
"
,
metavar
=
"
EXCLUDE
"
)
(
options
,
args
)
=
parser
.
parse_known_args
()
...
...
This diff is collapsed.
Click to expand it.