From 62d8662e130599b1fb18f74e5bcc8aaf2c234a79 Mon Sep 17 00:00:00 2001 From: Dinko Ferencek <Dinko.Ferencek@cern.ch> Date: Sun, 7 Apr 2024 23:51:47 +0200 Subject: [PATCH] JSON format that conforms to the specifications of the CroRIS API --- prepare_input.py | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/prepare_input.py b/prepare_input.py index c635a79..369c26b 100644 --- a/prepare_input.py +++ b/prepare_input.py @@ -121,6 +121,18 @@ def prepare_input(list_of_papers, output_file): # Authors all_authors = paper_data['metadata']['authors'] + # List that contains authors' CroRIS IDs + autori = [] + author_dict = { + "croris_id": None, + "oib": None, + "mbz": None + } + + # Set that contains author institutions' CroRIS IDs + inst_ids = set() + + # List that contains authors' full names authors_pretty = [] # All authors @@ -132,6 +144,10 @@ def prepare_input(list_of_papers, output_file): author_text = author['full_name'] if a in author_text: authors_pretty.append(a_pretty) + a_dict = copy.deepcopy(author_dict) + a_dict['croris_id'] = authors[a][1] + autori.append(a_dict) + inst_ids.add(authors[a][2]) break # First author @@ -215,12 +231,12 @@ def prepare_input(list_of_papers, output_file): _temp = {} _temp.update(copy.deepcopy(pub_common)) _temp['doi'] = doi + _temp['poveznice'][0]['url'] += doi _temp['autor_string'] = authors_string - _temp['naslov'] = title + _temp['autori'] = autori if collaboration: _temp['kolaboracija'] = collaboration _temp['godina'] = year - _temp['casopis'] = journal _temp['issn'] = issn[0] _temp['e-issn'] = issn[1] _temp['volumen'] = volume @@ -238,8 +254,29 @@ def prepare_input(list_of_papers, output_file): if page_tot: _temp['ukupno_stranica'] = page_tot validity_counter[1] += 1 - _temp['sazetak'] = abstract - _temp['kljucne_rijeci'] = '; '.join(keywords) + + ml = [ + { + "jezik": "en", + "trans": "o", + "naslov": title, + "sazetak": abstract, + "kljucne_rijeci": '; '.join(keywords) + } + ] + _temp['ml'] = ml + + inst_dict = { + "croris_id": None, + "mbu": None, + "uloga": 941 + } + ustanove = [] + for i_id in inst_ids: + i_dict = copy.deepcopy(inst_dict) + i_dict['croris_id'] = i_id + ustanove.append(i_dict) + _temp['ustanove'] = ustanove # Catch articles with unknown journal or invalid page info status if 'Unknown journal' in journal or (validity_counter[0] < 2 and validity_counter[1] < 2): @@ -265,7 +302,7 @@ def prepare_input(list_of_papers, output_file): print('Article number:', (article_no if article_no != '' else 'N/A')) print('Total pages:', (page_tot if page_tot != '' else 'N/A')) print('\nAbstract:', abstract) - print('\nKeywords:', keywords) + print('\nKeywords:', '; '.join(keywords)) print('------------------------------------------------') -- GitLab