#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is part of CERN Search.
# Copyright (C) 2018-2021 CERN.
#
# Citadel Search is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""File utilities."""

import json
11
12
13
14
15
from io import BytesIO

from flask import current_app
from invenio_db import db
from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion
Carina Antunes's avatar
Carina Antunes committed
16
from invenio_records_files.api import FileObject, FilesIterator
17
18
from invenio_records_files.models import RecordsBuckets

Carina Antunes's avatar
Carina Antunes committed
19
20
from cern_search_rest_api.modules.cernsearch.api import CernSearchRecord

21
22
23
24
25

def record_from_object_version(obj: ObjectVersion):
    """Look up the record that owns the bucket holding the given object version.

    The bucket id of ``obj`` is matched against ``RecordsBuckets`` and the
    linked record is loaded through ``CernSearchRecord.get_record``.

    NOTE(review): the association is fetched with ``one_or_none()`` and then
    dereferenced without a check, so a bucket with no record link presumably
    fails on ``record_id`` -- confirm callers only pass objects that live in
    record buckets.
    """
    bucket_link = RecordsBuckets.query.filter_by(bucket_id=obj.bucket_id).one_or_none()

    logger = current_app.logger
    logger.debug("Record Bucket: %s", str(bucket_link))

    owner = CernSearchRecord.get_record(bucket_link.record_id)
    logger.debug("Record: %s", owner.id)

    return owner


35
def persist_file_content(record: CernSearchRecord, file_content: dict, filename: str):
    """Store a file's extracted content as a JSON object in the record's content bucket.

    ``file_content`` is modified in place: its ``"attachments"`` entry, when
    present, is dropped before serialisation. The database session is
    committed once the new object version has been created.
    """
    current_app.logger.debug("Persist file: %s in record %s", filename, record.id)

    # Attachments are left out of the persisted content; a missing key is fine.
    file_content.pop("attachments", None)

    target_bucket = record.files_content.bucket
    serialized = json.dumps(file_content).encode()
    ObjectVersion.create(target_bucket, filename, stream=BytesIO(serialized))

    db.session.commit()


def delete_previous_record_file_if_exists(obj: ObjectVersion):
    """Purge every other file attached to the record that owns ``obj``.

    Only one file per record is allowed, so the record's files and then the
    files' extracted content are both cleaned up, sparing ``obj`` itself.
    """
    owner = record_from_object_version(obj)  # type: CernSearchRecord
    current_app.logger.debug("Delete previous files: %s", str(obj))

    # Each collection is resolved only when its turn comes: files first, content second.
    cleanup_steps = (
        ("Delete previous file", "files"),
        ("Delete previous file content", "files_content"),
    )
    for message, attribute in cleanup_steps:
        current_app.logger.debug(message)
        __delete_all_files_except(getattr(owner, attribute), obj)


def delete_object_version(obj: ObjectVersion):
    """Soft delete an object version in the database and remove its file from the filesystem.

    Nothing is done for an object that is already flagged as deleted.
    Otherwise the object is deleted through its bucket and key, its file
    instance is removed via ``delete_file_instance`` and the session is
    committed.
    """
    if not obj.deleted:
        current_app.logger.debug("Delete Object Version: %s", str(obj))

        # Soft delete, addressed by the object's own bucket and key
        obj.delete(obj.bucket, obj.key)

        delete_file_instance(obj)

        db.session.commit()


def delete_file_instance(obj: ObjectVersion):
    """Flag the file instance behind ``obj`` as not readable, then remove the file from storage.

    Returns without doing anything when ``obj`` is already deleted or when its
    file instance is already not readable.
    """
    if obj.deleted:
        return

    file_instance = FileInstance.get(str(obj.file_id))  # type: FileInstance
    if not file_instance.readable:
        return

    current_app.logger.debug("Delete file instance: object %s - file %s", str(obj), str(file_instance))

    # The database is updated first: flag the file as not readable and commit.
    file_instance.readable = False
    db.session.commit()

    # Only then is the file removed from disk. If the database update succeeds
    # but this removal fails, a dangling file can be left behind on disk.
    storage = file_instance.storage()
    storage.delete()

92

Carina Antunes's avatar
Carina Antunes committed
93
def delete_record_file(record: CernSearchRecord, obj: ObjectVersion):
    """Delete one file of a record and, when it exists, the extracted content stored under the same key."""
    current_app.logger.debug("Delete file: %s", str(obj))

    delete_object_version(obj)

    # No extracted content stored for this key: nothing more to clean up.
    if obj.key not in record.files_content:
        return

    delete_object_version(record.files_content[obj.key])


def delete_all_record_files(record: CernSearchRecord):
    """Delete every file of a record, followed by all of the files' extracted content."""
    current_app.logger.debug("Delete all record files: %s", str(record))

    # Attributes are looked up one at a time so the content collection is only
    # resolved after the files themselves have been deleted.
    for attribute in ("files", "files_content"):
        __delete_all_files(getattr(record, attribute))


def __delete_all_files(objects: FilesIterator):
    """Delete the object version behind each file yielded by ``objects``."""
    for entry in objects:  # type: FileObject
        delete_object_version(entry.obj)


def __delete_all_files_except(objects: FilesIterator, obj: ObjectVersion):
    """Delete the files yielded by ``objects``, sparing ``obj``.

    Entries that are not the head version, or that are already deleted, are
    left alone. For the entry stored under the same key as ``obj`` only the
    other versions are cleaned up; entries under any other key are deleted.
    """
    for entry in objects:  # type: FileObject
        candidate = entry.obj  # type: ObjectVersion

        if candidate.is_head and not candidate.deleted:
            if candidate.key == obj.key:
                # same name: drop the previous object versions, keep ``obj``
                __delete_object_versions_except(obj, objects.bucket)
            else:
                # different name: the whole file goes away
                delete_object_version(candidate)
130
131
132


def __delete_object_versions_except(obj: ObjectVersion, bucket: Bucket):
    """Delete the file instance of every version stored under ``obj.key`` in ``bucket``, except ``obj``'s own."""
    for candidate in ObjectVersion.get_versions(bucket, obj.key):
        if candidate.version_id == obj.version_id:
            # this is the version to keep
            continue

        delete_file_instance(candidate)