"""
.. See the NOTICE file distributed with this work for additional information
regarding copyright ownership.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an 'AS IS' BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
""" # pylint: disable=too-many-lines
from __future__ import print_function, unicode_literals
import datetime
import os
import random
import sys
import configparser
import pymongo
from pymongo import MongoClient, ReadPreference
import bson
from bson.objectid import ObjectId
from dm_generator.GenerateSampleBigBed import GenerateSampleBigBed
from dm_generator.GenerateSampleBigWig import GenerateSampleBigWig
from dm_generator.GenerateSampleCoords import GenerateSampleCoords
from dm_generator.GenerateSampleAdjacency import GenerateSampleAdjacency
class dmp(object): # pylint: disable=invalid-name
"""
API for management of files within the VRE
"""
def __init__(self, cnf_loc='', test=False):
"""
Initialise the module and setup parameters
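Example
-------
The configuration file read from `cnf_loc` is expected to provide a
"dmp" section with the keys shown below. This is an illustrative sketch
only; all values are placeholders:
.. code-block:: ini
:linenos:
[dmp]
host = localhost
port = 27017
user = dmp_user
pass = dmp_pass
db = dmp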
"""
config = configparser.RawConfigParser()
config.read(cnf_loc)
if test is True:
import mongomock
self.client = mongomock.MongoClient()
self.db_handle = self.client["dmp"]
self._test_loading_dataset()
else:
host = config.get("dmp", "host")
port = config.getint("dmp", "port")
user = config.get("dmp", "user")
password = config.get("dmp", "pass")
dmp_db = config.get("dmp", "db")
try:
self.client = MongoClient(
host, port,
read_preference=ReadPreference.SECONDARY_PREFERRED
)
self.client.admin.authenticate(user, password)
self.db_handle = self.client[dmp_db]
except pymongo.errors.PyMongoError as error:
print("Error: %s" % error)
sys.exit(1)
self.entries = self.db_handle.entries
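# Ensure the fields used by the lookup methods below are indexed.
# create_index is idempotent, and background=True asks MongoDB to build
# the indexes without blocking the collection.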
self.db_handle.entries.create_index(
[('user_id', pymongo.ASCENDING)],
unique=False, background=True)
self.db_handle.entries.create_index(
[('user_id', pymongo.ASCENDING), ('file_type', pymongo.ASCENDING)],
unique=False, background=True)
self.db_handle.entries.create_index(
[('user_id', pymongo.ASCENDING), ('data_type', pymongo.ASCENDING)],
unique=False, background=True)
self.db_handle.entries.create_index(
[('user_id', pymongo.ASCENDING), ('taxon_id', pymongo.ASCENDING)],
unique=False, background=True)
@staticmethod
def _copy_to_tmp(file_path, tmp_path):
"""
Copy file to a temporary location for testing
"""
if os.path.isfile(tmp_path) is False:
with open(tmp_path, 'wb') as f_out:
with open(file_path, 'rb') as f_in:
f_out.write(f_in.read())
return True
def _test_loading_dataset(self): # pylint: disable=too-many-locals
"""
Load a test dataset into the DM API object
"""
users = ["adam", "ben", "chris", "denis", "eric"]
file_types = [
"fastq", "fa", "fasta", "bam", "bed", "bb", "hdf5", "tsv", "gz",
"tbi", "wig", "bw"
]
data_types = ['RNA-seq', 'MNase-Seq', 'ChIP-seq', 'WGBS', 'HiC']
compressed = [None, 'gzip', 'zip']
resource_path = os.path.dirname(__file__)
file_id = self.set_file(
"rao", os.path.join(resource_path, 'rao2014.hdf5'),
"file", "hdf5", 64000, None, "HiC", 9606,
meta_data={'assembly': 'GCA_0123456789'}
)
data_path = os.path.join(os.path.dirname(__file__), "../tests/data/")
file_id = self.set_file(
"test", os.path.realpath(os.path.join(data_path, 'sample.bb')),
"file", "bb", 64000, None, "RNA-seq", 9606,
meta_data={'assembly': 'GCA_0123456789'},
_id=ObjectId(str("0123456789ab0123456789aa"))
)
if os.path.isfile(data_path + 'sample.bb') is False:
gsbb = GenerateSampleBigBed()
gsbb.main()
file_id = self.set_file(
"test", os.path.realpath(os.path.join(data_path, 'sample.bw')),
"file", "bw", 64000, None, "RNA-seq", 9606,
meta_data={'assembly': 'GCA_0123456789'},
_id=ObjectId(str("0123456789ab0123456789ab"))
)
if os.path.isfile(data_path + 'sample.bw') is False:
gsbw = GenerateSampleBigWig()
gsbw.main()
file_id = self.set_file(
"test", os.path.join(data_path, 'sample_coords.hdf5'),
"file", "hdf5", 64000, None, "HiC", 9606,
meta_data={'assembly': 'GCA_0123456789'},
_id=ObjectId(str("0123456789ab0123456789ac"))
)
if os.path.isfile(data_path + 'sample_coords.hdf5') is False:
gsc = GenerateSampleCoords()
gsc.main()
file_id = self.set_file(
"test", os.path.join(data_path, 'sample_adjacency.hdf5'),
"file", "hdf5", 64000, None, "HiC", 9606,
meta_data={'assembly': 'GCA_0123456789'},
_id=ObjectId(str("0123456789ab0123456789ad"))
)
if os.path.isfile(data_path + 'sample_adjacency.hdf5') is False:
gsa = GenerateSampleAdjacency()
gsa.main()
for user in users:
data_type = 'RNA-seq'
file_handle = '/tmp/test/' + data_type + '/test_rna-seq.fastq'
file_type = "fastq"
zipped = None
file_id = self.set_file(
user, file_handle, "file", file_type, 64000, None, data_type, 9606, None, None,
meta_data={'assembly': 'GCA_0123456789'})
file_handle = '/tmp/test/' + data_type + '/test_rna-seq.bam'
self.set_file(
user, file_handle, "file", 'bam', 64000, None, data_type, 9606, None, [file_id],
meta_data={'assembly': 'GCA_0123456789', 'tool': 'bwa_aligner'})
for i in range(10):
user = random.choice(users)
file_type = random.choice(file_types)
data_type = random.choice(data_types)
zipped = random.choice(compressed)
file_handle = '/tmp/test/' + data_type + '/test_' + str(i) + '.' + file_type
file_id = self.set_file(
user, file_handle, "file", file_type, 64000, None, data_type, 9606, zipped,
meta_data={'assembly': 'GCA_0123456789'})
if data_type == 'RNA-seq' and file_type == 'fastq' and random.choice([0, 1]) == 1:
file_handle = '/tmp/test/' + data_type + '/test_' + str(i) + '.bam'
self.set_file(
user, file_handle, "file", 'bam', 64000, None,
data_type, 9606, None, [file_id],
meta_data={'assembly': 'GCA_0123456789', 'tool': 'bwa_aligner'})
def _get_rows(self, user_id, key=None, value=None, rest=False):
"""
Get a list of the file dictionary objects for a given `user_id`,
optionally filtered by a single `key`/`value` pair
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
key : str
Name of the entry field to filter on (e.g. "file_type" or "taxon_id")
value : str, int, float or ObjectId
Value that the given field must match
rest : bool
If True, file system specific fields (file_path, path_type,
parent_dir) are omitted from the returned entries
Returns
-------
list of dict
file_path : str
Location of the file in the file system
file_type : str
File format (see validate_file)
data_type : str
The type of information in the file (RNA-seq, ChIP-seq, etc)
taxon_id : int
Taxon ID of the species that the file has been derived from
compressed : str
Type of compression (None, gzip, zip)
source_id : list
List of IDs of files that were processed to generate this file
meta_data : dict
Dictionary object containing the extra data related to the
generation of the file or describing the way it was processed
creation_time : str
Time at which the file was loaded into the system
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
da._get_rows(<user_id>, "file_type", "fastq")
"""
entries = self.db_handle.entries
files = []
row_filter = {"user_id": user_id}
if (
key is not None
and isinstance(str(key), str)
and isinstance(value, (str, int, float, bson.objectid.ObjectId))
):
row_filter[key] = value
if rest is True:
results = entries.find(
row_filter,
{
"file_type": 1, "size": 1, "data_type": 1, "taxon_id": 1,
"source_id": 1, "meta_data": 1, "creation_time": 1
}
)
else:
results = entries.find(
row_filter,
{
"file_path": 1, "path_type": 1, "file_type": 1, "size": 1,
"parent_dir": 1, "data_type": 1, "taxon_id": 1,
"source_id": 1, "meta_data": 1, "creation_time": 1
}
)
for entry in results:
entry["_id"] = str(entry["_id"])
entry["creation_time"] = str(entry["creation_time"])
if "expiration_date" in entry["meta_data"]:
entry["meta_data"]["expiration_date"] = str(entry["meta_data"]["expiration_date"])
files.append(entry)
return files
def get_file_by_id(self, user_id, file_id, rest=False):
"""
Returns the file data based on the unique ID of a given file
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
file_id : str
ID of the file. This is the value returned when a file is loaded
into the DMP or is the `_id` for a given file when the files have
been retrieved.
Returns
-------
dict
file_path : str
Location of the file in the file system
path_type : str
File or Folder
file_type : str
File format (see validate_file)
size : int
Size of the file
parent_dir : str
Location of the parent dir
data_type : str
The type of information in the file (RNA-seq, ChIP-seq, etc)
taxon_id : int
Taxon ID of the species that the file has been derived from
compressed : str
Type of compression (None, gzip, zip)
source_id : list
List of IDs of files that were processed to generate this file
meta_data : dict
Dictionary object containing the extra data related to the
generation of the file or describing the way it was processed
creation_time : str
Time at which the file was loaded into the system
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
da.get_file_by_id(<user_id>, <unique_file_id>)
"""
file_obj = self._get_rows(str(user_id), '_id', ObjectId(str(file_id)), rest)
if not file_obj:
return {"msg": "No files found"}
return file_obj[0]
def get_file_by_file_path(self, user_id, file_path, rest=False):
"""
Get a list of the file dictionary objects given a `user_id` and
`file_path`
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
file_path : str
File path (see validate_file)
Returns
-------
list of dict
file_path : str
Location of the file in the file system
file_type : str
File format (see validate_file)
data_type : str
The type of information in the file (RNA-seq, ChIP-seq, etc)
taxon_id : int
Taxon ID of the species that the file has been derived from
compressed : str
Type of compression (None, gzip, zip)
source_id : list
List of IDs of files that were processed to generate this file
meta_data : dict
Dictionary object containing the extra data related to the
generation of the file or describing the way it was processed
creation_time : str
Time at which the file was loaded into the system
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
da.get_file_by_file_path(<user_id>, <file_path>)
"""
file_obj = self._get_rows(str(user_id), 'file_path', str(file_path), rest)
return file_obj
def get_files_by_user(self, user_id, rest=False):
"""
Get a list of the file dictionary objects given a `user_id`
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
Returns
-------
list
List of dict objects for each file that has been loaded by a user.
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
da.get_files_by_user(<user_id>)
"""
file_obj = self._get_rows(str(user_id), None, None, rest)
if not file_obj:
return {"msg": "No files found"}
return file_obj
def get_files_by_file_type(self, user_id, file_type, rest=False):
"""
Get a list of the file dictionary objects given a `user_id` and
`file_type`
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
file_type : str
File format (see validate_file)
Returns
-------
list of dict
file_path : str
Location of the file in the file system
file_type : str
File format (see validate_file)
data_type : str
The type of information in the file (RNA-seq, ChIP-seq, etc)
taxon_id : int
Taxon ID of the species that the file has been derived from
compressed : str
Type of compression (None, gzip, zip)
source_id : list
List of IDs of files that were processed to generate this file
meta_data : dict
Dictionary object containing the extra data related to the
generation of the file or describing the way it was processed
creation_time : str
Time at which the file was loaded into the system
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
da.get_files_by_file_type(<user_id>, <file_type>)
"""
file_obj = self._get_rows(str(user_id), "file_type", str(file_type), rest)
if not file_obj:
return {"msg": "No files found"}
return file_obj
def get_files_by_data_type(self, user_id, data_type, rest=False):
"""
Get a list of the file dictionary objects given a `user_id` and
`data_type`
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
data_type : str
The type of information in the file (RNA-seq, ChIP-seq, etc)
Returns
-------
list of dict
file_path : str
Location of the file in the file system
file_type : str
File format (see validate_file)
data_type : str
The type of information in the file (RNA-seq, ChIP-seq, etc)
taxon_id : int
Taxon ID of the species that the file has been derived from
compressed : str
Type of compression (None, gzip, zip)
source_id : list
List of IDs of files that were processed to generate this file
meta_data : dict
Dictionary object containing the extra data related to the
generation of the file or describing the way it was processed
creation_time : str
Time at which the file was loaded into the system
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
da.get_files_by_data_type(<user_id>, <data_type>)
"""
file_obj = self._get_rows(str(user_id), "data_type", str(data_type), rest)
if not file_obj:
return {"msg": "No files found"}
return file_obj
def get_files_by_taxon_id(self, user_id, taxon_id, rest=False):
"""
Get a list of the file dictionary objects given a `user_id` and
`taxon_id`
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
taxon_id : int
Taxon ID of the species that the file has been derived from
Returns
-------
list of dict
file_path : str
Location of the file in the file system
file_type : str
File format (see validate_file)
data_type : str
The type of information in the file (RNA-seq, ChIP-seq, etc)
taxon_id : int
Taxon ID of the species that the file has been derived from
compressed : str
Type of compression (None, gzip, zip)
source_id : list
List of IDs of files that were processed to generate this file
meta_data : dict
Dictionary object containing the extra data related to the
generation of the file or describing the way it was processed
creation_time : str
Time at which the file was loaded into the system
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
da.get_files_by_taxon_id(<user_id>, <taxon_id>)
"""
file_obj = self._get_rows(str(user_id), "taxon_id", int(taxon_id), rest)
if not file_obj:
return {"msg": "No files found"}
return file_obj
def get_files_by_assembly(self, user_id, assembly, rest=False):
"""
Get a list of the file dictionary objects given a `user_id` and
`assembly`
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
assembly : str
Assembly of the species that the file has been derived from
Returns
-------
list of dict
file_path : str
Location of the file in the file system
file_type : str
File format (see validate_file)
data_type : str
The type of information in the file (RNA-seq, ChIP-seq, etc)
taxon_id : int
Taxon ID of the species that the file has been derived from
compressed : str
Type of compression (None, gzip, zip)
source_id : list
List of IDs of files that were processed to generate this file
meta_data : dict
Dictionary object containing the extra data related to the
generation of the file or describing the way it was processed
creation_time : str
Time at which the file was loaded into the system
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
da.get_files_by_assembly(<user_id>, <assembly>)
"""
file_obj = self._get_rows(str(user_id), "meta_data.assembly", str(assembly), rest)
if not file_obj:
return {"msg": "No files found"}
return file_obj
def _get_file_parents(self, user_id, file_id):
"""
Private function for getting all parents of a file_id. This function
recursively goes up the tree of parents to get a full history.
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
file_id : str
File ID of the leaf file
Returns
-------
file_ids : list
List of parent file_ids
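Example
-------
An illustrative sketch of the returned structure; the IDs shown are
placeholders:
.. code-block:: python
:linenos:
parents = self._get_file_parents(<user_id>, <file_id>)
# e.g. [[<file_id>, <parent_id>], [<parent_id>, <grandparent_id>]]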
"""
entries = self.db_handle.entries
file_obj = entries.find_one(
{'user_id': user_id, '_id': ObjectId(file_id)}, {"source_id": 1}
)
parent_files = []
if file_obj is not None and file_obj['source_id']:
source_count = len(file_obj['source_id'])
if source_count > 0:
for source_id in file_obj['source_id']:
parent_files.append([file_id, str(source_id)])
parent_files += self._get_file_parents(user_id, source_id)
return parent_files
def get_file_history(self, user_id, file_id):
"""
Returns the full chain of file_ids from the current file back to the
original file(s)
Needs work to define the best format for declaring the history
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
file_id : str
ID of the file. This is the value returned when a file is loaded
into the DMP or is the `_id` for a given file when the files have
been retrieved.
Returns
-------
list
List of lists representing the adjacency of child and parent files.
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
history = da.get_file_history(<user_id>, "aLongString")
print(history)
Output:
``[['aLongString', 'parentOfaLongString'], ['parentOfaLongString', 'parentOfParent']]``
These IDs can then be requested to return the meta data and locations
with the `get_file_by_id` method.
"""
unique_data = [
list(x) for x in set(tuple(x) for x in self._get_file_parents(user_id, file_id))
]
return unique_data
def remove_file(self, user_id, file_id):
"""
Removes a single file entry from the data management records. Returns
the ID of the entry that was removed
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
file_id : str
ID of the file. This is the value returned when a file is loaded
into the DMP or is the `_id` for a given file when the files have
been retrieved.
Returns
-------
str
The file_id of the removed file.
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
da.remove_file(<user_id>, <file_id>)
"""
self.db_handle.entries.delete_one({'user_id': user_id, '_id': ObjectId(file_id)})
return file_id
@staticmethod
def validate_file(entry):
"""
Validate that the required meta data for a given entry is present. If
there is missing data then a ValueError exception is raised. This
function checks that all required fields are defined and that, where
particular options are selected, the matching supporting data is also
present
Parameters
----------
entry : dict
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
file_path : str
Location of the file in the file system
path_type : str
One of "file", "dir" or "link"
file_type : str
File format ("amb", "ann", "bam", "bb", "bed", "bt2", "bw",
"bwt", "cpt", "csv", "dcd", "fa", "fasta", "fastq", "gem",
"gff3", "gz", "hdf5", "json", 'lif', "pac", "pdb", "pdf", "png",
"prmtop", "sa", "tbi", "tif", "tpr", "trj", "tsv", "txt", "wig")
size : int
Size of the file in bytes
data_type : str
The type of information in the file (RNA-seq, ChIP-seq, etc)
taxon_id : int
Taxon ID of the species that the file has been derived from
compressed : str
Type of compression (None, gzip, zip)
source_id : list
List of IDs of files that were processed to generate this file
meta_data : dict
Dictionary object containing the extra data related to the
generation of the file or describing the way it was processed
assembly : string
Returns
-------
bool
Returns True if there are no errors with the entry
If there are issues with the entry then a ValueError is raised.
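Example
-------
A minimal sketch of an entry that passes validation; the path, size
and assembly used here are illustrative values only:
.. code-block:: python
:linenos:
from dmp import dmp
entry = {
"user_id": "user1",
"file_path": "/tmp/example.bam",
"path_type": "file",
"file_type": "bam",
"size": 64000,
"data_type": "RNA-seq",
"taxon_id": 9606,
"compressed": None,
"source_id": None,
"meta_data": {"assembly": "GCA_0123456789"}
}
dmp.validate_file(entry) # returns True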
"""
# Check the user_id is not empty:
if 'user_id' not in entry or entry['user_id'] is None or entry['user_id'] == '':
raise ValueError('User ID must be specified for all entries')
# Check the file_path is not empty:
if 'file_path' not in entry or entry['file_path'] is None or entry['file_path'] == '':
raise ValueError('File path must be specified for all entries')
if entry['path_type'] not in ['file', 'dir', 'link']:
raise ValueError('Path type must be of value file|dir|link')
# Defined list of accepted file types
file_types = {
"amb": ["assembly"],
"ann": ["assembly"],
"bam": ["assembly"],
"bb": ["assembly"],
"bed": ["assembly"],
"bt2": ["assembly"],
"bw": ["assembly"],
"bwt": ["assembly"],
"cpt": [],
"csv": [],
"dcd": [],
"fa": [],
"fasta": ["assembly"], # This might not always be true and might need to be reviewed
"fastq": [],
"gem": ["assembly"],
"gff3": ["assembly"],
"gz": [],
"hdf5": ["assembly"],
"json": [],
'lif': [],
"pac": ["assembly"],
"pdb": [],
"pdf": [],
"png": [],
"prmtop": [],
"sa": ["assembly"],
"tbi": ["assembly"],
"tif": [],
"tpr": [],
"trj": [],
"tsv": [],
"txt": [],
"wig": ["assembly"]
}
# Check all files match the defined types
if (
'file_type' not in entry or
entry['file_type'] == "" or
entry['file_type'] not in file_types
):
raise ValueError(
"File type must be one of the valid file types: " + ','.join(file_types)
)
if isinstance(entry['size'], int) is False:
raise TypeError('Size must be an integer')
# Check all files have a matching Taxon ID
if 'taxon_id' not in entry or entry['taxon_id'] is None:
raise ValueError('Taxon ID must be specified for all entries')
# Require assembly in the meta_data
ft_assembly_required = [k for k in file_types if "assembly" in file_types[k]]
if str.lower(str(entry['file_type'])) in ft_assembly_required:
if 'meta_data' not in entry or 'assembly' not in entry['meta_data']:
raise ValueError(
'Matching assembly ID is required within the meta_data field'
)
if entry['source_id'] is not None:
if 'meta_data' not in entry or 'tool' not in entry['meta_data']:
raise ValueError(
'Matching Tool name is required within the meta_data field'
)
return True
def set_file( # pylint: disable=too-many-arguments,too-many-locals
self, user_id, file_path, path_type, file_type="", size=0, parent_dir="", data_type="",
taxon_id="", compressed=None, source_id=None, meta_data=None, **kwargs):
"""
Adds a file to the data management API.
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
file_path : str
Location of the file in the file system
path_type : str
One of "file", "dir" or "link"
parent_dir : str
_id of the parent directory
file_type : str
File format (see validate_file)
size : int
File size in bytes
data_type : str
The type of information in the file (RNA-seq, ChIP-seq, etc)
taxon_id : int
Taxon ID of the species that the file has been derived from
compressed : str
Type of compression (None, gzip, zip)
source_id : list
List of IDs of files that were processed to generate this file
meta_data : dict
Dictionary object containing the extra data related to the
generation of the file or describing the way it was processed
assembly : str
Dependent parameter. If the sequence has been aligned at some
point during the production of this file then the assembly must
be recorded.
Returns
-------
str
This is an id for that file within the system and can be used for
tracing this file and where it is used and where it has come from.
Example
-------
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
unique_file_id = da.set_file(
'user1', '/tmp/example_file.fastq', 'file', 'fastq', 64000, None,
'RNA-seq', 9606, None)
If there is a processed result of 1 or more files then these can be
specified using the file_id:
>>> da.set_file(
'user1', '/tmp/example_file.bam', 'file', 'bam', 64000, None,
'RNA-seq', 9606, None, source_id=[<file_id_1>, <file_id_2>],
meta_data={'assembly': 'GCA_0000nnnn', 'tool': 'bwa_aligner'})
Meta data about the file can also be included to provide extra
information about the file, origins or how it was generated:
>>> da.set_file(
'user1', '/tmp/example_file.fastq', 'file', 'fastq', 64000, None,
'RNA-seq', 9606, None, meta_data={'assembly': 'GCA_0000nnnn',
'downloaded_from': 'http://www.', })
"""
entry = {
"user_id": user_id,
"file_path": file_path,
"path_type": path_type,
"parent_dir": parent_dir,
"file_type": file_type,
"size": size,
"data_type": data_type,
"taxon_id": taxon_id,
"compressed": compressed,
"source_id": source_id,
"meta_data": meta_data,
"creation_time": datetime.datetime.utcnow()
}
date_delta = datetime.timedelta(days=84) # 12 weeks
if entry["meta_data"] is None:
entry["meta_data"] = {}
entry["meta_data"]["expiration_date"] = entry["creation_time"] + date_delta
entry.update(kwargs)
self.validate_file(entry)
entries = self.db_handle.entries
entry_id = entries.insert_one(entry).inserted_id
return str(entry_id)
def modify_column(self, user_id, file_id, key, value):
"""
Update a key value pair for the record
Parameters
----------
user_id : str
Identifier to uniquely locate the users files. Can be set to
"common" if the files can be shared between users
file_id : str
ID of the file. This is the value returned when a file is loaded
into the DMP or is the `_id` for a given file when the files have
been retrieved.
key : str
Unique key for the identification of the extra meta data. If the key
matches a value already in the meta data then it over-writes the
current value.
value
Value to be stored for the given key. This can be a str, int, list
or dict.
Returns
-------
str
This is an id for that file within the system and can be used for
tracing this file and where it is used and where it has come from.
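Example
-------
An illustrative call that updates the recorded size of an entry; the
value 128000 is a placeholder:
.. code-block:: python
:linenos:
from dmp import dmp
da = dmp()
da.modify_column(<user_id>, <file_id>, "size", 128000)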
"""
entries = self.db_handle.entries
entry = entries.find_one(
{'user_id': user_id, '_id': ObjectId(file_id)}
)
if str(key) in ['size', 'taxon_id']:
entry[str(key)] = int(value)
else:
entry[str(key)] = value
# Check that the changes are still valid
self.validate_file(entry)
# Update the entry within the mongodb
entries.update_one(
{'user_id': user_id, '_id': ObjectId(file_id)},
{'$set': {str(key): entry[str(key)]}}
)
return file_id