# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def extend(self, other, unique=False):
    '''Extend the dataset.

    You may extend with another LHCbCompressedDataset, LHCbDataset,
    DiracFile or a list of string of LFNs.

    Args:
        other: the dataset / file(s) to add.
        unique (bool): if True, only add files which are not already in
            the dataset.  (The original body accepted this flag but
            never used it; it is now honoured, matching the docstring.)
    '''
    def _filtered(lfns):
        # Drop LFNs already present when uniqueness is requested.
        if not unique:
            return list(lfns)
        existing = set(self.getLFNs())
        return [_l for _l in lfns if _l not in existing]

    if isType(other, LHCbCompressedDataset):
        if unique:
            lfns = _filtered(other.getLFNs())
            if lfns:
                self.files.append(LHCbCompressedFileSet(lfns))
        else:
            # Fast path: adopt the other dataset's file sets directly.
            self.files.extend(other.files)
    elif isType(other, GangaLHCb.Lib.LHCbDataset.LHCbDataset):
        lfns = _filtered(other.getLFNs())
        if lfns:
            self.files.append(LHCbCompressedFileSet(lfns))
    elif isType(other, DiracFile):
        lfns = _filtered([other.lfn])
        if lfns:
            self.files.append(LHCbCompressedFileSet(lfns))
    elif isType(other, [list, tuple, GangaList]):
        lfns = _filtered(other)
        if lfns:
            self.files.append(LHCbCompressedFileSet(lfns))
    else:
        logger.error("Cannot add object of type %s to an LHCbCompressedDataset" % type(other))
    # Keep the cached total file count in sync.
    self.total = self._totalNFiles()
# NOTE(review): this is an exact duplicate of the `extend` definition above
# (the file contains three identical copies); in a single class body the
# last definition wins.  Needs de-duplication during manual reconciliation.
# NOTE(review): the `unique` flag is accepted and documented but never used
# by this body — every branch adds unconditionally.
def extend(self, other, unique=False):
'''Extend the dataset. If unique, then only add files which are not
already in the dataset. You may extend with another LHCbCompressedDataset,
LHCbDataset, DiracFile or a list of string of LFNs'''
# Another compressed dataset: adopt its file sets wholesale.
if isType(other, LHCbCompressedDataset):
self.files.extend(other.files)
# A plain LHCbDataset: compress its LFN list into one new set.
elif isType(other, GangaLHCb.Lib.LHCbDataset.LHCbDataset):
lfns = other.getLFNs()
self.files.append(LHCbCompressedFileSet(lfns))
# A single DiracFile: wrap its LFN in a one-file set.
elif isType(other, DiracFile):
self.files.append(LHCbCompressedFileSet(other.lfn))
# A sequence of LFN strings: compress it into one new set.
elif isType(other, [list, tuple, GangaList]):
self.files.append(LHCbCompressedFileSet(other))
else:
logger.error("Cannot add object of type %s to an LHCbCompressedDataset" % type(other))
# Refresh the cached total file count.
self.total = self._totalNFiles()
# --- NOTE(review): orphaned fragment ---------------------------------------
# The statements below appear to be a torn/duplicated portion of
# LHCbCompressedDataset.__init__: they reference local names `files` and
# `persistency` that are not defined in this scope, and the chunk repeats
# nearly identically elsewhere in this file.  Code left byte-identical;
# needs manual reconciliation against the canonical __init__.
# ---------------------------------------------------------------------------
#if files is just a string: wrap it in a single-element file set
if files and isType(files, str):
newset = LHCbCompressedFileSet(files)
self.files.append(newset)
#if files is a single DiracFile: store its LFN as a one-file set
if files and isType(files, DiracFile):
newset = LHCbCompressedFileSet(files.lfn)
self.files.append(newset)
#if files is a single LHCbCompressedFileSet: append it as-is
if files and isType(files, LHCbCompressedFileSet):
self.files.append(files)
#if files is a list — dispatch on the type of the first element
if files and isType(files, [list, GangaList]):
#Is it a list of strings? Then it may have been produced from the BKQuery so pass along the metadata as well
if isType(files[0], str):
newset = LHCbCompressedFileSet(files)
self.files.append(newset)
#Is it a list of DiracFiles? Collect their LFNs into one set
if isType(files[0], DiracFile):
lfns = []
for _df in files:
lfns.append(_df.lfn)
newset = LHCbCompressedFileSet(lfns)
self.files.append(newset)
#Is it a list of file sets? Adopt them directly
if isType(files[0], LHCbCompressedFileSet):
self.files.extend(files)
# Re-parent the file list and initialise bookkeeping state.
self.files._setParent(self)
self.persistency = persistency
self.current = 0
self.total = self._totalNFiles()
# --- NOTE(review): orphaned fragment ---------------------------------------
# Another torn/duplicated portion of LHCbCompressedDataset.__init__
# (same undefined locals `files`/`persistency` as the fragment above, and
# the list-handling logic is repeated twice within this very fragment).
# Code left byte-identical; needs manual reconciliation.
# ---------------------------------------------------------------------------
#if files is an LHCbDataset: compress its LFNs into one set
if files and isType(files, GangaLHCb.Lib.LHCbDataset.LHCbDataset):
newset = LHCbCompressedFileSet(files.getLFNs())
self.files.append(newset)
#if files is an LHCbCompressedDataset
if files and isType(files, LHCbCompressedDataset):
self.files.extend(files.files)
#if files is just a string
if files and isType(files, str):
newset = LHCbCompressedFileSet(files)
self.files.append(newset)
#if files is a single DiracFile
if files and isType(files, DiracFile):
newset = LHCbCompressedFileSet(files.lfn)
self.files.append(newset)
#if files is a single LHCbCompressedFileSet
if files and isType(files, LHCbCompressedFileSet):
self.files.append(files)
#if files is a list
if files and isType(files, [list, GangaList]):
#Is it a list of strings? Then it may have been produced from the BKQuery so pass along the metadata as well
if isType(files[0], str):
newset = LHCbCompressedFileSet(files)
self.files.append(newset)
#Is it a list of DiracFiles?
if isType(files[0], DiracFile):
lfns = []
for _df in files:
lfns.append(_df.lfn)
newset = LHCbCompressedFileSet(lfns)
self.files.append(newset)
#Is it a list of file sets?
# NOTE(review): this branch appends `newset` (left over from an earlier
# branch, possibly unbound) whereas the equivalent branch below does
# `self.files.extend(files)` — this looks like a corruption; confirm
# against the canonical source before fixing.
if isType(files[0], LHCbCompressedFileSet):
self.files.append(newset)
#if files is a single LHCbCompressedFileSet
if files and isType(files, LHCbCompressedFileSet):
self.files.append(files)
#if files is a list
if files and isType(files, [list, GangaList]):
#Is it a list of strings? Then it may have been produced from the BKQuery so pass along the metadata as well
if isType(files[0], str):
newset = LHCbCompressedFileSet(files)
self.files.append(newset)
#Is it a list of DiracFiles?
if isType(files[0], DiracFile):
lfns = []
for _df in files:
lfns.append(_df.lfn)
newset = LHCbCompressedFileSet(lfns)
self.files.append(newset)
#Is it a list of file sets?
if isType(files[0], LHCbCompressedFileSet):
self.files.extend(files)
# Re-parent the file list and initialise bookkeeping state.
self.files._setParent(self)
self.persistency = persistency
self.current = 0
self.total = self._totalNFiles()
logger.debug("Dataset Created")
# --- NOTE(review): orphaned fragment ---------------------------------------
# This span mixes two torn method bodies: (1) a loop that walks `newLFNs`
# and groups consecutive LFNs belonging to the same internal file set into
# a new LHCbCompressedDataset (presumably from a set-operation method such
# as difference/intersection — `newLFNs` and `step` are not defined here),
# and (2) the tail of an item-retrieval method (`__getitem__`-like: uses
# `i` and self._location to return a DiracFile).  Code left byte-identical.
# ---------------------------------------------------------------------------
setNo = len(self.files)-1
currentPrefix = None
#Iterate over the LFNs and find out where it came from
ds = LHCbCompressedDataset()
tempList = []
j = 0
while j < len(newLFNs):
# LFN still belongs to the current set: accumulate and advance.
if newLFNs[j] in self.files[setNo].getLFNs():
tempList.append(newLFNs[j])
j += 1
else:
# Flush the accumulated run as one compressed set, then move on
# to the next source set (direction given by the external `step`).
if len(tempList) > 0:
ds.addSet(LHCbCompressedFileSet(tempList))
setNo += step
tempList = []
# Flush any trailing run.
ds.addSet(LHCbCompressedFileSet(tempList))
else:
#Figure out where the file lies
setNo, setLocation = self._location(i)
# Out-of-range index: report and return None rather than raising.
if setNo < 0 or i >= self._totalNFiles():
logger.error("Unable to retrieve file %s. It is larger than the dataset size" % i)
return None
# Materialise the stored LFN as a DiracFile with our credentials.
ds = DiracFile(lfn = self.files[setNo].getLFN(setLocation), credential_requirements = self.credential_requirements)
return ds
def extend(self, other, unique=False):
    '''Extend the dataset.

    You may extend with another LHCbCompressedDataset, LHCbDataset,
    DiracFile or a list of string of LFNs.  If unique, then only add
    files which are not already in the dataset.
    '''
    if isType(other, LHCbCompressedDataset):
        # Another compressed dataset: adopt its file sets wholesale.
        self.files.extend(other.files)
    elif isType(other, GangaLHCb.Lib.LHCbDataset.LHCbDataset):
        # A plain LHCbDataset: compress its LFN list into one new set.
        self.files.append(LHCbCompressedFileSet(other.getLFNs()))
    elif isType(other, DiracFile):
        # A single DiracFile: wrap its LFN in a one-file set.
        self.files.append(LHCbCompressedFileSet(other.lfn))
    elif isType(other, [list, tuple, GangaList]):
        # A sequence of LFN strings: compress it into one new set.
        self.files.append(LHCbCompressedFileSet(other))
    else:
        logger.error("Cannot add object of type %s to an LHCbCompressedDataset" % type(other))
    # Refresh the cached total file count.
    self.total = self._totalNFiles()
def __init__(self, files=None, lfn_prefix=None):
    '''Build a compressed set of LFNs.

    Args:
        files: a single LFN string, or a list/tuple/GangaList of LFN
            strings (or of suffixes when lfn_prefix is given).
        lfn_prefix: optional common prefix; when given, `files` are
            stored verbatim as the suffixes.

    Raises:
        GangaException: if `files` is of an unsupported type.
    '''
    super(LHCbCompressedFileSet, self).__init__()
    if lfn_prefix:
        self.lfn_prefix = lfn_prefix
        # Guard against files=None so a prefix-only call does not crash.
        self.suffixes = [(_f) for _f in (files or [])]
    elif files:
        self.lfn_prefix = ''
        if not isType(files, [str, list, tuple, GangaList]):
            raise GangaException("Incorrect type %s passed to LHCbCompressedFileSet" % type(files))
        if isType(files, [list, tuple, GangaList]):
            # Factor the shared directory prefix out of the LFNs to
            # save space; a bare '/' prefix is not worth storing.
            commonpath = os.path.commonpath(files)
            if commonpath == '/':
                commonpath = ''
            suffixes = [_lfn.replace(commonpath, '', 1) for _lfn in files]
            self.lfn_prefix = commonpath
            self.suffixes = suffixes
        else:
            # BUGFIX: a single LFN string was previously appended to a
            # nonexistent `self.files` list (AttributeError); store it
            # as the sole suffix with an empty prefix instead.
            self.suffixes = [files]
# --- NOTE(review): orphaned fragment ---------------------------------------
# A third torn copy of the LHCbCompressedDataset.__init__ body (references
# the undefined local `files`); it is truncated mid-branch — the final
# `newset` is built but never appended here.  Code left byte-identical;
# needs manual reconciliation against the canonical __init__.
# ---------------------------------------------------------------------------
# Start from an empty list of compressed file sets.
self.files = []
#if files is an LHCbDataset
if files and isType(files, GangaLHCb.Lib.LHCbDataset.LHCbDataset):
newset = LHCbCompressedFileSet(files.getLFNs())
self.files.append(newset)
#if files is an LHCbCompressedDataset
if files and isType(files, LHCbCompressedDataset):
self.files.extend(files.files)
#if files is just a string
if files and isType(files, str):
newset = LHCbCompressedFileSet(files)
self.files.append(newset)
#if files is a single DiracFile
if files and isType(files, DiracFile):
newset = LHCbCompressedFileSet(files.lfn)
self.files.append(newset)
#if files is a single LHCbCompressedFileSet
if files and isType(files, LHCbCompressedFileSet):
self.files.append(files)
#if files is a list
if files and isType(files, [list, GangaList]):
#Is it a list of strings? Then it may have been produced from the BKQuery so pass along the metadata as well
if isType(files[0], str):
newset = LHCbCompressedFileSet(files)
self.files.append(newset)
#Is it a list of DiracFiles?
if isType(files[0], DiracFile):
lfns = []
for _df in files:
lfns.append(_df.lfn)
# NOTE(review): fragment truncated here — the append of this set is missing.
newset = LHCbCompressedFileSet(lfns)
# --- NOTE(review): class-body fragment -------------------------------------
# Docstring and Ganga schema of LHCbCompressedDataset; the `class` header
# line itself is missing from this chunk.  Code left byte-identical.
# ---------------------------------------------------------------------------
'''Class for handling LHCb data sets (i.e. inputdata for LHCb jobs).
This is a version of LHCbDataset that should use less disk space.
It should only be used with DiracFile objects, and is best constructed
from a BKQuery.
All of the usual methods for datasets can be used here (extend, union, difference etc).
These also work if the other dataset is a regular LHCbDataset.
The LHCbCompressedDataset furthermore offers the ability to store some metadata about
the files in it, i.e. Luminosity, EvtStat, Run no, and TCK
For this dataset everything revolves around the LFN rather than individual file objects
'''
# Ganga schema declaration: persisted attributes of the dataset object.
schema = {}
docstr = 'List of DiracFile objects'
schema['files'] = SimpleItem(defvalue=[], typelist=[LHCbCompressedFileSet], sequence=1, doc='A list of lists of the file suffixes')
schema['XMLCatalogueSlice'] = GangaFileItem(defvalue=None, doc='Use contents of file rather than generating catalog.')
schema['persistency'] = SimpleItem(defvalue=None, typelist=['str', 'type(None)'], doc='Specify the dataset persistency technology')
schema['credential_requirements'] = ComponentItem('CredentialRequirement', defvalue=None)
schema['depth'] = SimpleItem(defvalue = 0, doc='Depth')
_schema = Schema(Version(3, 0), schema)
_category = 'datasets'
_name = "LHCbCompressedDataset"
# Methods exposed to the GPI (user-facing Ganga interface).
_exportmethods = ['getReplicas', '__len__', '__getitem__', '__iter__', '__next__', 'replicate',
'append', 'extend', 'getCatalog', 'optionsString', 'getFileNames', 'getFilenameList',
'getLFNs', 'getFullFileNames', 'getFullDataset', 'hasLFNs',
'difference', 'isSubset', 'isSuperset', 'intersection',
'symmetricDifference', 'union', 'bkMetadata', 'getMetadata',
'getLuminosity', 'getEvtStat', 'getRunNumbers', 'isEmpty', 'getPFNs']
def __init__(self, files=None, metadata = None, persistency=None, depth=0, fromRef=False):
super(LHCbCompressedDataset, self).__init__()