# Source code for alphatwirl.loop.DatasetIntoEventBuildersSplitter
# Tai Sakuma <tai.sakuma@gmail.com>
from .splitfuncs import create_file_start_length_list
##__________________________________________________________________||
class DatasetIntoEventBuildersSplitter(object):
    """Split a dataset into a list of event builders.

    Calling an instance with a dataset asks ``eventBuilderConfigMaker``
    for the files of the dataset, groups them into chunks of
    ``(files, start, length)``, turns every chunk into a config with
    ``eventBuilderConfigMaker.create_config_for`` and wraps each config
    with ``EventBuilder``.

    Parameters
    ----------
    EventBuilder : callable
        Called with one config; its return values are what ``__call__``
        returns.
    eventBuilderConfigMaker : object
        Must provide ``file_list_in(dataset, maxFiles=...)``,
        ``nevents_in_file(file)`` and
        ``create_config_for(dataset, files, start, length)``.
    maxEvents : int
        Limit on the total number of events; a negative value disables
        the limit.
    maxEventsPerRun : int
        Limit on the number of events per chunk; a negative value
        disables the limit.
    maxFiles : int
        Forwarded to ``eventBuilderConfigMaker.file_list_in``.
    maxFilesPerRun : int
        Number of files per chunk. On the fast path a negative value
        puts all files in one chunk and ``0`` produces no chunk.
    """

    def __init__(self, EventBuilder, eventBuilderConfigMaker,
                 maxEvents=-1, maxEventsPerRun=-1,
                 maxFiles=-1, maxFilesPerRun=1):
        self.EventBuilder = EventBuilder
        self.eventBuilderConfigMaker = eventBuilderConfigMaker
        self.maxEvents = maxEvents
        self.maxEventsPerRun = maxEventsPerRun
        self.maxFiles = maxFiles
        self.maxFilesPerRun = maxFilesPerRun

        # Kept as an instance attribute so that the splitting function
        # can be replaced on an instance.
        self.create_file_start_length_list = create_file_start_length_list

    def __repr__(self):
        return '{}(EventBuilder = {!r}, eventBuilderConfigMaker = {!r}, maxEvents = {!r}, maxEventsPerRun = {!r}, maxFiles = {!r}, maxFilesPerRun = {!r})'.format(
            self.__class__.__name__,
            self.EventBuilder,
            self.eventBuilderConfigMaker,
            self.maxEvents,
            self.maxEventsPerRun,
            self.maxFiles,
            self.maxFilesPerRun
        )

    def __call__(self, dataset):
        """Return a list with one event builder per chunk of ``dataset``."""
        file_start_length_list = self._file_start_length_list(
            dataset,
            maxEvents=self.maxEvents,
            maxEventsPerRun=self.maxEventsPerRun,
            maxFiles=self.maxFiles,
            maxFilesPerRun=self.maxFilesPerRun
        )
        configs = self._create_configs(dataset, file_start_length_list)
        return [self.EventBuilder(c) for c in configs]

    def _file_start_length_list(self, dataset, maxEvents=-1, maxEventsPerRun=-1,
                                maxFiles=-1, maxFilesPerRun=1):
        """Return a list of ``(files, start, length)`` chunks for ``dataset``.

        When both event limits are negative the chunks are built from
        the file list alone, each as ``(files, 0, -1)``. Otherwise the
        number of events in the files is looked up and the chunking is
        delegated to ``self.create_file_start_length_list``.
        """
        if maxEvents < 0 and maxEventsPerRun < 0:
            # fast path. unnecessary to get the number events in the files
            files = self.eventBuilderConfigMaker.file_list_in(
                dataset, maxFiles=maxFiles)
            if not files:
                return []
            if maxFilesPerRun < 0:
                # no limit on files per chunk: everything in one chunk
                return [(files, 0, -1)]
            if maxFilesPerRun == 0:
                # a zero step would make range() below raise
                return []
            return [
                (files[i:(i + maxFilesPerRun)], 0, -1)
                for i in range(0, len(files), maxFilesPerRun)
            ]

        # this can be slow
        file_nevents_list = self._file_nevents_list_for(
            dataset,
            maxEvents=maxEvents,
            maxFiles=maxFiles
        )
        return self.create_file_start_length_list(
            file_nevents_list=file_nevents_list,
            max_events_per_run=maxEventsPerRun,
            max_events_total=maxEvents,
            max_files_per_run=maxFilesPerRun
        )

    def _file_nevents_list_for(self, dataset, maxEvents=-1, maxFiles=-1):
        """Return a list of ``(file, nevents)`` pairs for ``dataset``.

        With a non-negative ``maxEvents``, files are no longer queried
        once the accumulated number of events has reached ``maxEvents``.
        """
        files = self.eventBuilderConfigMaker.file_list_in(
            dataset, maxFiles=maxFiles)
        totalEvents = 0
        ret = []
        for f in files:
            # checked before the query so that no file is opened needlessly
            if 0 <= maxEvents <= totalEvents:
                return ret
            n = self.eventBuilderConfigMaker.nevents_in_file(f)
            ret.append((f, n))
            totalEvents += n
        return ret

    def _create_configs(self, dataset, file_start_length_list):
        """Return one config per ``(files, start, length)`` chunk."""
        return [
            self.eventBuilderConfigMaker.create_config_for(
                dataset, files, start, length)
            for files, start, length in file_start_length_list
        ]
##__________________________________________________________________||