Source code for alphatwirl.collector.ToTupleListWithDatasetColumn

# Tai Sakuma <tai.sakuma@gmail.com>

##__________________________________________________________________||
class ToTupleListWithDatasetColumn(object):
    """Combine the results of readers, grouped by dataset, into one table.

    The table is a list of tuples. The first tuple is a header row made
    of ``datasetColumnName`` followed by ``summaryColumnNames``. Every
    other row is a dataset name followed by one of the tuples returned
    by ``to_tuple_list()`` of the summed results of the readers for
    that dataset.

    Args:
        summaryColumnNames: sequence (tuple, list, ...) of the header
            labels placed after the dataset column.
        datasetColumnName (str): header label of the leading dataset
            column.
    """

    def __init__(self, summaryColumnNames, datasetColumnName='component'):
        # stored as given so that ``__repr__`` shows exactly what was
        # passed in; ``combine()`` converts to a tuple where needed
        self.summaryColumnNames = summaryColumnNames
        self.datasetColumnName = datasetColumnName

    def __repr__(self):
        name_value_pairs = (
            ('summaryColumnNames', self.summaryColumnNames),
            ('datasetColumnName', self.datasetColumnName),
        )
        return '{}({})'.format(
            self.__class__.__name__,
            ', '.join(['{} = {!r}'.format(n, v) for n, v in name_value_pairs]),
        )

    def combine(self, dataset_readers_list):
        """Return the header row followed by one row per summary entry.

        Args:
            dataset_readers_list: list of ``(dataset, readers)`` pairs,
                where ``readers`` is a possibly empty sequence of
                objects with a ``results()`` method, e.g.::

                    [
                        ('QCD', (reader1, reader2)),
                        ('TTJets', (reader3, )),
                        ('WJets', (reader4, )),
                        ('ZJets', ( )),
                    ]

        Returns:
            ``None`` if no dataset has any reader. Otherwise a list of
            tuples starting with the header row, e.g.::

                [
                    ('dataset', 'htbin', 'njetbin', 'n', 'nvar'),
                    ('QCD', 200, 2, 120, 240),
                    ('QCD', 300, 2, 490, 980),
                    ('QCD', 300, 3, 210, 420),
                    ('TTJets', 300, 2, 20, 40),
                    ('TTJets', 300, 3, 15, 30)
                ]

            A dataset whose ``to_tuple_list()`` is empty contributes no
            rows.
        """

        # remove entries with no readers; an empty input stays empty
        dataset_readers_list = [l for l in dataset_readers_list if l[1]]
        if not dataset_readers_list:
            return None

        dataset_summarizers_list = [
            (d, tuple(r.results() for r in rs))
            for d, rs in dataset_readers_list
        ]
        # e.g.,
        # dataset_summarizers_list = [
        #     ('QCD', (summarizer1, summarizer2)),
        #     ('TTJets', (summarizer3, )),
        #     ('WJets', (summarizer4, )),
        # ]

        # note: summarizers can be added. sum() starts from 0, so the
        # results objects need to support ``0 + summarizer`` as well.
        dataset_summarizer_pairs = [
            (d, sum(s)) for d, s in dataset_summarizers_list
        ]
        # e.g.,
        # dataset_summarizer_pairs = [
        #     ('QCD', summarizer1 + summarizer2),
        #     ('TTJets', summarizer3),
        #     ('WJets', summarizer4),
        # ]

        dataset_tuple_list_pairs = [
            (d, s.to_tuple_list()) for d, s in dataset_summarizer_pairs
        ]
        # e.g.,
        # dataset_tuple_list_pairs = [
        #     ('QCD', [
        #         (200, 2, 120, 240),
        #         (300, 2, 490, 980),
        #         (300, 3, 210, 420)
        #     ]),
        #     ('TTJets', [
        #         (300, 2, 20, 40),
        #         (300, 3, 15, 30)
        #     ]),
        #     ('WJets', [])
        # ]

        # tuple() lets the column names and the rows be any sequence
        # (e.g., lists), not only tuples
        header = (self.datasetColumnName, ) + tuple(self.summaryColumnNames)

        # start from the header instead of inserting it at the front of
        # the finished list
        ret = [header]
        for dataset, tuple_list in dataset_tuple_list_pairs:
            ret.extend([(dataset, ) + tuple(e) for e in tuple_list])

        return ret
##__________________________________________________________________||