|
[Rivet-svn] r3085 - in trunk: . bin pyext
blackhole at projects.hepforge.org blackhole at projects.hepforge.org
Sun May 8 18:32:55 BST 2011
Author: buckley Date: Sun May 8 18:32:55 2011 New Revision: 3085 Log: Extending flat2aida to be able to read from and write to stdin/out as for aida2flat, and also eliminating the internal histo parsing representation in favour of the one in lighthisto. lighthisto's fromFlat also needed a bit of an overhaul: it has been extended to parse each histo's chunk of text (including BEGIN and END lines) in fromFlatHisto, and for fromFlat to parse a collection of histos from a file, in keeping with the behaviour of fromDPS/fromAIDA. Merging into Professor is now needed. Modified: trunk/ChangeLog trunk/bin/aida2flat trunk/bin/flat2aida trunk/pyext/lighthisto.py Modified: trunk/ChangeLog ============================================================================== --- trunk/ChangeLog Sun May 8 14:16:28 2011 (r3084) +++ trunk/ChangeLog Sun May 8 18:32:55 2011 (r3085) @@ -1,5 +1,15 @@ 2011-05-08 Andy Buckley <andy at insectnation.org> + * Extending flat2aida to be able to read from and write to + stdin/out as for aida2flat, and also eliminating the internal + histo parsing representation in favour of the one in + lighthisto. lighthisto's fromFlat also needed a bit of an + overhaul: it has been extended to parse each histo's chunk of + text (including BEGIN and END lines) in fromFlatHisto, and for + fromFlat to parse a collection of histos from a file, in keeping + with the behaviour of fromDPS/fromAIDA. Merging into Professor is + now needed. 
+ * Extending aida2flat to have a better usage message, to accept input from stdin for command chaining via pipes, and to be a bit more sensibly internally structured (although it also now has to Modified: trunk/bin/aida2flat ============================================================================== --- trunk/bin/aida2flat Sun May 8 14:16:28 2011 (r3084) +++ trunk/bin/aida2flat Sun May 8 18:32:55 2011 (r3085) @@ -89,10 +89,13 @@ ## Initialise steering variables which need a bit more care + import re if opts.PATHPATTERNS is None: opts.PATHPATTERNS = [] + opts.PATHPATTERNS = [re.compile(r) for r in opts.PATHPATTERNS] if opts.PATHUNPATTERNS is None: opts.PATHUNPATTERNS = [] + opts.PATHUNPATTERNS = [re.compile(r) for r in opts.PATHUNPATTERNS] if opts.GNUPLOT: opts.SPLITOUTPUT = True @@ -117,34 +120,32 @@ ## Run over the files and build histo objects selected by the pattern filtering histos = {} for aidafile in args: - if aidafile != "-": - if not os.access(aidafile, os.R_OK): - logging.error("%s can not be read" % aidafile) - sys.exit(1) try: if aidafile == "-": tree = ET.parse(sys.stdin) else: + if not os.access(aidafile, os.R_OK): + logging.error("%s can not be read" % aidafile) + sys.exit(1) tree = ET.parse(aidafile) except: logging.error("%s can not be parsed as XML" % aidafile) sys.exit(1) for dps in tree.findall("dataPointSet"): - useThisDps = True + useThis = True dpspath = os.path.join(dps.get("path"), dps.get("name")) - import re if opts.PATHPATTERNS: - useThisDps = False + useThis = False for regex in opts.PATHPATTERNS: - if re.compile(regex).search(dpspath): - useThisDps = True + if regex.search(dpspath): + useThis = True break - if useThisDps and opts.PATHUNPATTERNS: + if useThis and opts.PATHUNPATTERNS: for regex in opts.PATHUNPATTERNS: - if re.compile(regex).search(dpspath): - useThisDps = False + if regex.search(dpspath): + useThis = False break - if useThisDps: + if useThis: hist = lighthisto.Histo.fromDPS(dps) try: 
plotparser.updateHistoHeaders(hist) @@ -157,13 +158,10 @@ if histos: ## Split output per-histogram if opts.SPLITOUTPUT: - paper = os.path.basename(aidafile).replace(".aida", "") for f, hs in sorted(histos.iteritems()): for h in sorted(hs): histo = h.fullPath()[1:].replace("/", "_") outfile = "%s.dat" % histo - if opts.SMARTOUTPUT: - outfile = "%s-%s" % (paper, outfile) #print "Writing to", outfile out = open(outfile, "w") if not opts.GNUPLOT: Modified: trunk/bin/flat2aida ============================================================================== --- trunk/bin/flat2aida Sun May 8 14:16:28 2011 (r3084) +++ trunk/bin/flat2aida Sun May 8 18:32:55 2011 (r3085) @@ -1,7 +1,15 @@ #! /usr/bin/env python """\ -%prog flatfile [flatfile2 ...] +%prog [options] flatfile [flatfile2 ...] + +Convert make-plots data files to AIDA XML format. The output is by default +written out to a file with the same name as the input (out.aida in the case of +stdin) unless the --output option is specified. When specifying either input or +output filenames, a '-' is used to refer to stdin or stdout as appropriate. + +Histograms can also be filtered by histo path, using the -m or -M options for a +positive or negative regex pattern patch respectively. 
""" import sys @@ -10,145 +18,162 @@ sys.exit(1) -import os -from htmlentitydefs import codepoint2name -unichr2entity = dict((unichr(code), u'&%s;' % name) \ - for code,name in codepoint2name.iteritems() \ - if code != 38) # exclude "&" -def htmlescape(text, d=unichr2entity): - if u"&" in text: - text = text.replace(u"&", u"&") - for key, value in d.iteritems(): - if key in text: - text = text.replace(key, value) - return text - +import os, logging +import lighthisto -class Inputdata: - def __init__(self, filename): - self.histos = {} - self.description = {} - self.description['DrawOnly'] = [] - f = open(filename+'.dat', 'r') - for line in f: - if (line.count('#',0,1)): - if (line.count('BEGIN HISTOGRAM')): - title = line.split('BEGIN HISTOGRAM', 1)[1].strip() - self.description['DrawOnly'].append(title) - self.histos[title] = Histogram(f) - if title: - self.histos[title].path = title - f.close() - -class Histogram: - def __init__(self, f): - self.read_input(f) - self.path = None - - def read_input(self, f): - self.description = {} - self.data = [] - for line in f: - if (line.count('#',0,1)): - if (line.count('END HISTOGRAM')): - break - else: - line = line.rstrip() - if (line.count('=')): - linearray = line.split('=', 1) - key = linearray[0].strip() - val = linearray[1].strip() - #print "@", key, val - self.description[key] = val - else: - linearray = line.split() - if len(linearray)==4: - self.data.append({'LowEdge': float(linearray[0]), - 'UpEdge': float(linearray[1]), - 'Content': float(linearray[2]), - 'Error': [float(linearray[3]),float(linearray[3])]}) - elif len(linearray)==5: - self.data.append({'LowEdge': float(linearray[0]), - 'UpEdge': float(linearray[1]), - 'Content': float(linearray[2]), - 'Error': [float(linearray[3]),float(linearray[4])]}) - else: - raise Exception("Unexpected line format: '%s'" % linearray) - - def write_datapoint(self, f, xval, xerr, yval, yerr): - f.write(' <dataPoint>\n') - f.write(' <measurement errorPlus="%e" value="%e" 
errorMinus="%e"/>\n' %(xerr, xval, xerr)) - f.write(' <measurement errorPlus="%e" value="%e" errorMinus="%e"/>\n' %(yerr[1], yval, yerr[0])) - f.write(' </dataPoint>\n') - - def write_datapointset_header(self, f, count, bin): - path = '/REF/%s/d%02d-x01-y%02d' % (filename.split('/')[-1], count, bin+1) - if self.description.has_key("AidaPath"): - path = self.description["AidaPath"] - if not self.path: - self.path = path - f.write(' <dataPointSet name="%s" dimension="2"\n' % (os.path.basename(self.path))) - if not self.description.has_key('Title'): - self.description['Title'] = "" - f.write(' path="%s" title="%s">\n' % (os.path.dirname(self.path), htmlescape(self.description['Title']))) - if self.description.has_key("XLabel") and self.description["XLabel"] is not None: - f.write(' <dimension dim="0" title="%s" />\n' % htmlescape(self.description['XLabel'])) - if self.description.has_key("YLabel") and self.description["YLabel"] is not None: - f.write(' <dimension dim="1" title="%s" />\n' % htmlescape(self.description['YLabel'])) - f.write(' <annotation>\n') - f.write(' <item key="Title" value="%s" sticky="true"/>\n' %(htmlescape(self.description['Title']))) - f.write(' <item key="AidaPath" value="%s" sticky="true"/>\n' %(self.path)) - f.write(' <item key="FullPath" value="/%s.aida%s" sticky="true"/>\n' %(filename.split('/')[-1], self.path)) - f.write(' </annotation>\n') - - def write_datapointset_footer(self, f): - f.write(' </dataPointSet>\n') - - def write_datapointset(self, f, count): - if not opts.SPLITHISTOS: - self.write_datapointset_header(f, count, 0) - for bin, bindata in enumerate(self.data): - xval = 0.5*(bindata['UpEdge'] + bindata['LowEdge']) - if bindata['UpEdge'] == bindata['LowEdge']: - xerr = 0.5 - else: - xerr = 0.5*(bindata['UpEdge'] - bindata['LowEdge']) - yval = bindata['Content'] - yerr = bindata['Error'] - if opts.SPLITHISTOS: - self.write_datapointset_header(f, count, bin) - self.write_datapoint(f, xval, xerr, yval, yerr) - if opts.SPLITHISTOS: 
- self.write_datapointset_footer(f) - if not opts.SPLITHISTOS: - self.write_datapointset_footer(f) +########################################################## if __name__ == "__main__": - from optparse import OptionParser + + ## Default plot file search paths + default_plotdirs = ["."] + try: + import rivet + default_plotdirs += rivet.getAnalysisPlotPaths() + except: + pass + + + ## Parse command line options + from optparse import OptionParser, OptionGroup parser = OptionParser(usage=__doc__) + parser.add_option("-o", "--output", default=None, + help="Write all histos to a single output file. " + "stdout can be explicitly specified by setting '-' as the output filename. This option will " + "be disregarded if --split is specified.", + dest="OUTPUT") parser.add_option("-s", "--split", action="store_true", default=False, - help="Split histograms into individual files", dest="SPLITHISTOS") + help="Split histograms into individual files", dest="SPLITOUTPUT") + parser.add_option("--plotinfodir", dest="PLOTINFODIR", action="append", + default=default_plotdirs, help="directory which may contain plot header information") + parser.add_option("-m", "--match", action="append", + help="Only write out histograms whose $path/$name string matches these regexes", + dest="PATHPATTERNS") + parser.add_option("-M", "--unmatch", action="append", + help="Exclude histograms whose $path/$name string matches these regexes", + dest="PATHUNPATTERNS") + verbgroup = OptionGroup(parser, "Verbosity control") + verbgroup.add_option("-v", "--verbose", action="store_const", const=logging.DEBUG, dest="LOGLEVEL", + default=logging.INFO, help="print debug (very verbose) messages") + verbgroup.add_option("-q", "--quiet", action="store_const", const=logging.WARNING, dest="LOGLEVEL", + default=logging.INFO, help="be very quiet") opts, args = parser.parse_args() + + ## Configure logging + logging.basicConfig(level=opts.LOGLEVEL, format="%(message)s") + + + ## Initialise steering variables which need a 
bit more care + import re + if opts.PATHPATTERNS is None: + opts.PATHPATTERNS = [] + opts.PATHPATTERNS = [re.compile(r) for r in opts.PATHPATTERNS] + if opts.PATHUNPATTERNS is None: + opts.PATHUNPATTERNS = [] + opts.PATHUNPATTERNS = [re.compile(r) for r in opts.PATHUNPATTERNS] + + + ## Check that at least one file has been supplied if len(args) < 1: - sys.stderr.write("Must specity at least one histogram file\n") + sys.stderr.write("Must specity at least one histogram file (or stdin)\n") sys.exit(1) - for flatfile in args: - filename = flatfile.replace(".dat", "") - inputdata = Inputdata(filename) - - f = open(filename+'.aida', 'w') - f.write('<?xml version="1.0" encoding="ISO-8859-1" ?>\n') - f.write('<!DOCTYPE aida SYSTEM "http://aida.freehep.org/schemas/3.3/aida.dtd">\n') - f.write('<aida version="3.3">\n') - f.write(' <implementation version="1.1" package="FreeHEP"/>\n') + ## Add directories to the plotinfo path + for flatfile in args: + if flatfile != "-": + flatdir = os.path.dirname(flatfile) + if flatdir not in opts.PLOTINFODIR: + opts.PLOTINFODIR.append(flatdir) + ## Remove empty path entries + opts.PLOTINFODIR = filter(lambda s: len(s) > 0, opts.PLOTINFODIR) + ## Create plot file parser + plotparser = lighthisto.PlotParser(opts.PLOTINFODIR) - for i, d in enumerate(inputdata.description['DrawOnly']): - inputdata.histos[d].write_datapointset(f, i+1) - f.write('</aida>\n') - f.close + ## Run over the files and build histo objects selected by the pattern filtering + histos = {} + for flatfile in args: + if flatfile != "-" and not os.access(flatfile, os.R_OK): + logging.error("%s can not be read" % flatfile) + sys.exit(1) + try: + allhistos = lighthisto.Histo.fromFlat(flatfile) + except Exception, e: + logging.error("%s can not be parsed" % flatfile) + print e + sys.exit(1) + + for histpath, hist in allhistos.iteritems(): + useThis = True + if opts.PATHPATTERNS: + useThis = False + for regex in opts.PATHPATTERNS: + if regex.search(histpath): + useThis = True + 
break + if useThis and opts.PATHUNPATTERNS: + for regex in opts.PATHUNPATTERNS: + if regex.search(histpath): + useThis = False + break + if useThis: + try: + plotparser.updateHistoHeaders(hist) + except ValueError, err: + logging.debug(err) + histos.setdefault(flatfile, []).append(hist) + + + ## Write output + if histos: + ## Split output per-histogram + if opts.SPLITOUTPUT: + for f, hs in sorted(histos.iteritems()): + for h in sorted(hs): + histo = h.fullPath()[1:].replace("/", "_") + outfile = "%s.aida" % histo + #print "Writing to", outfile + out = open(outfile, "w") + out.write('<?xml version="1.0" ?>\n') + out.write('<!DOCTYPE aida SYSTEM "http://aida.freehep.org/schemas/3.3/aida.dtd">\n') + out.write('<aida version="3.3">\n') + out.write(' <implementation version="1.1" package="Rivet"/>\n') + out.write(h.asAIDA()) + out.write('</aida>\n') + out.close() + ## Write all output to a single file (stdout by default) + elif opts.OUTPUT: + outfile = opts.OUTPUT + if outfile == "-": + out = sys.stdout + else: + out = open(outfile, "w") + out.write('<?xml version="1.0" ?>\n') + out.write('<!DOCTYPE aida SYSTEM "http://aida.freehep.org/schemas/3.3/aida.dtd">\n') + out.write('<aida version="3.3">\n') + out.write(' <implementation version="1.1" package="Rivet"/>\n') + for f, hs in sorted(histos.iteritems()): + for h in sorted(hs): + out.write(h.asAIDA()) + out.write('</aida>\n') + if outfile != "-": + out.close() + ## Split output per-infile + else: + for f, hs in sorted(histos.iteritems()): + outfile = os.path.basename(f).replace(".dat", ".aida") + if f == "-": + outfile = "out.dat" + out = open(outfile, "w") + out.write('<?xml version="1.0" ?>\n') + out.write('<!DOCTYPE aida SYSTEM "http://aida.freehep.org/schemas/3.3/aida.dtd">\n') + out.write('<aida version="3.3">\n') + out.write(' <implementation version="1.1" package="Rivet"/>\n') + for h in sorted(hs): + out.write(h.asAIDA()) + out.write('</aida>\n') + out.close() Modified: trunk/pyext/lighthisto.py 
============================================================================== --- trunk/pyext/lighthisto.py Sun May 8 14:16:28 2011 (r3084) +++ trunk/pyext/lighthisto.py Sun May 8 18:32:55 2011 (r3085) @@ -1,8 +1,7 @@ # Use posixpath instead of os.path for AIDA path handling to be platform # independent, i.e. always use "/" as path delimiter. import posixpath -import os -import re +import os, sys, re from htmlentitydefs import codepoint2name @@ -248,7 +247,7 @@ br[0] <= curran[1] <= br[1])): new.addBin(b) else: - logging.debug("Chopping bin %s: %e" % (self.fullPath(), b.getBinCenter())) + sys.stderr.write("Chopping bin %s: %e\n" % (self.fullPath(), b.getBinCenter())) return new def renormalise(self, newarea): @@ -321,15 +320,25 @@ @classmethod - def fromFlat(cls, stringbuf): - """Build a histogram from a string buffer containing flat-format.""" + def fromFlatHisto(cls, stringbuf): + """Build a histogram from its flat text representation. + """ desc = {} new = cls() - for line in stringbuf: - line = line.rstrip() - if "=" in line: - linearray = line.split("=", 0) + for line in stringbuf.splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + if 'BEGIN HISTOGRAM' in line: + fullpath = line.split('BEGIN HISTOGRAM', 1)[1].strip() + new.path = os.path.dirname(fullpath) + new.name = os.path.basename(fullpath) + continue + elif "=" in line: + linearray = line.split("=", 1) desc[linearray[0]] = linearray[1] + elif 'END HISTOGRAM' in line: + break else: linearray = line.split() if len(linearray) == 4: @@ -341,8 +350,10 @@ float(linearray[2]), float(linearray[3]), float(linearray[4]))) else: - logging.error("Unknown line format in '%s'" % (line)) - new.path, new.name = posixpath.split(desc["AidaPath"]) + sys.stderr.write("Unknown line format in '%s'\n" % line) + ## Apply special annotations as histo obj attributes + if desc.has_key("AidaPath"): + new.path, new.name = posixpath.split(desc["AidaPath"]) if desc.has_key("Title"): new.title = 
desc["Title"] if desc.has_key("XLabel"): @@ -353,16 +364,51 @@ @classmethod + def fromFlat(cls, path): + """Load all histograms in file 'path' into a histo-path=>histo dict. + + The keys of the dictionary are the full paths of the histogram, i.e. + AnalysisID/HistoID, a leading "/REF" is stripped from the keys. + """ + runhistos = dict() + if path == "-": + f = sys.stdin + else: + f = open(path, "r") + fullpath = None + s = "" + for line in f: + if "BEGIN HISTOGRAM" in line: + fullpath = line.split('BEGIN HISTOGRAM', 1)[1].strip() + # TODO: Really? Here? + if fullpath.startswith("/REF"): + fullpath = fullpath[4:] + if fullpath: + s += line + if "END HISTOGRAM" in line: + runhistos[fullpath] = cls.fromFlatHisto(s) + ## Reset for next histo + fullpath = None + s = "" + if f is not sys.stdin: + f.close() + return runhistos + + + @classmethod def fromAIDA(cls, path): """Load all histograms in file 'path' into a histo-path=>histo dict. The keys of the dictionary are the full paths of the histogram, i.e. - AnaylsisID/HistoID, a leading "/REF" is stripped from the keys. + AnalysisID/HistoID, a leading "/REF" is stripped from the keys. + + TODO: /REF stripping should really happen in user code... """ runhistos = dict() tree = ET.parse(path) for dps in tree.findall("dataPointSet"): fullpath = posixpath.join(dps.get("path"), dps.get("name")) + # TODO: Really? Here? if fullpath.startswith("/REF"): fullpath = fullpath[4:] runhistos[fullpath] = cls.fromDPS(dps)
More information about the Rivet-svn mailing list |