|
[Rivet-svn] r2847 - in trunk: . bin
blackhole at projects.hepforge.org blackhole at projects.hepforge.org
Sat Dec 11 21:03:58 GMT 2010
Author: buckley Date: Sat Dec 11 21:03:58 2010 New Revision: 2847 Log: Adding a 3600 second timeout for initialising an event file. If it takes longer than (or anywhere close to) this long, chances are that the event source is inactive for some reason (perhaps accidentally unspecified and stdin is not active, or the event generator has died at the other end of the pipe). The reason for not making it something shorter is that e.g. Herwig++ or Sherpa can have long initialisation times to set up the MPI handler or to run the matrix element integration. A timeout after an hour is still better than a batch job which runs for two days before you realise that you forgot to generate any events! Modified: trunk/ChangeLog trunk/bin/rivet Modified: trunk/ChangeLog ============================================================================== --- trunk/ChangeLog Sat Dec 11 19:26:18 2010 (r2846) +++ trunk/ChangeLog Sat Dec 11 21:03:58 2010 (r2847) @@ -1,3 +1,16 @@ +2010-12-11 Andy Buckley <andy at insectnation.org> + + * Adding an 3600 second timeout for initialising an event file. If + it takes longer than (or anywhere close to) this long, chances are + that the event source is inactive for some reason (perhaps + accidentally unspecified and stdin is not active, or the event + generator has died at the other end of the pipe. The reason for + not making it something shorter is that e.g. Herwig++ or Sherpa + can have long initialisation times to set up the MPI handler or to + run the matrix element integration. An timeout after an hour is + still better than a batch job which runs for two days before you + realise that you forgot to generate any events! + 2010-12-10 Andy Buckley <andy at insectnation.org> * Fixing unbooked-histo segfault in UA1_1990_S2044935 at 63 GeV. 
Modified: trunk/bin/rivet ============================================================================== --- trunk/bin/rivet Sat Dec 11 19:26:18 2010 (r2846) +++ trunk/bin/rivet Sat Dec 11 21:03:58 2010 (r2847) @@ -349,7 +349,7 @@ if n % 10000 == 0: nevtloglevel = logging.CRITICAL timecurrent = time.time() - timeelapsed = timecurrent - starttime; + timeelapsed = timecurrent - starttime if maxevtnum is None: logging.log(nevtloglevel, "Event %d (%d s elapsed)" % (n, timeelapsed)) else: @@ -368,9 +368,10 @@ a_up = a.upper() ## Print warning message and exit if not a valid analysis name if not a_up in all_analyses: - print "'%s' is not a valid analysis. Available analyses are:" % a_up + logging.warning("'%s' is not a valid analysis. Available analyses are:" % a_up) for aa in all_analyses: - print " %s" % aa + logging.warning(" %s" % aa) + logging.warning("Exiting...") sys.exit(1) logging.debug("Adding analysis '%s'" % a_up) ah.addAnalysis(a_up) @@ -387,34 +388,63 @@ import platform logging.info("Rivet running on machine %s (%s)" % (platform.node(), platform.machine())) + +## Timeout handlers for the event initialisation and loop +EVENT_TIMEOUT = 600 +def evtinithandler(signum, frame): + logging.warn("It has taken more than %d secs to get the first event! Is the input event stream working?" % EVENT_TIMEOUT) + raise Exception("Event initialisation timeout") +# def evtloophandler(signum, frame): +# global evtnum, HEPMCFILES +# logging.warn("Event #%i timeout: it has taken more than %d secs to process this event! Is the input event stream working?" % (evtnum, EVENT_TIMEOUT)) +# logging.warn("Abandoning processing of events from %s; is this input event stream actually working?" 
% hepmcfile) +# raise Exception("Event generation timeout") + + ## Init run based on one event -evtfile = HEPMCFILES[0] -if not run.init(evtfile): - logging.error("Failed to initialise on event file %s" % evtfile) - sys.exit(2) +hepmcfile = HEPMCFILES[0] +signal.signal(signal.SIGALRM, evtinithandler) +signal.alarm(EVENT_TIMEOUT) +try: + init_ok = run.init(hepmcfile) + if not init_ok: + logging.error("Failed to initialise on event file %s... exiting" % hepmcfile) + sys.exit(2) +except: + logging.error("Timeout in initialisation from event file %s... exiting" % hepmcfile) + sys.exit(3) + ## Event loop -starttime = time.time() evtnum = 0 -for fileidx in range(len(HEPMCFILES)): - logging.info("Reading events from '%s'" % HEPMCFILES[fileidx]) +starttime = time.time() +for fileidx, hepmcfile in enumerate(HEPMCFILES): + ## Open next HepMC file (does not apply to first file: it was already used for the run init) + if fileidx > 0: + run.openFile(hepmcfile) + if not run.readEvent(): + logging.warning("Could not read events from '%s'" % hepmcfile) + continue + logging.info("Reading events from '%s'" % hepmcfile) while opts.MAXEVTNUM is None or evtnum < opts.MAXEVTNUM: evtnum += 1 logNEvt(evtnum, starttime, opts.MAXEVTNUM) - if not run.processEvent(): + ## Process this event + processed_ok = run.processEvent() + if not processed_ok: logging.warn("Event processing failed for evt #%i!" % evtnum) break + ## Exit the loop if signalled if RECVD_KILL_SIGNAL is not None: break - if not run.readEvent(): + ## Read next event + read_ok = run.readEvent() + if not read_ok: break + ## Write a histo file snapshot if appropriate if opts.HISTO_WRITE_INTERVAL is not None: if evtnum % opts.HISTO_WRITE_INTERVAL == 0: ah.writeData(opts.HISTOFILE) - if fileidx < len(HEPMCFILES)-1: - run.openFile(HEPMCFILES[fileidx+1]) - if not run.readEvent(): - continue logging.info("Finished event loop") run.finalize()
More information about the Rivet-svn mailing list |