#!/usr/bin/env python3

import os
import sys
from optparse import OptionParser

def getDSName(basename, n):
    """ Compose the output dataset name for a given input dataset name.

    The result is ``basename + DSID + "." + tags``, where the dataset ID
    (DSID) and the AMI tags are picked out of the input dataset name ``n``.
    Anything up to and including the last ":" in ``n`` is discarded first.

    Three naming layouts are recognised (the open data for research comes
    with different scopes, so the fields sit in different places):

    * name contains "user.zmarshal": the third "."-separated field is
      "<DSID>_<tag>_<tag>..."
    * name contains "opendata_pp": the last "."-separated field is split on
      "_"; item 2 is the DSID and item 5 holds the tags
    * anything else: "."-separated field 1 is the DSID, field 5 the tags

    An IndexError is raised if the name has fewer fields than the matching
    layout needs.
    """
    # Strip the scope, i.e. keep only what follows the last ":"
    dataset = n.rsplit(":", 1)[-1]
    fields = dataset.split(".")

    if "user.zmarshal" in dataset:
        # First "_" token is the DSID, the remaining ones are the tags
        dsid, *tag_tokens = fields[2].split("_")
        tags = "_".join(tag_tokens)
    elif "opendata_pp" in dataset:
        tokens = fields[-1].split("_")
        dsid, tags = tokens[2], tokens[5]
    else:
        dsid, tags = fields[1], fields[5]

    return "".join((basename, dsid, ".", tags))

def createTempInput(cur_ds):
    """ Write the dataset name, followed by a newline, to "input.txt".

    The file is created in the current working directory and any previous
    content is overwritten. Nothing is returned.
    """
    with open("input.txt", "w") as handle:
        handle.writelines((cur_ds + "\n",))

def submit(cur_ds, dsName, nfiles=0, dry_run=False):
    """ Do the job submission with prun.
    This assumes you have already set up pathena tools (`lsetup pathena`)

    Parameters:
      cur_ds  -- input dataset name. Not used when building the command:
                 prun is pointed at ./input.txt instead, so that file must
                 already contain the dataset name.
      dsName  -- output dataset name; submitted as "user.<USER>.<dsName>",
                 where <USER> is read from the USER environment variable.
      nfiles  -- if non-zero, passed to prun as --maxNFilesPerJob. When it
                 is 0 (the default) the option is left out entirely.
      dry_run -- if True, return the command string without executing it.

    Returns None after running the command, or the command string when
    dry_run is True. Raises KeyError if USER is not set in the environment.
    """
    # Looked up outside the f-string on purpose: a backslash-escaped quote
    # inside an f-string replacement field is a SyntaxError before Python 3.12
    user = os.environ["USER"]

    parts = [
        'prun --excludeFile=*build*/*,*run*/*',
        '--exec "OpenDataNtupler.py -i input.txt && mv submitDir/data-ANALYSIS/output.root output_ntup.root && mv submitDir/hist-output.root output_hist.root"',
        f'--outDS="user.{user}.{dsName}"',
        '--outputs output_ntup:output_ntup.root,output_hist:output_hist.root',
    ]
    # Only emit the flag together with its value; a bare --maxNFilesPerJob
    # would swallow the next option as its argument
    if nfiles:
        parts.append('--maxNFilesPerJob ' + str(nfiles))
    parts += [
        '--writeInputToTxt IN:input.txt',
        '--useAthenaPackages',
        '--inDsTxt=./input.txt',
    ]
    comm = " ".join(parts)

    if dry_run:
        return comm
    os.system(comm)
    return None

if __name__=="__main__":

    # Options that must be given on the command line; checked by hand
    # after parsing, see the loop below
    required = ["inputfilelist","prefix"]

    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-i", "--inputs", action="store", dest="inputfilelist", help="Text file containing list of datasets, one dataset per line [REQUIRED]")
    parser.add_option("-p", "--prefix", action="store", dest="prefix", help="Grid job name prefix, e.g. run_version8 [REQUIRED]")
    # type="int" so the value is parsed as a number: left as a string,
    # "-n 0" would be truthy and would not mean "no limit"
    parser.add_option("-n", "--maxNFilesPerJob", action="store", type="int", dest="maxNFilesPerJob", help="Max number of files from dataset per grid job", default=0)

    (opts, args) = parser.parse_args()

    # Options without a default are None when not supplied
    for r in required:
        if getattr(opts, r) is None:
            parser.error("parameter %s required"%r)

    # Always add a '.' to the prefix we are provided
    basename = opts.prefix+"."
    # Option to limit the number of files per grid job (0 means no limit)
    nfiles = opts.maxNFilesPerJob

    # Open up the input file list, loop through all the datasets there
    with open(opts.inputfilelist,"r") as datasets:
        for line in datasets:
            # Handy way to comment out files to not run over in the input list:
            # any line containing a '#' is skipped entirely
            if "#" in line: continue
            cur_ds = line.strip()
            # Skip blank lines: an empty name cannot be split into DSID and
            # tags, and there is nothing to submit for it
            if not cur_ds: continue
            # Create a temporary file for submitting the job
            createTempInput(cur_ds)
            # Compose the output dataset name from the input dataset name
            dsName = getDSName(basename, cur_ds)
            # Submit the job to the grid
            submit(cur_ds,dsName,nfiles)
