##
## This awk script consolidates multiple OpenPegasus log files from
## the various OOP processes (each process generates a separate log file)
## into a single log file sorted by time.
## It consolidates multi-line log records into single-line records for
## the sort process and maps the time from the original trace format
## into a fixed-length form for the subsequent sort.
## The records from all input trace files are written as a single
## consolidated stream to standard output:
##     awk -f consolidateTrace.awk [input file list] >outputfile
##
## For input lines whose EOL does not terminate a complete record, this
## script replaces the EOL character with the separator defined by LocalRS
## below, joining the line to the preceding record before it is written to
## standard output. This ensures that multi-line trace elements (e.g. the
## XML output) get sorted as a single entity.

BEGIN {
    accum = ""
    ## Local record separator. NOTE: This may be too simplistic since we
    ## are just using a single character (form feed) as the separator right
    ## now. We could consider a more complex pattern, but then we would also
    ## have to modify the script that re-separates the output into multiple
    ## lines.
    LocalRS = "\f"
    namesFile = "FileNames.txt"
}

## This line is not a complete log record (the time field is not present),
## so append the current record to the record accumulator with LocalRS
## (the local record separator), since it is the continuation of a
## multi-line log record (i.e. no time as the first field).
$0 !~ /^[0-9]{10}s-[0-9]{1,6}us: / {
    accum = accum LocalRS $0
}

## For all trace records (i.e. records with a time field), accumulate
## everything up to the next trace record into a single accumulator record.
## Also map the time field to a fixed-length form so that it sorts
## properly; the microseconds part today is variable length (i.e. a count
## of microseconds rather than a fraction of a second), so the field
## length varies.
/^[0-9]{10}s-[0-9]{1,6}us: / {
    ## Normalize the time field for the sort: combine seconds and
    ## microseconds into a single fixed-length decimal number
    ## (seconds.microseconds) so the sort program orders records correctly.
    ## For example, "1284858375s-5021us:" becomes "1284858375.005021:".
    split($1, timeFields, "-")
    sub("s$", "", timeFields[1])
    sub("us:$", "", timeFields[2])
    ## Pad the microseconds field to 6 characters.
    while (length(timeFields[2]) < 6) {
        timeFields[2] = "0" timeFields[2]
    }
    $1 = timeFields[1] "." timeFields[2] ":"
    ## Output the previously accumulated record and start a new one.
    if (accum != "") {
        print accum
        accum = ""
    }
    accum = $0
}

## Get the file name if this is the first record of a file.
## Assumes the first record is a valid log record.
FNR == 1 {
    ## Get the process ID and save the file name for that process ID.
    processID = substr($3, 2, 4)
    filenameArray[processID] = FILENAME
}

## Output any record still held in the accumulator, then write the array of
## process IDs vs. file names into a file for later use.
END {
    if (accum != "") {
        print accum
    }
    for (id in filenameArray) {
        print id, filenameArray[id] >> namesFile
    }
}
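
## ----------------------------------------------------------------------
## Illustrative end-to-end pipeline (an assumption, not part of this
## script): the consolidated stream still has to be sorted on the
## normalized time field, and the embedded LocalRS (form feed) characters
## converted back to newlines by a separate step. The file names and the
## sort/tr commands below are examples only; the actual re-separation
## script referenced above is not shown here.
##
##     awk -f consolidateTrace.awk cimserver.trc.* | sort -n > combined.tmp
##     tr '\f' '\n' < combined.tmp > cimserver_combined.trc
##
## sort -n orders the records numerically on the leading
## seconds.microseconds value, and tr restores the form feed separators to
## newlines so multi-line trace elements reappear on separate lines.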