Return to formatTrace.awk CVS log | Up to [Pegasus] / pegasus_unsupported / scripts / formattrace |
File: [Pegasus] / pegasus_unsupported / scripts / formattrace / formatTrace.awk
(download)
Revision: 1.2, Sat Nov 8 19:41:03 2014 UTC (9 years, 7 months ago) by karl Branch: MAIN CVS Tags: HEAD Changes since 1.1: +29 -11 lines BUG#:9999 TITLE: Add scripts for chotesting and update the formattrace scripts DESCRIPTION: These are unsupported test extensions for use in pegasus development and testing. Each has help and readme files. |
### ### AWK Script to clean up an OpenPegasus trace by modifiying the info ### for a number of fields including ### - Change from absolute time to relative time to make seeing ### time between events easier ### - Change from numberic thread ID to symbolic threadID where the ### symbolic thread ID is THREADxx ### - Indent XML that is not indented in the trace ### - Display only trace lines after a defined point where the ### - defined point is the first trace line with a defined Component ### The component is defined by the startElement variable on the ### command line with default of XmlIO ### - set Marker on selected lines. Selection defined by input file ### This requires match of each item in the input array of strings ### against each line ### #### TODO #### Add filter on display by component type #### Add color for particular things we find (Color would be option vs some #### other marker for particular things (ex. error found) #### Find way to trace through complete operation from thread to thread #### Find way to match enter and exit calls. Could be indent or some marker. #### Would probably assume same thread #### NOTE: This code uses one regex expression specific to to posix awk #### the expression interval (i.e. the {number}. This may requie #### that the calling function include either the --posix or #### -re-interval option on the command line to include this #### functionality. If the time and thread info do not translate #### it is probably this incompatibility.. #### BEGIN { false = 0 ## defintion of true and false values for readability true = 1 startTime = 0 ## Time of first trace entry threadID = 1 ## variable to hold current thread number processID = 1 ## variable to hold current process number showOutput = 0 ## Boolean flag defines when output display is started indent = 0 ## indent level for xml isCompleteElement = false; keywordsExist = false ## Boolean enables or disables keyword search ## disabled unless keywords variable is non-zero outputLineCount = 0 ## count of lines displayed namesFile = "FileNames.txt" ## default name for the nameFile, the file that ## is built at end and contains summary information fileName = "None" ## filename on input. May be supplied as variable or ## or derived from first input record outputFileSuffix = ".fmtd" outputFileName = fileName outputFileSuffix markThisLine = false ## indicating that the current line will be marked ## with leading + per markCount = 0 ## count of marks set. ## If the name of the trace components is not defined on the cmd line ## default it to XmlIO so that we do not show anything until the ## first XmlIO record is encountered. ## NOTE: This is too limited in that it does not allow having ## no startElement, i.e. display starting at first line of trace. print "Cmd line input of startElement=" startElement "." "length= " length(startElement) if (startElement == "") { startElement = "XmlIO" } if (length(startElement)) { print "Display Starts at first Trace line component = " startElement } else { print "Display starts at first line of original trace" } ## Get the pegasus variables pegRoot = ENVIRON["PEGASUS_ROOT"] ## Create the keywordRegex from list of possible strings ## Comma separated list of keywords which will be marked on output keywords = "XmlIO,error,IMETHODCALL,TestServerProfileProvider,Request: queue id:," keywords = keywords "Response has error,Enumeration Set Error msg,After putCache insert," keywords = keywords "TestServerProfileProvider:,putcache,getcache," keywords = keywords "setupFuture,_issueImmediateOpenOr,_issueOpenOr" keywords = keywords "IMETHODRESPONSE,SetupFutureResponse," keywords = keywords "ContextId=," keywords = keywords "setProcessingState," keywords = keywords "setProcessingState," keywords = keywords "StopTimer," keywords = keywords "StartTimer" if (keywords == "") { ##keywords = "error,IMETHODCALL,TestServerProfileProvider" } ## Need to test to be sure no | in keywords if (keywords != "") { keywordsExist = true ## split the input keyword list and create a regex or statement numKeywords = split(keywords, keyWordArray,",") if (numKeywords > 1) { keywordRegex = keyWordArray[1] for ( i = 2; i <= numKeywords; i++) { keywordRegex = keywordRegex "|" keyWordArray[i] } } print "Mark lines with the following keywords", keywordRegex } ## Exclude components filter. This is a command line input parameter ## TODO all these should be input in form a,b,c and we map to the ## regex. excludeComponentFilter = "" if (excludeComponentFilter != "") { print "Excluding the following components from output", excludeComponentFilter } } ############################################################################ ## ## Process the file ## ########################################################################### ## ## Get the first input file name and create the output file name based on ## this name. If filename was already set, do not change. This is done once ## on the first line input because the name on the input line does not exist ## in FILENAME until after the first read. FNR == NR && NR == 1 { fileName = FILENAME if (match(fileName,"\\.tmp$")) { ## remove the tmp suffix since we want only the main file name ## for output sub("\\.tmp$", "", fileName) } ## define the output file name and initialize it empty outputFileName = fileName outputFileSuffix print "output fileName= ", outputFileName } ## Trace element name that acts as flag to turn on trace. Output is enabled ## first time the element name defined by startElement is encountered. ## Once turned on, there is no way to turn off the output !(showOutput) && ($2 == startElement) { showOutput = 1 print "Start Display at Line " FNR " in file " FILENAME " TraceComponent " startElement } ## Test to see if this line is to be marked by matching to the regexList ## If it matches, set the markThisLine variable so it can be marked on output (keywordsExist){ if (match($0,keywordRegex)) { markThisLine = true } else { markThisLine = false } } ## Scan line for XML Line that is not indented. We will indent all ## consecutive XML lines in accord with xml rules ## TODO: Modify so we do not indent the first line. { if($0 ~ /^</) { #### print "FOUND XML START" inXMLState = true if ($1 == "<?xml") { indent = 0 } if ($1 == "</CIM>") { indent = -1 } } ## non-consecutive xml line found. turn off xml indent else { #### print "FOUND XML END" inXMLState = false } } ## Indent any unindented XML ## If inXml state indent the line by indent variable ## this indents all consecutive lines that start with the XML left caret ## character in line with their xml indent level. ## NOTE: It does not indent any case of xml on a single line (inXMLState) { #### DEBUG for xml displayprint "FOUND NEXT XML Line", "indent=" indent, " isComplete " isCompleteElement, $0 ## if this is not complete element (i.e start and end on single line ## include </elementName> as possible terminator on same line ### issue with element that terminates on same line. needs to indent and ### then outdent after print. ## <IMETHODCALL NAME="References"> ## <LOCALNAMESPACEPATH> ## <NAMESPACE NAME="test"/> ## <NAMESPACE NAME="TestProvider"/> ## </LOCALNAMESPACEPATH> elementName = substr($1,2) terminator = "</" elementName ">$" ## if this line is an entity close element ## TODO why the second char is /. seems to match </A/> if ($0 ~ "^</[A-Z]/>$") { type = 1 indent-- isCompleteElement = true } ## if this is a complete start and end pair else if (($0 ~ "^<[A-Z]+>.+</[A-Z]+>$")) { type = 6 ## Really should match the element name begin and end ## complete open and close of an element if (!isCompleteElement) { indent++ } isCompleteElement = true } ## else if this is a complete element ## <NAMESPACE NAME="PG_InterOp"/> This is NOT a 5. It is a 2 ## 11 2 <NAMESPACE NAME="root"/> ## 11 5 <NAMESPACE NAME="PG_InterOp"/> This should be type 2 else if ($0 ~ "^<[A-Z]+[a-z=\" ]*/>$") { type = 2 if (!isCompleteElement) { indent++ } isCompleteElement = true } ## if there is a complete start element and end element ## <KEYVALUE VALUETYPE="string">PG:127-0-1-1</KEYVALUE> else if($0 ~ terminator) { type = 3 if (!isCompleteElement) { indent++ isCompleteElement = true } } else { ## if terminating element if ($0 ~ "^</") { ##if ((indent != 0) && (type != 3)) if ((indent != 0) && isCompleteElement) { indent-- } type = 4 isCompleteElement = true } else { type = 5 if (!isCompleteElement) { indent++ } isCompleteElement = false } } ## create the indent string, (2 char indent per level) indentStr = "" for (x = 0; x < indent; x++) { indentStr = indentStr " " } #### print "indentStr" indentStr "EndIndentStr", "indent=" indent, $0 ## prepend the indent string to $0 $0 = indentStr $0 } ## input record times not corrected yet. See consolidateTrace which ## should do this if called. This allows direct use of this formatter ## with .trc files without the format conversion. ## substitute the time into the time field to move from absolute seconds ## to relative seconds. This must be the first field in the input variable ## substitute time of yyyy.zzzzzz for relative sec and usec # yyyyyyyyyys-zzzzzzs absolute sec and usec at BOL ## NOTE: This function was from before we used the consolidate.awk function ## to normalize the us time field. If you use the formattrace script ## this should never be called. /^[0-9]{10}s-[0-9]{1,6}us:/ { if (showOutput) { ## Normalize the time field for a sort from seconds-microseconds ## seconds.microseconds as a single decimal number of fixed length split($1, timeFields,"-") sub("s$","",timeFields[1]) sub("us:$","",timeFields[2]) while (length(timeFields[2]) < 6) { timeFields[2] = "0" timeFields[2] } ## if this is the first entry, save the current time as start time. if (startTime == 0) { startTime = timeFields[1] } time = timeFields[1] - startTime newTime = sprintf("%.4u", time) newTime = newTime "." timeFields[2] ":" #### print "Time 1 " $1 " formatted to " newTime " starttime " startTime $1 = newTime } } ## If the file was already mapped to the new time format then we ## want to adjust to relative time which is a slightly different format ## than the original trace format. If you use the formattrace script ## this is the reformatter for the log record time field. It assumes that ## the second part of the field (us) is fixed length and the oveall format ## of the time field is ## ssssssssss.uuuuuu: /^[0-9]{10}.[0-9]{6}: / { if (showOutput) { ## if this is the first entry, save the current time as start time. split($1, timeFields,".") if (startTime == 0) { startTime = timeFields[1] } time = timeFields[1] - startTime newTime = sprintf("%.4u", time) newTime = newTime "." timeFields[2] #### print "Time 2 " $1 " formatted to " newTime " starttime " startTime $1 = newTime ## get the ProcessName, threadName, etc. from trace entry 3 ## Use the two expressions because the second one required for a very ## few trace entries (ex. header files include complete path) but makes ## the execution extremly long so we try to match on first. if (($3 ~ /^\[[0-9]{2,5}:[0-9]{5,15}:[A-Za-z0-9_.]{1,40}:[0-9]{1,6}\]/ ) || ($3 ~ /^\[[0-9]{2,5}:[0-9]{5,15}:[[:print:]]{1,120}:[0-9]{1,6}\]/ )) { split($3, procFields, ":") processName = substr(procFields[1],2) ## get and delete leading [ threadName = procFields[2] srcFileName = procFields[3] lineNum = procFields[4] ##print "Processinfo = " " |" $3 "| processName= ", processName, " ThreadName= ", threadName ## Display name for the thread. The unknown is for errors only localProcessName = "Unknown" localThreadName = "Unknown" ## if this is new process, add to the array of process names if (!(processName in processNameArray)) { ## create the new process name x = sprintf("%.2u", processID++) processNameArray[processName] = ("P" x) } localProcessName = processNameArray[processName] ## if this is new thread, add to the array of thread names. if (!(threadName in threadNameArray)) { ## create the new thread name x = sprintf("%.2u", threadID++) threadNameArray[threadName] = ("TH" x) } localThreadName = threadNameArray[threadName] ##print "ThreadElementFound ", $2, " thread info=" $3, " threadName=" threadName, "newThreadName=" newThreadName ## substitute the new name into $3 $3 = "[" localProcessName ":" localThreadName ":" srcFileName ":" lineNum } else { print "No Match for field Process:Thread:Source:LineNum field " $3 print "line " $0 } } ## Accumulate info an functions entered and exited. ## We could actually accumulte time in function here. But more complex ## because of multitask so we would have to match input to output ## threads and process. Action = $4 $5 if (Action ~ /^Enteringmethod$/) { functionArray[$6]++ } else if (Action ~ /^Exitingmedhod$/ ) { functionArray[$6]-- } } ## exclude from display any lines where the component name matches the ## excludeFilter regex. Note that this is a match against arguement 2 ## directly. This is the last item so that all other processing is complete ## before we decide to terminate. Also, this could be directly done ## in the next pattern action just as well. { if ((excludeComponentFilter != "") && !inXmlState) { if (match($2,excludeComponentFilter)) { next } } } ## print any resulting line so all lines get output.\ once showOutput has ## been enabled. This function ## inserts the marker in the line either a single + or minus showOutput { marker = "" if (markThisLine != 0) { marker = "+" markCount++ } outputLineCount++ ## debugging output ##printf("%u %u%s%s\n", indent, type, marker, $0) >> outputFileName printf("%s%s\n", marker, $0) >> outputFileName } function USAGE() { print "Usage: awk --posix -f formattrace.awk [variables] [inputTraceName]" print "Formats and organizes the OpenPegasus trace files " print "and produces an output file named CIM_Tr_*.fmtd" print "That:" print " - consolidates the multiple trace files into a single file, " print " - modifies the time relative to the first displayed entry" print " - renames the thread and process ids to something more readable" print " - Marks selected log entries so they can be found easily" print " - Excludes selected comonents" print " - Cuts off log entries before the first instance of a defined" print " component (default XmlIO) which also serves as the relative" print " - start time." print "where --posix is required for awk compatibility" print " -f defines this awk file name" print " variables are any variables that may be used by the program" print " in the form -v name=value" print " startElement=ComponentName that will start display output" print " where default is XmlIO" } ############################################################################ ## Close by outputting the summary information to the namesFile ############################################################################ END{ if (!(showOutput)) { print "ERROR: Never found trace component to start display" print "ERROR: Never found trace component to start display" >> namesFile } print "Input " NR " lines from file " fileName "." >> namesFile ## The function not defined getStart = strftime("%b %d %Y-%T", startTime) ## apparentlynot defined because of posix compatibility flag. print "Original time for first record was " startTime " (" getStartTime ")" print markCount " lines marked" >> namesFile print "marks are: " keywordRegex >>namesFile print "Output " outputLineCount " lines to " outputFileName >> namesFile print "Thread IDs for ", arrayLength(threadNameArray) " threads" >> namesFile for (item in threadNameArray) { print item, threadNameArray[item] >> namesFile } print "" >> namesFile print "Process IDs for " arrayLength(processNameArray) " processes" >> namesFile for (item in processNameArray) { print item, processNameArray[item] >> namesFile } print "Count of functions called = " arrayLength(functionArray) >> namesFile for (item in funtionArray) { if ((functionArray[item] % 2) == 1) { print "No exit match for function" item >> namesFile } } for (item in funtionArray) { print item " "(functionArray[item] / 2) " " >> namesFile } ## show this on console as last thing because it is handy for next ## step which is usually edit of the file. print "output fileName= ", outputFileName } ########################################################################### ## Miscellaneous functions ## function to get length of an array. This exists because awk inconsistent ## between implementation about the existence of a size or length function ## for arrays. One input, the array. Returns the count if items in the ## array. The variable i and k are local function arrayLength( a, i, k) { for( i in a) k++ return k }
No CVS admin address has been configured |
Powered by ViewCVS 0.9.2 |