###
### AWK Script to clean up an OpenPegasus trace by modifying the info
### for a number of fields including
###   - Change from absolute time to relative time to make seeing
###     time between events easier
###   - Change from numeric thread ID to symbolic threadID where the
###     symbolic thread ID is THREADxx
###   - Indent XML that is not indented in the trace
###   - Display only trace lines after a defined point where the
###     defined point is the first trace line with a defined Component.
###     The component is defined by the startElement variable on the
###     command line with default of XmlIO
###   - Set marker on selected lines. Selection defined by input file.
###     This requires match of each item in the input array of strings
###     against each line
###
#### TODO
####   Add filter on display by component type
####   Add color for particular things we find (Color would be option vs some
####   other marker for particular things (ex. error found)
####   Find way to trace through complete operation from thread to thread
####   Find way to match enter and exit calls. Could be indent or some marker.
####   Would probably assume same thread
#### NOTE: This code uses one regex expression specific to POSIX awk,
####   the interval expression (i.e. the {number}). This may require
####   that the calling function include either the --posix or
####   --re-interval option on the command line to include this
####   functionality. If the time and thread info do not translate
####   it is probably this incompatibility.
####
## NOTE(review): the text of this script reached review with its line breaks
## collapsed, and it looks as if every "<...>" span was stripped from it.
## The rules that were intact have been re-formatted below.  The rules that
## lost text (XML detection, XML indentation, and everything between the
## XML indentation rule and the final printf of the output rule) are kept
## verbatim inside comments and are NOT reconstructed -- restore them from
## an intact copy of the script before relying on the formatted output.

############################################################################
## Initialisation: constants, counters, file names and the keyword regex.
############################################################################
BEGIN {
    ## Boolean values, defined for readability.
    false = 0
    true = 1

    startTime = 0               ## time of first trace entry
    threadID = 1                ## current thread number
    processID = 1               ## current process number
    showOutput = 0              ## boolean: set once output display has started
    indent = 0                  ## indent level for xml
    isCompleteElement = false
    keywordsExist = false       ## boolean: keyword marking is enabled only
                                ## when the keyword list below is non-empty
    outputLineCount = 0         ## count of lines displayed

    ## File that the END rule appends the summary information to.
    namesFile = "FileNames.txt"

    ## Input file name.  May be supplied as a variable (-v fileName=...);
    ## otherwise it is derived from the first input record.  Only default
    ## it when it was not supplied, so a command line value is not lost.
    if (fileName == "") {
        fileName = "None"
    }
    outputFileSuffix = ".fmtd"
    outputFileName = fileName outputFileSuffix

    markThisLine = false        ## true when the current line is to be marked
    markCount = 0               ## count of marks set

    ## If the name of the trace component is not defined on the cmd line
    ## default it to XmlIO so that we do not show anything until the
    ## first XmlIO record is encountered.
    ## NOTE: This is too limited in that it does not allow having
    ## no startElement, i.e. display starting at first line of trace.
    print "Cmd line input of startElement=" startElement "." "length= " length(startElement)
    if (startElement == "") {
        startElement = "XmlIO"
    }
    ## Because of the default above, the else branch cannot currently be
    ## reached; it is kept for the day an empty startElement is allowed.
    if (length(startElement)) {
        print "Display Starts at first Trace line component = " startElement
    }
    else {
        print "Display starts at first line of original trace"
    }

    ## Get the pegasus variables
    pegRoot = ENVIRON["PEGASUS_ROOT"]

    ## Comma separated list of keywords which will be marked on output.
    ## Every fragment except the last must end with a comma; a missing
    ## comma silently fuses two keywords into one that never matches.
    keywords = "XmlIO,error,IMETHODCALL,TestServerProfileProvider,Request: queue id:,"
    keywords = keywords "Response has error,Enumeration Set Error msg,After putCache insert,"
    keywords = keywords "TestServerProfileProvider:,putcache,getcache,"
    keywords = keywords "setupFuture,_issueImmediateOpenOr,_issueOpenOr,"
    keywords = keywords "IMETHODRESPONSE,SetupFutureResponse,"
    keywords = keywords "ContextId=,"
    keywords = keywords "setProcessingState,"
    keywords = keywords "StopTimer,"
    keywords = keywords "StartTimer"

    ## Build the alternation regex "k1|k2|...".
    ## Need to test to be sure no | in keywords
    if (keywords != "") {
        numKeywords = split(keywords, keyWordArray, ",")
        keywordRegex = ""
        for (i = 1; i <= numKeywords; i++) {
            ## An empty item would yield "a||b", which matches every line.
            if (keyWordArray[i] == "") {
                continue
            }
            if (keywordRegex == "") {
                keywordRegex = keyWordArray[i]
            }
            else {
                keywordRegex = keywordRegex "|" keyWordArray[i]
            }
        }
        ## Only enable marking when there is something to match; an empty
        ## regex would mark every line.
        if (keywordRegex != "") {
            keywordsExist = true
        }
        print "Mark lines with the following keywords", keywordRegex
    }

    ## Exclude components filter.  This is a command line input parameter
    ## (-v excludeComponentFilter=...), so it is deliberately not assigned
    ## here; when not supplied it is the empty string.
    ## TODO all these should be input in form a,b,c and we map to the
    ## regex.
    if (excludeComponentFilter != "") {
        print "Excluding the following components from output", excludeComponentFilter
    }
}

############################################################################
##
##  Process the file
##
###########################################################################

## Get the first input file name and create the output file name based on
## this name.  If filename was already set, do not change.  This is done once
## on the first line input because the name on the input line does not exist
## in FILENAME until after the first read.
FNR == NR && NR == 1 {
    if (fileName == "None") {
        fileName = FILENAME
    }
    ## remove the tmp suffix since we want only the main file name
    ## for output
    sub(/\.tmp$/, "", fileName)

    ## define the output file name and initialize it empty.  The file is
    ## closed again so that the appending (>>) writes start from an empty
    ## file instead of adding to the output of an earlier run.
    outputFileName = fileName outputFileSuffix
    printf "" > outputFileName
    close(outputFileName)
    print "output fileName= ", outputFileName
}

## Trace element name that acts as flag to turn on trace.  Output is enabled
## first time the element name defined by startElement is encountered.
## Once turned on, there is no way to turn off the output
!showOutput && ($2 == startElement) {
    showOutput = 1
    print "Start Display at Line " FNR " in file " FILENAME " TraceComponent " startElement
}

## Test to see if this line is to be marked by matching to the keyword regex.
## The result is left in markThisLine so the line can be marked on output.
keywordsExist {
    markThisLine = ($0 ~ keywordRegex) ? true : false
}

## Scan line for XML Line that is not indented.  We will indent all
## consecutive XML lines in accord with xml rules
## TODO: Modify so we do not indent the first line.
##
## NOTE(review): DAMAGED RULE.  Text is missing between "/^" and "\")" in
## the surviving source, so the rule cannot be parsed and has not been
## reconstructed.  Surviving text, verbatim:
##
##|  { if($0 ~ /^") { indent = -1 } }
##|  ## non-consecutive xml line found. turn off xml indent
##|  else { #### print "FOUND XML END" inXMLState = false } }

## Indent any unindented XML
## If inXml state indent the line by indent variable
## this indents all consecutive lines that start with the XML left caret
## character in line with their xml indent level.
## NOTE: It does not indent any case of xml on a single line
##
## NOTE(review): DAMAGED RULE.  Several regex strings below look truncated
## (for example terminator = "$" and "^$"), the example comments are empty,
## and the rule never closes: the text jumps from the middle of a regex
## string straight to the tail of the output rule.  Whatever stood between
## them (presumably the time / thread / process translation and the
## function enter/exit counting that the END summary reports -- confirm
## against an intact copy) is not present.  Surviving text, verbatim:
##
##|  (inXMLState) {
##|  #### DEBUG for xml displayprint "FOUND NEXT XML Line", "indent=" indent, " isComplete " isCompleteElement, $0
##|  ## if this is not complete element (i.e start and end on single line
##|  ## include as possible terminator on same line
##|  ### issue with element that terminates on same line. needs to indent and
##|  ### then outdent after print.
##|  ## ## ## ## ##
##|  elementName = substr($1,2)
##|  terminator = "$"
##|  ## if this line is an entity close element
##|  ## TODO why the second char is /. seems to match
##|  if ($0 ~ "^$") { type = 1 indent-- isCompleteElement = true }
##|  ## if this is a complete start and end pair
##|  else if (($0 ~ "^<[A-Z]+>.+$")) { type = 6
##|  ## Really should match the element name begin and end
##|  ## complete open and close of an element
##|  if (!isCompleteElement) { indent++ } isCompleteElement = true }
##|  ## else if this is a complete element
##|  ## This is NOT a 5. It is a 2
##|  ## 11 2
##|  ## 11 5 This should be type 2
##|  else if ($0 ~ "^<[A-Z]+[a-z=\" ]*/>$") { type = 2 if (!isCompleteElement) { indent++ } isCompleteElement = true }
##|  ## if there is a complete start element and end element
##|  ## PG:127-0-1-1
##|  else if($0 ~ terminator) { type = 3 if (!isCompleteElement) { indent++ isCompleteElement = true } }
##|  else {
##|  ## if terminating element
##|  if ($0 ~ "^> outputFileName printf("%s%s\n", marker, $0) >> outputFileName }

## Print the usage text for this script on standard output.
function USAGE() {
    print "Usage: awk --posix -f formattrace.awk [variables] [inputTraceName]"
    print "Formats and organizes the OpenPegasus trace files "
    print "and produces an output file named CIM_Tr_*.fmtd"
    print "That:"
    print " - consolidates the multiple trace files into a single file, "
    print " - modifies the time relative to the first displayed entry"
    print " - renames the thread and process ids to something more readable"
    print " - Marks selected log entries so they can be found easily"
    print " - Excludes selected comonents"
    print " - Cuts off log entries before the first instance of a defined"
    print " component (default XmlIO) which also serves as the relative"
    print " - start time."
    print "where --posix is required for awk compatibility"
    print " -f defines this awk file name"
    print " variables are any variables that may be used by the program"
    print " in the form -v name=value"
    print " startElement=ComponentName that will start display output"
    print " where default is XmlIO"
}

############################################################################
## Close by outputting the summary information to the namesFile
############################################################################
END {
    if (!showOutput) {
        print "ERROR: Never found trace component to start display"
        print "ERROR: Never found trace component to start display" >> namesFile
    }
    print "Input " NR " lines from file " fileName "." >> namesFile

    ## strftime() is a gawk extension and is not defined under the --posix
    ## flag that USAGE() asks for, so the formatted start time is not
    ## computed here:
    ##     getStartTime = strftime("%b %d %Y-%T", startTime)
    ## getStartTime is therefore empty in the message below.
    print "Original time for first record was " startTime " (" getStartTime ")"

    print markCount " lines marked" >> namesFile
    print "marks are: " keywordRegex >> namesFile
    print "Output " outputLineCount " lines to " outputFileName >> namesFile

    print "Thread IDs for ", arrayLength(threadNameArray) " threads" >> namesFile
    for (item in threadNameArray) {
        print item, threadNameArray[item] >> namesFile
    }
    print "" >> namesFile

    print "Process IDs for " arrayLength(processNameArray) " processes" >> namesFile
    for (item in processNameArray) {
        print item, processNameArray[item] >> namesFile
    }

    ## An odd count means an entry was seen without its matching exit.
    print "Count of functions called = " arrayLength(functionArray) >> namesFile
    for (item in functionArray) {
        if ((functionArray[item] % 2) == 1) {
            print "No exit match for function" item >> namesFile
        }
    }
    for (item in functionArray) {
        print item " " (functionArray[item] / 2) " " >> namesFile
    }

    ## show this on console as last thing because it is handy for next
    ## step which is usually edit of the file.
    print "output fileName= ", outputFileName
}

###########################################################################
## Miscellaneous functions

## Return the number of items in array a (0 for an empty array).  This
## exists because awk implementations are inconsistent about providing a
## size or length function for arrays.  i and k are local variables.
function arrayLength(a,    i, k) {
    k = 0
    for (i in a) {
        k++
    }
    return k
}