pegasus_unsupported/scripts/formattrace/formatTrace.awk - annotate

Return to formatTrace.awk CVS log
Up to [Pegasus] / pegasus_unsupported / scripts / formattrace
  1 karl  1.1 ###
  2           ###  AWK Script to clean up an OpenPegasus trace by modifiying the info
  3           ###  for a number of fields including
  4           ###      - Change from absolute time to relative time to make seeing
  5           ###        time between events easier
  6           ###      - Change from numberic thread ID to symbolic threadID where the
  7           ###        symbolic thread ID is THREADxx 
  8           ###      - Indent XML that is not indented in the trace
  9           ###      - Display only trace lines after a defined point where the
 10           ###      - defined point is the first trace line with a defined Component
 11           ###        The component is defined by the startElement variable on the
 12           ###        command line with default of XmlIO
 13           ###       - set Marker on selected lines.  Selection defined by input file
 14           ###        This requires match of each item in the input array of strings
 15           ###        against each line
 16           ###
 17           #### TODO
 18           #### Add filter on display by component type
 19           #### Add color for particular things we find (Color would be option vs some
 20           ####    other marker for particular things (ex. error found)
 21           #### Find way to trace through complete operation from thread to thread
 22 karl  1.1 #### Find way to match enter and exit calls. Could be indent or some marker.
 23           #### Would probably assume same thread
 24           
 25           #### NOTE: This code uses one regex expression specific to to posix awk
 26           ####       the expression interval (i.e. the {number}.  This may requie
 27           ####       that the calling function include either the --posix or
 28           ####       -re-interval option on the command line to include this
 29           ####       functionality. If the time and thread info do not translate
 30           ####       it is probably this incompatibility..
 31           #### 
 32           BEGIN {
 33                 false = 0          ## defintion of true and false values for readability
 34                 true = 1
 35           
 36                 startTime = 0       ## Time of first trace entry 
 37                 threadID = 1        ## variable to hold current thread number
 38                 processID = 1       ## variable to hold current process number
 39                 showOutput = 0      ## Boolean flag defines when output display is started
 40                 indent = 0          ## indent level for xml
 41                 isCompleteElement = false;
 42                 keywordsExist = false   ## Boolean enables or disables keyword search
 43 karl  1.1                           ## disabled unless keywords variable is non-zero
 44                 outputLineCount = 0 ## count of lines displayed
 45           
 46                 namesFile = "FileNames.txt"  ## default name for the nameFile, the file that
 47                                     ## is built at end and contains summary information
 48           
 49                 fileName = "None"   ## filename on input. May be supplied as variable or 
 50                                     ## or derived from first input record
 51                 outputFileSuffix = ".fmtd"
 52                 outputFileName = fileName outputFileSuffix
 53           
 54                 markThisLine = false   ## indicating that the current line will be marked
 55                                        ## with leading + per
 56           
 57                 markCount = 0       ## count of marks set.
 58           
 59                 ## If the name of the trace components is not defined on the cmd line
 60                 ## default it to XmlIO so that we do not show anything until the
 61                 ## first XmlIO record is encountered.
 62                 ## NOTE: This is too limited in that it does not allow having
 63                 ## no startElement, i.e. display starting at first line of trace.
 64 karl  1.1 
 65                 print "Cmd line input of startElement=" startElement "." "length= " length(startElement)
 66                 if (startElement == "")
 67                 {
 68                    startElement = "XmlIO"
 69                 }
 70           
 71                 if (length(startElement))
 72                 {
 73                    print "Display Starts at first Trace line component  = " startElement
 74                 }
 75                 else
 76                 {
 77                    print "Display starts at first line of original trace"
 78                 }
 79           
 80                 ## Get the pegasus variables
 81                 pegRoot = ENVIRON["PEGASUS_ROOT"]
 82           
 83                 ## Create the keywordRegex from list of possible strings
 84                 ## Comma separated list of keywords which will be marked on output
 85 karl  1.1       keywords = "error,IMETHODCALL,TestServerProfileProvider,Request: queue id:,Response has error,Enumeration Set Error msg,After putCache insert,TestServerProfileProvider:"
 86                 if (keywords == "")
 87                 {
 88                    ##keywords = "error,IMETHODCALL,TestServerProfileProvider"
 89                 }
 90           
 91                 ## Need to test to be sure no | in keywords
 92           
 93                 if (keywords != "")
 94                 {
 95                    keywordsExist = true
 96           
 97                    ## split the input keyword list and create a regex or statement
 98                    numKeywords = split(keywords, keyWordArray,",")
 99                    if (numKeywords > 1)
100                    {
101                       keywordRegex = keyWordArray[1]
102                       for ( i = 2; i <= numKeywords; i++)
103                       {
104                          keywordRegex = keywordRegex "|" keyWordArray[i]
105                       }
106 karl  1.1          }
107                    print "Mark lines with the following keywords", keywordRegex
108                 }
109           
110                 ## Exclude components filter. This is a command line input parameter
111                 ## TODO all these should be input in form a,b,c and we map to the
112                 ## regex.
113                 excludeComponentFilter = ""
114           
115                 if (excludeComponentFilter != "")
116                 {
117                    print "Excluding the following components from output", excludeComponentFilter
118                 }
119                 print "Begin Done"
120           }
121           
122           ############################################################################
123           ##
124           ##       Process the file
125           ##
126           ###########################################################################
127 karl  1.1 ##
128           ## Get the first input file name and create the output file name based on
129           ## this name. If filename was already set, do not change. This is done once
130           ## on the first line input because the name on the input line does not exist
131           ## in FILENAME until after the first read.
132           FNR == NR && NR == 1 {
133              fileName = FILENAME
134           
135              if (match(fileName,"\\.tmp$"))
136              {
137                 ## remove the tmp suffix since we want only the main file name
138                 ## for output
139                 sub("\\.tmp$", "", fileName)
140              }
141           
142              ## define the output file name and initialize it empty
143              outputFileName = fileName outputFileSuffix
144              print "" >outputFileName
145              print "output fileName= ", outputFileName
146           }
147           
148 karl  1.1 ## Trace element name that acts as flag to turn on trace. Output is enabled
149           ## first time the element name defined by startElement is encountered.
150           ## Once turned on, there is no way to turn off the output
151           !(showOutput) && ($2 == startElement) {
152              showOutput = 1
153              print "Start Display at Line " FNR " in file " FILENAME " TraceComponent " startElement
154           }
155           
156           ## Test to see if this line is to be marked by matching to the regexList
157           ## If it matches, set the markThisLine variable so it can be marked on output
158           (keywordsExist){
159              if (match($0,keywordRegex))
160              {
161                 markThisLine = true
162              }
163              else
164              {
165                 markThisLine = false
166              }
167           }
168           
169 karl  1.1 ## Scan line for XML Line that is not indented.  We will indent all
170           ## consecutive XML lines in accord with xml rules
171           ## TODO: Modify so we do not indent the first line.
172           { if($0 ~ /^</)
173              {
174                 #### print "FOUND XML START"
175                 inXMLState = true
176                 if ($1 == "<?xml")
177                 {
178                    indent = 0
179                 }
180                 if ($1 == "</CIM>")
181                 {
182                    indent = -1
183                 }
184              }
185              ## non-consecutive xml line found. turn off xml indent
186              else
187              {
188                 #### print "FOUND XML END"
189                 inXMLState = false
190 karl  1.1    }
191           }
192           
193           ## Indent any unindented XML
194           ## If inXml state indent the line by indent variable
195           ## this indents all consecutive lines that start with the XML left caret
196           ## character in line with their xml indent level.
197           ## NOTE: It does not indent any case of xml on a single line
198           (inXMLState) {
199              #### DEBUG for xml displayprint "FOUND NEXT XML Line", "indent=" indent, " isComplete " isCompleteElement, $0
200              ## if this is not complete element (i.e start and end on single line
201              ## include </elementName> as possible terminator on same line
202              ### isue with element that terminates on same line. needs to indent and
203              ### then outdent after print.
204           ##          <IMETHODCALL NAME="References">
205           ##            <LOCALNAMESPACEPATH>
206           ##            <NAMESPACE NAME="test"/>
207           ##            <NAMESPACE NAME="TestProvider"/>
208           ##          </LOCALNAMESPACEPATH>
209           
210              elementName = substr($1,2)
211 karl  1.1    terminator = "</" elementName ">$"
212              ## if this line is an entity close element
213              if ($0 ~ "^</[A-Z]/>$")
214              {
215                 type = 1
216                 indent--
217                 isCompleteElement = true
218              }
219              ## if this is a complete start and end pair
220              else if (($0 ~ "^<[A-Z]+>.+</[A-Z]+>$"))
221              {
222                 type = 6
223                 ## Really should match the element name begin and end
224                 ## complete open and close of an element
225                 if (!isCompleteElement)
226                 {
227                    indent++
228                 }
229                 isCompleteElement = true
230              }
231              ## else if this is a complete element
232 karl  1.1    ## <NAMESPACE NAME="PG_InterOp"/> This is NOT a 5. It is a 2
233              ## 11 2                      <NAMESPACE NAME="root"/>
234              ## 11 5                      <NAMESPACE NAME="PG_InterOp"/> This should be type 2
235              else if ($0 ~ "^<[A-Z]+a-z=\" ]*/>$")
236              {
237                 type = 2
238                 if (!isCompleteElement)
239                 {
240                    indent++
241                 }
242                 isCompleteElement = true
243              }
244           
245              ## if there is a complete start element and end element
246              ## <KEYVALUE VALUETYPE="string">PG:127-0-1-1</KEYVALUE>
247              else if($0 ~ terminator)
248              {
249                 type = 3
250                 if (!isCompleteElement)
251                 {
252                   indent++
253 karl  1.1        isCompleteElement = true
254                 }
255              }
256              else
257              {
258                 ## if terminating element
259                 if ($0 ~ "^</")
260                 {
261                    ##if ((indent != 0) && (type != 3))
262                    if ((indent != 0) && isCompleteElement)
263                    {
264                       indent--
265                    }
266                    type = 4
267                    isCompleteElement = true
268                 }
269                 else
270                 {
271                   type = 5
272                   if (!isCompleteElement)
273                   {
274 karl  1.1            indent++
275                   }
276                   isCompleteElement = false
277                 }
278              }
279           
280              ## create the indent string, (4 char indent per level)
281              indentStr = ""
282              for (x = 0; x < indent; x++)
283              {
284                 indentStr = indentStr "  "
285              }
286              #### print "indentStr" indentStr "EndIndentStr", "indent=" indent, $0
287           
288              ## prepend the indent string to $0
289              $0 = indentStr $0  
290           }
291           
292           ## input record times not corrected yet.  See consolidateTrace which
293           ## should do this if called.  This allows direct use of this formatter
294           ## with .trc files without the format conversion.
295 karl  1.1 ## substitute the time into the time field to move from absolute seconds
296           ## to relative seconds. This must be the first field in the input variable
297           ## substitute time of yyyy.zzzzzz for  relative sec and usec
298           #                     yyyyyyyyyys-zzzzzzs absolute sec and usec at BOL
299           ## NOTE: This function was from before we used the consolidate.awk function
300           ## to normalize the us time field.  If you use the formattrace script
301           ## this should never be called.
302           /^[0-9]{10}s-[0-9]{1,6}us:/ {
303              if (showOutput)
304              {
305                 ## Normalize the time field for a sort from  seconds-microseconds
306                 ## seconds.microseconds as a single decimal number of fixed length
307                 split($1, timeFields,"-")
308                 sub("s$","",timeFields[1])
309                 sub("us:$","",timeFields[2])
310                 while (length(timeFields[2]) < 6)
311                 {
312                   timeFields[2] = "0" timeFields[2]
313                 }
314           
315                 ## if this is the first entry, save the current time as start time.
316 karl  1.1       if (startTime == 0)
317                 {
318                    startTime = timeFields[1]
319                 }
320           
321                 time = timeFields[1] - startTime
322                 newTime = sprintf("%.4u", time)
323                 newTime = newTime "." timeFields[2] ":"
324           
325                 #### print "Time 1 " $1 " formatted to  " newTime " starttime " startTime
326                 $1 = newTime
327               }
328           }
329           
330           ## If the file was already mapped to the new time format then we
331           ## want to adjust to relative time which is a slightly different format
332           ## than the original trace format. If you use the formattrace script
333           ## this is the reformatter for the log record time field. It assumes that
334           ## the second part of the field (us) is fixed length and the oveall format
335           ## of the time field is
336           ##      ssssssssss.uuuuuu:
337 karl  1.1 
338           /^[0-9]{10}.[0-9]{6}: / {
339              if (showOutput)
340              {
341                 ## if this is the first entry, save the current time as start time.
342                 split($1, timeFields,".")
343                 if (startTime == 0)
344                 {
345                    startTime = timeFields[1]
346                 }
347                 time = timeFields[1] - startTime
348                 newTime = sprintf("%.4u", time)
349                 newTime = newTime "." timeFields[2]
350           
351                 #### print "Time 2 " $1 " formatted to  " newTime " starttime " startTime
352                 $1 = newTime
353           
354                 ## get the ProcessName, threadName, etc.  from trace entry 3
355                 ## Use the two expressions because the second one required for a very
356                 ## few trace entries (ex. header files include complete path) but makes
357                 ## the execution extremly long.
358 karl  1.1       if (($3 ~ /^\[[0-9]{2,5}:[0-9]{5,15}:[A-Za-z0-9_.]{1,40}:[0-9]{1,6}\]/ ) ||
359                      ($3 ~ /^\[[0-9]{2,5}:[0-9]{5,15}:[[:print:]]{1,120}:[0-9]{1,6}\]/ ))
360                 {
361                    split($3, procFields, ":")
362           
363                    processName = substr(procFields[1],2)  ## get and delete leading [
364                    threadName = procFields[2]
365                    srcFileName = procFields[3]
366                    lineNum = procFields[4]
367           
368                    ##print "Processinfo = " " |" $3 "| processName= ", processName, " ThreadName= ", threadName
369                    ## Display name for the thread. The unknown is for errors only
370                    localProcessName = "Unknown"
371                    localThreadName = "Unknown"
372           
373                    ## if this is new process, add to the array of process names
374                    if (!(processName in processNameArray))
375                    {
376                        ## create the new process name
377                        x = sprintf("%.2u", processID++)
378                        processNameArray[processName] = ("P" x)
379 karl  1.1          }
380                    localProcessName = processNameArray[processName]
381           
382                    ## if this is new thread, add to the array of thread names.
383                    if (!(threadName in threadNameArray))
384                    {
385                        ## create the new thread name
386                        x = sprintf("%.2u", threadID++)
387                        threadNameArray[threadName] = ("TH" x)
388                    }
389                    localThreadName = threadNameArray[threadName]
390           
391                    ##print "ThreadElementFound ", $2, " thread info=" $3, " threadName=" threadName, "newThreadName=" newThreadName
392                    ## substitute the new name into $3
393                    $3 = "[" localProcessName ":" localThreadName ":" srcFileName ":" lineNum
394                 }
395                 else
396                 {
397                    print "No Match for Process:Thread:Source:LineNum field " $3
398                 }
399              }
400 karl  1.1 
401              ## Accumulate info an functions entered and exited.
402              ## We could actually accumulte time in function here. But more complex
403              ## because of multitask so we would have to match input to output 
404              ## threads and process.
405              Action = $4 $5
406              if (Action ~ /^Enteringmethod$/)
407              {
408                 functionArray[$6]++
409              }
410              else if (Action ~ /^Exitingmedhod$/ )
411              {
412                 functionArray[$6]--
413              }
414           
415           }
416           
417           ## exclude from display any lines where the component name matches the
418           ## excludeFilter regex.  Note that this is a match against arguement 2
419           ## directly.  This is the last item so that all other processing is complete
420           ## before we decide to terminate.  Also, this could be directly done
421 karl  1.1 ## in the next pattern action just as well.
422           { if ((excludeComponentFilter != "") && !inXmlState)
423              {
424                 if (match($2,excludeComponentFilter))
425                 {
426                    next
427                 }
428              }
429           }
430           
431           ## print any resulting line so all lines get output.\ once showOutput has
432           ## been enabled.  This function
433           ## inserts the marker in the line either a single + or minus
434           showOutput {
435               marker = ""
436               if (markThisLine != 0)
437               {
438                   marker = "+"
439                   markCount++
440               }
441               outputLineCount++
442 karl  1.1     ## debugging output
443               ##printf("%u %u%s%s\n", indent, type, marker, $0) >> outputFileName
444           
445               printf("%s%s\n", marker, $0) >> outputFileName
446           }
447           
448           function USAGE()
449           {
450               print "Usage: awk --posix -f formattrace.awk [variables] [inputTraceName]"
451           
452               print "Formats and organizes the OpenPegasus trace files "
453               print "and produces an output file named CIM_Tr_*.fmtd"
454               print "That:"
455               print "    - consolidates the multiple trace files into a single file, "
456               print "    - modifies the time relative to the first displayed entry"
457               print "    - renames the thread and process ids to something more readable"
458               print "    - Marks selected log entries so they can be found easily"
459               print "    - Excludes selected comonents"
460               print "    - Cuts off log entries before the first instance of a defined"
461               print "      component (default XmlIO) which also serves as the relative"
462               print "    - start time."
463 karl  1.1     print "where --posix is required for awk compatibility"
464               print "  -f defines this awk file name"
465               print "  variables are any variables that may be used by the program"
466               print "     in the form -v name=value"
467               print "  startElement=ComponentName that will start display output"
468               print "    where default is XmlIO"
469           }
470           
471           ############################################################################
472           ## Close by outputting the summary information to the namesFile
473           ############################################################################
474           END{
475               if (!(showOutput))
476               {
477                 print "ERROR: Never found trace component to start display"
478           
479                 print "ERROR: Never found trace component to start display" >> namesFile
480               }
481               print "Input " NR " lines from file " fileName "." >> namesFile
482               print "Original time for first record was " startTime
483               print markCount " lines marked" >> namesFile
484 karl  1.1     print "marks are: " keywordRegex >>namesFile
485               print "Output " outputLineCount " lines to " outputFileName >> namesFile
486               print "Thread IDs for ", arrayLength(threadNameArray) " threads" >> namesFile
487               for (item in threadNameArray)
488               {
489                   print item, threadNameArray[item] >> namesFile
490               }
491               print "" >> namesFile
492               print "Process IDs for " arrayLength(processNameArray) " processes" >> namesFile
493               for (item in processNameArray)
494               {
495                   print item, processNameArray[item] >> namesFile
496               }
497           
498               print "Count of functions called = " arrayLength(functionArray) >> namesFile
499               for (item in funtionArray)
500               {
501                  if ((functionArray[item] % 2) == 1)
502                  {
503                     print "No exit match for function" item >> namesFile
504                  }
505 karl  1.1     }
506           
507               for (item in funtionArray)
508               {
509                     print item " "(functionArray[item] / 2) " " >> namesFile
510               }
511           }
512           
513           
514           ###########################################################################
515           ## Miscellaneous functions
516           
517           
518           ## function to get length of an array.  This exists because awk inconsistent
519           ## between implementation about the existence of a size or length function
520           ## for arrays.  One input, the array. Returns the count if items in the
521           ## array. The variable i and k are local
522           function arrayLength( a, i, k) {
523               for( i in a) k++
524               return k
525           }
526 karl  1.1
No CVS admin address has been configured