(file) Return to formatTrace.awk CVS log (file) (dir) Up to [Pegasus] / pegasus_unsupported / scripts / formattrace

  1 karl  1.1 ###
  2           ###  AWK Script to clean up an OpenPegasus trace by modifiying the info
  3           ###  for a number of fields including
  4           ###      - Change from absolute time to relative time to make seeing
  5           ###        time between events easier
  6           ###      - Change from numberic thread ID to symbolic threadID where the
  7           ###        symbolic thread ID is THREADxx 
  8           ###      - Indent XML that is not indented in the trace
  9           ###      - Display only trace lines after a defined point where the
 10           ###      - defined point is the first trace line with a defined Component
 11           ###        The component is defined by the startElement variable on the
 12           ###        command line with default of XmlIO
 13           ###       - set Marker on selected lines.  Selection defined by input file
 14           ###        This requires match of each item in the input array of strings
 15           ###        against each line
 16           ###
 17           #### TODO
 18           #### Add filter on display by component type
 19           #### Add color for particular things we find (Color would be option vs some
 20           ####    other marker for particular things (ex. error found)
 21           #### Find way to trace through complete operation from thread to thread
 22 karl  1.1 #### Find way to match enter and exit calls. Could be indent or some marker.
 23           #### Would probably assume same thread
 24           
 25           #### NOTE: This code uses one regex expression specific to to posix awk
 26           ####       the expression interval (i.e. the {number}.  This may requie
 27           ####       that the calling function include either the --posix or
 28           ####       -re-interval option on the command line to include this
 29           ####       functionality. If the time and thread info do not translate
 30           ####       it is probably this incompatibility..
 31           #### 
 32           BEGIN {
 33                 false = 0          ## defintion of true and false values for readability
 34                 true = 1
 35           
 36                 startTime = 0       ## Time of first trace entry 
 37                 threadID = 1        ## variable to hold current thread number
 38                 processID = 1       ## variable to hold current process number
 39                 showOutput = 0      ## Boolean flag defines when output display is started
 40                 indent = 0          ## indent level for xml
 41                 isCompleteElement = false;
 42                 keywordsExist = false   ## Boolean enables or disables keyword search
 43 karl  1.1                           ## disabled unless keywords variable is non-zero
 44                 outputLineCount = 0 ## count of lines displayed
 45           
 46                 namesFile = "FileNames.txt"  ## default name for the nameFile, the file that
 47                                     ## is built at end and contains summary information
 48           
 49                 fileName = "None"   ## filename on input. May be supplied as variable or 
 50                                     ## or derived from first input record
 51                 outputFileSuffix = ".fmtd"
 52                 outputFileName = fileName outputFileSuffix
 53           
 54                 markThisLine = false   ## indicating that the current line will be marked
 55                                        ## with leading + per
 56           
 57                 markCount = 0       ## count of marks set.
 58           
 59                 ## If the name of the trace components is not defined on the cmd line
 60                 ## default it to XmlIO so that we do not show anything until the
 61                 ## first XmlIO record is encountered.
 62                 ## NOTE: This is too limited in that it does not allow having
 63                 ## no startElement, i.e. display starting at first line of trace.
 64 karl  1.1 
 65                 print "Cmd line input of startElement=" startElement "." "length= " length(startElement)
 66                 if (startElement == "")
 67                 {
 68                    startElement = "XmlIO"
 69                 }
 70           
 71                 if (length(startElement))
 72                 {
 73                    print "Display Starts at first Trace line component  = " startElement
 74                 }
 75                 else
 76                 {
 77                    print "Display starts at first line of original trace"
 78                 }
 79           
 80                 ## Get the pegasus variables
 81                 pegRoot = ENVIRON["PEGASUS_ROOT"]
 82           
 83                 ## Create the keywordRegex from list of possible strings
 84                 ## Comma separated list of keywords which will be marked on output
 85 karl  1.2       keywords = "XmlIO,error,IMETHODCALL,TestServerProfileProvider,Request: queue id:,"
 86                 keywords = keywords "Response has error,Enumeration Set Error msg,After putCache insert,"
 87                 keywords = keywords "TestServerProfileProvider:,putcache,getcache,"
 88                 keywords = keywords "setupFuture,_issueImmediateOpenOr,_issueOpenOr"
 89                 keywords = keywords "IMETHODRESPONSE,SetupFutureResponse,"
 90                 keywords = keywords "ContextId=,"
 91                 keywords = keywords "setProcessingState,"
 92                 keywords = keywords "setProcessingState,"
 93                 keywords = keywords "StopTimer,"
 94                 keywords = keywords "StartTimer"
 95 karl  1.1       if (keywords == "")
 96                 {
 97                    ##keywords = "error,IMETHODCALL,TestServerProfileProvider"
 98                 }
 99           
100                 ## Need to test to be sure no | in keywords
101           
102                 if (keywords != "")
103                 {
104                    keywordsExist = true
105           
106                    ## split the input keyword list and create a regex or statement
107                    numKeywords = split(keywords, keyWordArray,",")
108                    if (numKeywords > 1)
109                    {
110                       keywordRegex = keyWordArray[1]
111                       for ( i = 2; i <= numKeywords; i++)
112                       {
113                          keywordRegex = keywordRegex "|" keyWordArray[i]
114                       }
115                    }
116 karl  1.1          print "Mark lines with the following keywords", keywordRegex
117                 }
118           
119                 ## Exclude components filter. This is a command line input parameter
120                 ## TODO all these should be input in form a,b,c and we map to the
121                 ## regex.
122                 excludeComponentFilter = ""
123           
124                 if (excludeComponentFilter != "")
125                 {
126                    print "Excluding the following components from output", excludeComponentFilter
127                 }
128           }
129           
130           ############################################################################
131           ##
132           ##       Process the file
133           ##
134           ###########################################################################
135           ##
136           ## Get the first input file name and create the output file name based on
137 karl  1.1 ## this name. If filename was already set, do not change. This is done once
138           ## on the first line input because the name on the input line does not exist
139           ## in FILENAME until after the first read.
140           FNR == NR && NR == 1 {
141              fileName = FILENAME
142           
143              if (match(fileName,"\\.tmp$"))
144              {
145                 ## remove the tmp suffix since we want only the main file name
146                 ## for output
147                 sub("\\.tmp$", "", fileName)
148              }
149           
150              ## define the output file name and initialize it empty
151              outputFileName = fileName outputFileSuffix
152 karl  1.2 
153 karl  1.1    print "output fileName= ", outputFileName
154           }
155           
156           ## Trace element name that acts as flag to turn on trace. Output is enabled
157           ## first time the element name defined by startElement is encountered.
158           ## Once turned on, there is no way to turn off the output
159           !(showOutput) && ($2 == startElement) {
160              showOutput = 1
161              print "Start Display at Line " FNR " in file " FILENAME " TraceComponent " startElement
162           }
163           
164           ## Test to see if this line is to be marked by matching to the regexList
165           ## If it matches, set the markThisLine variable so it can be marked on output
166           (keywordsExist){
167              if (match($0,keywordRegex))
168              {
169                 markThisLine = true
170              }
171              else
172              {
173                 markThisLine = false
174 karl  1.1    }
175           }
176           
177           ## Scan line for XML Line that is not indented.  We will indent all
178           ## consecutive XML lines in accord with xml rules
179           ## TODO: Modify so we do not indent the first line.
180           { if($0 ~ /^</)
181              {
182                 #### print "FOUND XML START"
183                 inXMLState = true
184                 if ($1 == "<?xml")
185                 {
186                    indent = 0
187                 }
188                 if ($1 == "</CIM>")
189                 {
190                    indent = -1
191                 }
192              }
193              ## non-consecutive xml line found. turn off xml indent
194              else
195 karl  1.1    {
196                 #### print "FOUND XML END"
197                 inXMLState = false
198              }
199           }
200           
201           ## Indent any unindented XML
202           ## If inXml state indent the line by indent variable
203           ## this indents all consecutive lines that start with the XML left caret
204           ## character in line with their xml indent level.
205           ## NOTE: It does not indent any case of xml on a single line
206           (inXMLState) {
207              #### DEBUG for xml displayprint "FOUND NEXT XML Line", "indent=" indent, " isComplete " isCompleteElement, $0
208              ## if this is not complete element (i.e start and end on single line
209              ## include </elementName> as possible terminator on same line
210 karl  1.2    ### issue with element that terminates on same line. needs to indent and
211 karl  1.1    ### then outdent after print.
212           ##          <IMETHODCALL NAME="References">
213           ##            <LOCALNAMESPACEPATH>
214           ##            <NAMESPACE NAME="test"/>
215           ##            <NAMESPACE NAME="TestProvider"/>
216           ##          </LOCALNAMESPACEPATH>
217           
218              elementName = substr($1,2)
219              terminator = "</" elementName ">$"
220              ## if this line is an entity close element
221 karl  1.2    ## TODO why the second char is /. seems to match </A/>
222 karl  1.1    if ($0 ~ "^</[A-Z]/>$")
223              {
224                 type = 1
225                 indent--
226                 isCompleteElement = true
227              }
228              ## if this is a complete start and end pair
229              else if (($0 ~ "^<[A-Z]+>.+</[A-Z]+>$"))
230              {
231                 type = 6
232                 ## Really should match the element name begin and end
233                 ## complete open and close of an element
234                 if (!isCompleteElement)
235                 {
236                    indent++
237                 }
238                 isCompleteElement = true
239              }
240              ## else if this is a complete element
241              ## <NAMESPACE NAME="PG_InterOp"/> This is NOT a 5. It is a 2
242              ## 11 2                      <NAMESPACE NAME="root"/>
243 karl  1.1    ## 11 5                      <NAMESPACE NAME="PG_InterOp"/> This should be type 2
244 karl  1.2    else if ($0 ~ "^<[A-Z]+[a-z=\" ]*/>$")
245 karl  1.1    {
246                 type = 2
247                 if (!isCompleteElement)
248                 {
249                    indent++
250                 }
251                 isCompleteElement = true
252              }
253           
254              ## if there is a complete start element and end element
255              ## <KEYVALUE VALUETYPE="string">PG:127-0-1-1</KEYVALUE>
256              else if($0 ~ terminator)
257              {
258                 type = 3
259                 if (!isCompleteElement)
260                 {
261                   indent++
262 karl  1.2         isCompleteElement = true
263 karl  1.1       }
264              }
265              else
266              {
267                 ## if terminating element
268                 if ($0 ~ "^</")
269                 {
270                    ##if ((indent != 0) && (type != 3))
271                    if ((indent != 0) && isCompleteElement)
272                    {
273                       indent--
274                    }
275                    type = 4
276                    isCompleteElement = true
277                 }
278                 else
279                 {
280                   type = 5
281                   if (!isCompleteElement)
282                   {
283                      indent++
284 karl  1.1         }
285                   isCompleteElement = false
286                 }
287              }
288           
289 karl  1.2    ## create the indent string, (2 char indent per level)
290 karl  1.1    indentStr = ""
291              for (x = 0; x < indent; x++)
292              {
293                 indentStr = indentStr "  "
294              }
295              #### print "indentStr" indentStr "EndIndentStr", "indent=" indent, $0
296           
297              ## prepend the indent string to $0
298              $0 = indentStr $0  
299           }
300           
301           ## input record times not corrected yet.  See consolidateTrace which
302           ## should do this if called.  This allows direct use of this formatter
303           ## with .trc files without the format conversion.
304           ## substitute the time into the time field to move from absolute seconds
305           ## to relative seconds. This must be the first field in the input variable
306           ## substitute time of yyyy.zzzzzz for  relative sec and usec
307           #                     yyyyyyyyyys-zzzzzzs absolute sec and usec at BOL
308           ## NOTE: This function was from before we used the consolidate.awk function
309           ## to normalize the us time field.  If you use the formattrace script
310           ## this should never be called.
311 karl  1.1 /^[0-9]{10}s-[0-9]{1,6}us:/ {
312              if (showOutput)
313              {
314                 ## Normalize the time field for a sort from  seconds-microseconds
315                 ## seconds.microseconds as a single decimal number of fixed length
316                 split($1, timeFields,"-")
317                 sub("s$","",timeFields[1])
318                 sub("us:$","",timeFields[2])
319                 while (length(timeFields[2]) < 6)
320                 {
321                   timeFields[2] = "0" timeFields[2]
322                 }
323           
324                 ## if this is the first entry, save the current time as start time.
325                 if (startTime == 0)
326                 {
327                    startTime = timeFields[1]
328                 }
329           
330                 time = timeFields[1] - startTime
331                 newTime = sprintf("%.4u", time)
332 karl  1.1       newTime = newTime "." timeFields[2] ":"
333           
334                 #### print "Time 1 " $1 " formatted to  " newTime " starttime " startTime
335                 $1 = newTime
336               }
337           }
338           
339           ## If the file was already mapped to the new time format then we
340           ## want to adjust to relative time which is a slightly different format
341           ## than the original trace format. If you use the formattrace script
342           ## this is the reformatter for the log record time field. It assumes that
343           ## the second part of the field (us) is fixed length and the oveall format
344           ## of the time field is
345           ##      ssssssssss.uuuuuu:
346           
347           /^[0-9]{10}.[0-9]{6}: / {
348              if (showOutput)
349              {
350                 ## if this is the first entry, save the current time as start time.
351                 split($1, timeFields,".")
352                 if (startTime == 0)
353 karl  1.1       {
354                    startTime = timeFields[1]
355                 }
356                 time = timeFields[1] - startTime
357                 newTime = sprintf("%.4u", time)
358                 newTime = newTime "." timeFields[2]
359           
360                 #### print "Time 2 " $1 " formatted to  " newTime " starttime " startTime
361                 $1 = newTime
362           
363                 ## get the ProcessName, threadName, etc.  from trace entry 3
364                 ## Use the two expressions because the second one required for a very
365                 ## few trace entries (ex. header files include complete path) but makes
366 karl  1.2       ## the execution extremly long so we try to match on first.
367 karl  1.1       if (($3 ~ /^\[[0-9]{2,5}:[0-9]{5,15}:[A-Za-z0-9_.]{1,40}:[0-9]{1,6}\]/ ) ||
368 karl  1.2           ($3 ~ /^\[[0-9]{2,5}:[0-9]{5,15}:[[:print:]]{1,120}:[0-9]{1,6}\]/ ))
369 karl  1.1       {
370                    split($3, procFields, ":")
371           
372                    processName = substr(procFields[1],2)  ## get and delete leading [
373                    threadName = procFields[2]
374                    srcFileName = procFields[3]
375                    lineNum = procFields[4]
376           
377                    ##print "Processinfo = " " |" $3 "| processName= ", processName, " ThreadName= ", threadName
378                    ## Display name for the thread. The unknown is for errors only
379                    localProcessName = "Unknown"
380                    localThreadName = "Unknown"
381           
382                    ## if this is new process, add to the array of process names
383                    if (!(processName in processNameArray))
384                    {
385                        ## create the new process name
386                        x = sprintf("%.2u", processID++)
387                        processNameArray[processName] = ("P" x)
388                    }
389                    localProcessName = processNameArray[processName]
390 karl  1.1 
391                    ## if this is new thread, add to the array of thread names.
392                    if (!(threadName in threadNameArray))
393                    {
394                        ## create the new thread name
395                        x = sprintf("%.2u", threadID++)
396                        threadNameArray[threadName] = ("TH" x)
397                    }
398                    localThreadName = threadNameArray[threadName]
399           
400                    ##print "ThreadElementFound ", $2, " thread info=" $3, " threadName=" threadName, "newThreadName=" newThreadName
401                    ## substitute the new name into $3
402                    $3 = "[" localProcessName ":" localThreadName ":" srcFileName ":" lineNum
403                 }
404                 else
405                 {
406 karl  1.2          print "No Match for field Process:Thread:Source:LineNum field " $3
407                    print "line " $0
408 karl  1.1       }
409              }
410           
411              ## Accumulate info an functions entered and exited.
412              ## We could actually accumulte time in function here. But more complex
413              ## because of multitask so we would have to match input to output 
414              ## threads and process.
415              Action = $4 $5
416              if (Action ~ /^Enteringmethod$/)
417              {
418                 functionArray[$6]++
419              }
420              else if (Action ~ /^Exitingmedhod$/ )
421              {
422                 functionArray[$6]--
423              }
424           
425           }
426           
427           ## exclude from display any lines where the component name matches the
428           ## excludeFilter regex.  Note that this is a match against arguement 2
429 karl  1.1 ## directly.  This is the last item so that all other processing is complete
430           ## before we decide to terminate.  Also, this could be directly done
431           ## in the next pattern action just as well.
432           { if ((excludeComponentFilter != "") && !inXmlState)
433              {
434                 if (match($2,excludeComponentFilter))
435                 {
436                    next
437                 }
438              }
439           }
440           
441           ## print any resulting line so all lines get output.\ once showOutput has
442           ## been enabled.  This function
443           ## inserts the marker in the line either a single + or minus
444           showOutput {
445               marker = ""
446               if (markThisLine != 0)
447               {
448                   marker = "+"
449                   markCount++
450 karl  1.1     }
451               outputLineCount++
452               ## debugging output
453               ##printf("%u %u%s%s\n", indent, type, marker, $0) >> outputFileName
454           
455               printf("%s%s\n", marker, $0) >> outputFileName
456           }
457           
458           function USAGE()
459           {
460               print "Usage: awk --posix -f formattrace.awk [variables] [inputTraceName]"
461           
462               print "Formats and organizes the OpenPegasus trace files "
463               print "and produces an output file named CIM_Tr_*.fmtd"
464               print "That:"
465               print "    - consolidates the multiple trace files into a single file, "
466               print "    - modifies the time relative to the first displayed entry"
467               print "    - renames the thread and process ids to something more readable"
468               print "    - Marks selected log entries so they can be found easily"
469               print "    - Excludes selected comonents"
470               print "    - Cuts off log entries before the first instance of a defined"
471 karl  1.1     print "      component (default XmlIO) which also serves as the relative"
472               print "    - start time."
473               print "where --posix is required for awk compatibility"
474               print "  -f defines this awk file name"
475               print "  variables are any variables that may be used by the program"
476               print "     in the form -v name=value"
477               print "  startElement=ComponentName that will start display output"
478               print "    where default is XmlIO"
479           }
480           
481           ############################################################################
482           ## Close by outputting the summary information to the namesFile
483           ############################################################################
484           END{
485               if (!(showOutput))
486               {
487                 print "ERROR: Never found trace component to start display"
488           
489                 print "ERROR: Never found trace component to start display" >> namesFile
490               }
491 karl  1.2 
492 karl  1.1     print "Input " NR " lines from file " fileName "." >> namesFile
493 karl  1.2     ## The function not defined getStart = strftime("%b %d %Y-%T", startTime)
494               ## apparentlynot defined because of posix compatibility flag.
495               print "Original time for first record was " startTime " (" getStartTime ")"
496 karl  1.1     print markCount " lines marked" >> namesFile
497               print "marks are: " keywordRegex >>namesFile
498               print "Output " outputLineCount " lines to " outputFileName >> namesFile
499 karl  1.2 
500 karl  1.1     print "Thread IDs for ", arrayLength(threadNameArray) " threads" >> namesFile
501               for (item in threadNameArray)
502               {
503                   print item, threadNameArray[item] >> namesFile
504               }
505               print "" >> namesFile
506 karl  1.2 
507 karl  1.1     print "Process IDs for " arrayLength(processNameArray) " processes" >> namesFile
508               for (item in processNameArray)
509               {
510                   print item, processNameArray[item] >> namesFile
511               }
512           
513               print "Count of functions called = " arrayLength(functionArray) >> namesFile
514               for (item in funtionArray)
515               {
516                  if ((functionArray[item] % 2) == 1)
517                  {
518                     print "No exit match for function" item >> namesFile
519                  }
520               }
521           
522               for (item in funtionArray)
523               {
524                     print item " "(functionArray[item] / 2) " " >> namesFile
525               }
526 karl  1.2    ## show this on console as last thing because it is handy for next
527              ## step which is usually edit of the file.
528               print "output fileName= ", outputFileName
529 karl  1.1 }
530           
531           
532           ###########################################################################
533           ## Miscellaneous functions
534           
535           
536           ## function to get length of an array.  This exists because awk inconsistent
537           ## between implementation about the existence of a size or length function
538           ## for arrays.  One input, the array. Returns the count if items in the
539           ## array. The variable i and k are local
540           function arrayLength( a, i, k) {
541               for( i in a) k++
542               return k
543           }
544           

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2