1 karl 1.1 ###
2 ### AWK Script to clean up an OpenPegasus trace by modifiying the info
3 ### for a number of fields including
4 ### - Change from absolute time to relative time to make seeing
5 ### time between events easier
6 ### - Change from numberic thread ID to symbolic threadID where the
7 ### symbolic thread ID is THREADxx
8 ### - Indent XML that is not indented in the trace
9 ### - Display only trace lines after a defined point where the
10 ### - defined point is the first trace line with a defined Component
11 ### The component is defined by the startElement variable on the
12 ### command line with default of XmlIO
13 ### - set Marker on selected lines. Selection defined by input file
14 ### This requires match of each item in the input array of strings
15 ### against each line
16 ###
17 #### TODO
18 #### Add filter on display by component type
19 #### Add color for particular things we find (Color would be option vs some
20 #### other marker for particular things (ex. error found)
21 #### Find way to trace through complete operation from thread to thread
22 karl 1.1 #### Find way to match enter and exit calls. Could be indent or some marker.
23 #### Would probably assume same thread
24
25 #### NOTE: This code uses one regex expression specific to to posix awk
26 #### the expression interval (i.e. the {number}. This may requie
27 #### that the calling function include either the --posix or
28 #### -re-interval option on the command line to include this
29 #### functionality. If the time and thread info do not translate
30 #### it is probably this incompatibility..
31 ####
32 BEGIN {
33 false = 0 ## defintion of true and false values for readability
34 true = 1
35
36 startTime = 0 ## Time of first trace entry
37 threadID = 1 ## variable to hold current thread number
38 processID = 1 ## variable to hold current process number
39 showOutput = 0 ## Boolean flag defines when output display is started
40 indent = 0 ## indent level for xml
41 isCompleteElement = false;
42 keywordsExist = false ## Boolean enables or disables keyword search
43 karl 1.1 ## disabled unless keywords variable is non-zero
44 outputLineCount = 0 ## count of lines displayed
45
46 namesFile = "FileNames.txt" ## default name for the nameFile, the file that
47 ## is built at end and contains summary information
48
49 fileName = "None" ## filename on input. May be supplied as variable or
50 ## or derived from first input record
51 outputFileSuffix = ".fmtd"
52 outputFileName = fileName outputFileSuffix
53
54 markThisLine = false ## indicating that the current line will be marked
55 ## with leading + per
56
57 markCount = 0 ## count of marks set.
58
59 ## If the name of the trace components is not defined on the cmd line
60 ## default it to XmlIO so that we do not show anything until the
61 ## first XmlIO record is encountered.
62 ## NOTE: This is too limited in that it does not allow having
63 ## no startElement, i.e. display starting at first line of trace.
64 karl 1.1
65 print "Cmd line input of startElement=" startElement "." "length= " length(startElement)
66 if (startElement == "")
67 {
68 startElement = "XmlIO"
69 }
70
71 if (length(startElement))
72 {
73 print "Display Starts at first Trace line component = " startElement
74 }
75 else
76 {
77 print "Display starts at first line of original trace"
78 }
79
80 ## Get the pegasus variables
81 pegRoot = ENVIRON["PEGASUS_ROOT"]
82
83 ## Create the keywordRegex from list of possible strings
84 ## Comma separated list of keywords which will be marked on output
|
85 karl 1.2 keywords = "XmlIO,error,IMETHODCALL,TestServerProfileProvider,Request: queue id:,"
86 keywords = keywords "Response has error,Enumeration Set Error msg,After putCache insert,"
87 keywords = keywords "TestServerProfileProvider:,putcache,getcache,"
88 keywords = keywords "setupFuture,_issueImmediateOpenOr,_issueOpenOr"
89 keywords = keywords "IMETHODRESPONSE,SetupFutureResponse,"
90 keywords = keywords "ContextId=,"
91 keywords = keywords "setProcessingState,"
92 keywords = keywords "setProcessingState,"
93 keywords = keywords "StopTimer,"
94 keywords = keywords "StartTimer"
|
95 karl 1.1 if (keywords == "")
96 {
97 ##keywords = "error,IMETHODCALL,TestServerProfileProvider"
98 }
99
100 ## Need to test to be sure no | in keywords
101
102 if (keywords != "")
103 {
104 keywordsExist = true
105
106 ## split the input keyword list and create a regex or statement
107 numKeywords = split(keywords, keyWordArray,",")
108 if (numKeywords > 1)
109 {
110 keywordRegex = keyWordArray[1]
111 for ( i = 2; i <= numKeywords; i++)
112 {
113 keywordRegex = keywordRegex "|" keyWordArray[i]
114 }
115 }
116 karl 1.1 print "Mark lines with the following keywords", keywordRegex
117 }
118
119 ## Exclude components filter. This is a command line input parameter
120 ## TODO all these should be input in form a,b,c and we map to the
121 ## regex.
122 excludeComponentFilter = ""
123
124 if (excludeComponentFilter != "")
125 {
126 print "Excluding the following components from output", excludeComponentFilter
127 }
128 }
129
130 ############################################################################
131 ##
132 ## Process the file
133 ##
134 ###########################################################################
135 ##
136 ## Get the first input file name and create the output file name based on
137 karl 1.1 ## this name. If filename was already set, do not change. This is done once
138 ## on the first line input because the name on the input line does not exist
139 ## in FILENAME until after the first read.
140 FNR == NR && NR == 1 {
141 fileName = FILENAME
142
143 if (match(fileName,"\\.tmp$"))
144 {
145 ## remove the tmp suffix since we want only the main file name
146 ## for output
147 sub("\\.tmp$", "", fileName)
148 }
149
150 ## define the output file name and initialize it empty
151 outputFileName = fileName outputFileSuffix
|
152 karl 1.2
|
153 karl 1.1 print "output fileName= ", outputFileName
154 }
155
156 ## Trace element name that acts as flag to turn on trace. Output is enabled
157 ## first time the element name defined by startElement is encountered.
158 ## Once turned on, there is no way to turn off the output
159 !(showOutput) && ($2 == startElement) {
160 showOutput = 1
161 print "Start Display at Line " FNR " in file " FILENAME " TraceComponent " startElement
162 }
163
164 ## Test to see if this line is to be marked by matching to the regexList
165 ## If it matches, set the markThisLine variable so it can be marked on output
166 (keywordsExist){
167 if (match($0,keywordRegex))
168 {
169 markThisLine = true
170 }
171 else
172 {
173 markThisLine = false
174 karl 1.1 }
175 }
176
177 ## Scan line for XML Line that is not indented. We will indent all
178 ## consecutive XML lines in accord with xml rules
179 ## TODO: Modify so we do not indent the first line.
180 { if($0 ~ /^</)
181 {
182 #### print "FOUND XML START"
183 inXMLState = true
184 if ($1 == "<?xml")
185 {
186 indent = 0
187 }
188 if ($1 == "</CIM>")
189 {
190 indent = -1
191 }
192 }
193 ## non-consecutive xml line found. turn off xml indent
194 else
195 karl 1.1 {
196 #### print "FOUND XML END"
197 inXMLState = false
198 }
199 }
200
201 ## Indent any unindented XML
202 ## If inXml state indent the line by indent variable
203 ## this indents all consecutive lines that start with the XML left caret
204 ## character in line with their xml indent level.
205 ## NOTE: It does not indent any case of xml on a single line
206 (inXMLState) {
207 #### DEBUG for xml displayprint "FOUND NEXT XML Line", "indent=" indent, " isComplete " isCompleteElement, $0
208 ## if this is not complete element (i.e start and end on single line
209 ## include </elementName> as possible terminator on same line
|
210 karl 1.2 ### issue with element that terminates on same line. needs to indent and
|
211 karl 1.1 ### then outdent after print.
212 ## <IMETHODCALL NAME="References">
213 ## <LOCALNAMESPACEPATH>
214 ## <NAMESPACE NAME="test"/>
215 ## <NAMESPACE NAME="TestProvider"/>
216 ## </LOCALNAMESPACEPATH>
217
218 elementName = substr($1,2)
219 terminator = "</" elementName ">$"
220 ## if this line is an entity close element
|
221 karl 1.2 ## TODO why the second char is /. seems to match </A/>
|
222 karl 1.1 if ($0 ~ "^</[A-Z]/>$")
223 {
224 type = 1
225 indent--
226 isCompleteElement = true
227 }
228 ## if this is a complete start and end pair
229 else if (($0 ~ "^<[A-Z]+>.+</[A-Z]+>$"))
230 {
231 type = 6
232 ## Really should match the element name begin and end
233 ## complete open and close of an element
234 if (!isCompleteElement)
235 {
236 indent++
237 }
238 isCompleteElement = true
239 }
240 ## else if this is a complete element
241 ## <NAMESPACE NAME="PG_InterOp"/> This is NOT a 5. It is a 2
242 ## 11 2 <NAMESPACE NAME="root"/>
243 karl 1.1 ## 11 5 <NAMESPACE NAME="PG_InterOp"/> This should be type 2
|
244 karl 1.2 else if ($0 ~ "^<[A-Z]+[a-z=\" ]*/>$")
|
245 karl 1.1 {
246 type = 2
247 if (!isCompleteElement)
248 {
249 indent++
250 }
251 isCompleteElement = true
252 }
253
254 ## if there is a complete start element and end element
255 ## <KEYVALUE VALUETYPE="string">PG:127-0-1-1</KEYVALUE>
256 else if($0 ~ terminator)
257 {
258 type = 3
259 if (!isCompleteElement)
260 {
261 indent++
|
262 karl 1.2 isCompleteElement = true
|
263 karl 1.1 }
264 }
265 else
266 {
267 ## if terminating element
268 if ($0 ~ "^</")
269 {
270 ##if ((indent != 0) && (type != 3))
271 if ((indent != 0) && isCompleteElement)
272 {
273 indent--
274 }
275 type = 4
276 isCompleteElement = true
277 }
278 else
279 {
280 type = 5
281 if (!isCompleteElement)
282 {
283 indent++
284 karl 1.1 }
285 isCompleteElement = false
286 }
287 }
288
|
289 karl 1.2 ## create the indent string, (2 char indent per level)
|
290 karl 1.1 indentStr = ""
291 for (x = 0; x < indent; x++)
292 {
293 indentStr = indentStr " "
294 }
295 #### print "indentStr" indentStr "EndIndentStr", "indent=" indent, $0
296
297 ## prepend the indent string to $0
298 $0 = indentStr $0
299 }
300
301 ## input record times not corrected yet. See consolidateTrace which
302 ## should do this if called. This allows direct use of this formatter
303 ## with .trc files without the format conversion.
304 ## substitute the time into the time field to move from absolute seconds
305 ## to relative seconds. This must be the first field in the input variable
306 ## substitute time of yyyy.zzzzzz for relative sec and usec
307 # yyyyyyyyyys-zzzzzzs absolute sec and usec at BOL
308 ## NOTE: This function was from before we used the consolidate.awk function
309 ## to normalize the us time field. If you use the formattrace script
310 ## this should never be called.
311 karl 1.1 /^[0-9]{10}s-[0-9]{1,6}us:/ {
312 if (showOutput)
313 {
314 ## Normalize the time field for a sort from seconds-microseconds
315 ## seconds.microseconds as a single decimal number of fixed length
316 split($1, timeFields,"-")
317 sub("s$","",timeFields[1])
318 sub("us:$","",timeFields[2])
319 while (length(timeFields[2]) < 6)
320 {
321 timeFields[2] = "0" timeFields[2]
322 }
323
324 ## if this is the first entry, save the current time as start time.
325 if (startTime == 0)
326 {
327 startTime = timeFields[1]
328 }
329
330 time = timeFields[1] - startTime
331 newTime = sprintf("%.4u", time)
332 karl 1.1 newTime = newTime "." timeFields[2] ":"
333
334 #### print "Time 1 " $1 " formatted to " newTime " starttime " startTime
335 $1 = newTime
336 }
337 }
338
339 ## If the file was already mapped to the new time format then we
340 ## want to adjust to relative time which is a slightly different format
341 ## than the original trace format. If you use the formattrace script
342 ## this is the reformatter for the log record time field. It assumes that
343 ## the second part of the field (us) is fixed length and the oveall format
344 ## of the time field is
345 ## ssssssssss.uuuuuu:
346
347 /^[0-9]{10}.[0-9]{6}: / {
348 if (showOutput)
349 {
350 ## if this is the first entry, save the current time as start time.
351 split($1, timeFields,".")
352 if (startTime == 0)
353 karl 1.1 {
354 startTime = timeFields[1]
355 }
356 time = timeFields[1] - startTime
357 newTime = sprintf("%.4u", time)
358 newTime = newTime "." timeFields[2]
359
360 #### print "Time 2 " $1 " formatted to " newTime " starttime " startTime
361 $1 = newTime
362
363 ## get the ProcessName, threadName, etc. from trace entry 3
364 ## Use the two expressions because the second one required for a very
365 ## few trace entries (ex. header files include complete path) but makes
|
366 karl 1.2 ## the execution extremly long so we try to match on first.
|
367 karl 1.1 if (($3 ~ /^\[[0-9]{2,5}:[0-9]{5,15}:[A-Za-z0-9_.]{1,40}:[0-9]{1,6}\]/ ) ||
|
368 karl 1.2 ($3 ~ /^\[[0-9]{2,5}:[0-9]{5,15}:[[:print:]]{1,120}:[0-9]{1,6}\]/ ))
|
369 karl 1.1 {
370 split($3, procFields, ":")
371
372 processName = substr(procFields[1],2) ## get and delete leading [
373 threadName = procFields[2]
374 srcFileName = procFields[3]
375 lineNum = procFields[4]
376
377 ##print "Processinfo = " " |" $3 "| processName= ", processName, " ThreadName= ", threadName
378 ## Display name for the thread. The unknown is for errors only
379 localProcessName = "Unknown"
380 localThreadName = "Unknown"
381
382 ## if this is new process, add to the array of process names
383 if (!(processName in processNameArray))
384 {
385 ## create the new process name
386 x = sprintf("%.2u", processID++)
387 processNameArray[processName] = ("P" x)
388 }
389 localProcessName = processNameArray[processName]
390 karl 1.1
391 ## if this is new thread, add to the array of thread names.
392 if (!(threadName in threadNameArray))
393 {
394 ## create the new thread name
395 x = sprintf("%.2u", threadID++)
396 threadNameArray[threadName] = ("TH" x)
397 }
398 localThreadName = threadNameArray[threadName]
399
400 ##print "ThreadElementFound ", $2, " thread info=" $3, " threadName=" threadName, "newThreadName=" newThreadName
401 ## substitute the new name into $3
402 $3 = "[" localProcessName ":" localThreadName ":" srcFileName ":" lineNum
403 }
404 else
405 {
|
406 karl 1.2 print "No Match for field Process:Thread:Source:LineNum field " $3
407 print "line " $0
|
408 karl 1.1 }
409 }
410
411 ## Accumulate info an functions entered and exited.
412 ## We could actually accumulte time in function here. But more complex
413 ## because of multitask so we would have to match input to output
414 ## threads and process.
415 Action = $4 $5
416 if (Action ~ /^Enteringmethod$/)
417 {
418 functionArray[$6]++
419 }
420 else if (Action ~ /^Exitingmedhod$/ )
421 {
422 functionArray[$6]--
423 }
424
425 }
426
427 ## exclude from display any lines where the component name matches the
428 ## excludeFilter regex. Note that this is a match against arguement 2
429 karl 1.1 ## directly. This is the last item so that all other processing is complete
430 ## before we decide to terminate. Also, this could be directly done
431 ## in the next pattern action just as well.
432 { if ((excludeComponentFilter != "") && !inXmlState)
433 {
434 if (match($2,excludeComponentFilter))
435 {
436 next
437 }
438 }
439 }
440
441 ## print any resulting line so all lines get output.\ once showOutput has
442 ## been enabled. This function
443 ## inserts the marker in the line either a single + or minus
444 showOutput {
445 marker = ""
446 if (markThisLine != 0)
447 {
448 marker = "+"
449 markCount++
450 karl 1.1 }
451 outputLineCount++
452 ## debugging output
453 ##printf("%u %u%s%s\n", indent, type, marker, $0) >> outputFileName
454
455 printf("%s%s\n", marker, $0) >> outputFileName
456 }
457
458 function USAGE()
459 {
460 print "Usage: awk --posix -f formattrace.awk [variables] [inputTraceName]"
461
462 print "Formats and organizes the OpenPegasus trace files "
463 print "and produces an output file named CIM_Tr_*.fmtd"
464 print "That:"
465 print " - consolidates the multiple trace files into a single file, "
466 print " - modifies the time relative to the first displayed entry"
467 print " - renames the thread and process ids to something more readable"
468 print " - Marks selected log entries so they can be found easily"
469 print " - Excludes selected comonents"
470 print " - Cuts off log entries before the first instance of a defined"
471 karl 1.1 print " component (default XmlIO) which also serves as the relative"
472 print " - start time."
473 print "where --posix is required for awk compatibility"
474 print " -f defines this awk file name"
475 print " variables are any variables that may be used by the program"
476 print " in the form -v name=value"
477 print " startElement=ComponentName that will start display output"
478 print " where default is XmlIO"
479 }
480
481 ############################################################################
482 ## Close by outputting the summary information to the namesFile
483 ############################################################################
484 END{
485 if (!(showOutput))
486 {
487 print "ERROR: Never found trace component to start display"
488
489 print "ERROR: Never found trace component to start display" >> namesFile
490 }
|
491 karl 1.2
|
492 karl 1.1 print "Input " NR " lines from file " fileName "." >> namesFile
|
493 karl 1.2 ## The function not defined getStart = strftime("%b %d %Y-%T", startTime)
494 ## apparentlynot defined because of posix compatibility flag.
495 print "Original time for first record was " startTime " (" getStartTime ")"
|
496 karl 1.1 print markCount " lines marked" >> namesFile
497 print "marks are: " keywordRegex >>namesFile
498 print "Output " outputLineCount " lines to " outputFileName >> namesFile
|
499 karl 1.2
|
500 karl 1.1 print "Thread IDs for ", arrayLength(threadNameArray) " threads" >> namesFile
501 for (item in threadNameArray)
502 {
503 print item, threadNameArray[item] >> namesFile
504 }
505 print "" >> namesFile
|
506 karl 1.2
|
507 karl 1.1 print "Process IDs for " arrayLength(processNameArray) " processes" >> namesFile
508 for (item in processNameArray)
509 {
510 print item, processNameArray[item] >> namesFile
511 }
512
513 print "Count of functions called = " arrayLength(functionArray) >> namesFile
514 for (item in funtionArray)
515 {
516 if ((functionArray[item] % 2) == 1)
517 {
518 print "No exit match for function" item >> namesFile
519 }
520 }
521
522 for (item in funtionArray)
523 {
524 print item " "(functionArray[item] / 2) " " >> namesFile
525 }
|
526 karl 1.2 ## show this on console as last thing because it is handy for next
527 ## step which is usually edit of the file.
528 print "output fileName= ", outputFileName
|
529 karl 1.1 }
530
531
532 ###########################################################################
533 ## Miscellaneous functions
534
535
536 ## function to get length of an array. This exists because awk inconsistent
537 ## between implementation about the existence of a size or length function
538 ## for arrays. One input, the array. Returns the count if items in the
539 ## array. The variable i and k are local
540 function arrayLength( a, i, k) {
541 for( i in a) k++
542 return k
543 }
544
|