1 karl 1.1 ###
2 ### AWK Script to clean up an OpenPegasus trace by modifiying the info
3 ### for a number of fields including
4 ### - Change from absolute time to relative time to make seeing
5 ### time between events easier
6 ### - Change from numberic thread ID to symbolic threadID where the
7 ### symbolic thread ID is THREADxx
8 ### - Indent XML that is not indented in the trace
9 ### - Display only trace lines after a defined point where the
10 ### - defined point is the first trace line with a defined Component
11 ### The component is defined by the startElement variable on the
12 ### command line with default of XmlIO
13 ### - set Marker on selected lines. Selection defined by input file
14 ### This requires match of each item in the input array of strings
15 ### against each line
16 ###
17 #### TODO
18 #### Add filter on display by component type
19 #### Add color for particular things we find (Color would be option vs some
20 #### other marker for particular things (ex. error found)
21 #### Find way to trace through complete operation from thread to thread
22 karl 1.1 #### Find way to match enter and exit calls. Could be indent or some marker.
23 #### Would probably assume same thread
24
25 #### NOTE: This code uses one regex expression specific to to posix awk
26 #### the expression interval (i.e. the {number}. This may requie
27 #### that the calling function include either the --posix or
28 #### -re-interval option on the command line to include this
29 #### functionality. If the time and thread info do not translate
30 #### it is probably this incompatibility..
31 ####
32 BEGIN {
33 false = 0 ## defintion of true and false values for readability
34 true = 1
35
36 startTime = 0 ## Time of first trace entry
37 threadID = 1 ## variable to hold current thread number
38 processID = 1 ## variable to hold current process number
39 showOutput = 0 ## Boolean flag defines when output display is started
40 indent = 0 ## indent level for xml
41 isCompleteElement = false;
42 keywordsExist = false ## Boolean enables or disables keyword search
43 karl 1.1 ## disabled unless keywords variable is non-zero
44 outputLineCount = 0 ## count of lines displayed
45
46 namesFile = "FileNames.txt" ## default name for the nameFile, the file that
47 ## is built at end and contains summary information
48
49 fileName = "None" ## filename on input. May be supplied as variable or
50 ## or derived from first input record
51 outputFileSuffix = ".fmtd"
52 outputFileName = fileName outputFileSuffix
53
54 markThisLine = false ## indicating that the current line will be marked
55 ## with leading + per
56
57 markCount = 0 ## count of marks set.
58
59 ## If the name of the trace components is not defined on the cmd line
60 ## default it to XmlIO so that we do not show anything until the
61 ## first XmlIO record is encountered.
62 ## NOTE: This is too limited in that it does not allow having
63 ## no startElement, i.e. display starting at first line of trace.
64 karl 1.1
65 print "Cmd line input of startElement=" startElement "." "length= " length(startElement)
66 if (startElement == "")
67 {
68 startElement = "XmlIO"
69 }
70
71 if (length(startElement))
72 {
73 print "Display Starts at first Trace line component = " startElement
74 }
75 else
76 {
77 print "Display starts at first line of original trace"
78 }
79
80 ## Get the pegasus variables
81 pegRoot = ENVIRON["PEGASUS_ROOT"]
82
83 ## Create the keywordRegex from list of possible strings
84 ## Comma separated list of keywords which will be marked on output
85 karl 1.1 keywords = "error,IMETHODCALL,TestServerProfileProvider,Request: queue id:,Response has error,Enumeration Set Error msg,After putCache insert,TestServerProfileProvider:"
86 if (keywords == "")
87 {
88 ##keywords = "error,IMETHODCALL,TestServerProfileProvider"
89 }
90
91 ## Need to test to be sure no | in keywords
92
93 if (keywords != "")
94 {
95 keywordsExist = true
96
97 ## split the input keyword list and create a regex or statement
98 numKeywords = split(keywords, keyWordArray,",")
99 if (numKeywords > 1)
100 {
101 keywordRegex = keyWordArray[1]
102 for ( i = 2; i <= numKeywords; i++)
103 {
104 keywordRegex = keywordRegex "|" keyWordArray[i]
105 }
106 karl 1.1 }
107 print "Mark lines with the following keywords", keywordRegex
108 }
109
110 ## Exclude components filter. This is a command line input parameter
111 ## TODO all these should be input in form a,b,c and we map to the
112 ## regex.
113 excludeComponentFilter = ""
114
115 if (excludeComponentFilter != "")
116 {
117 print "Excluding the following components from output", excludeComponentFilter
118 }
119 print "Begin Done"
120 }
121
122 ############################################################################
123 ##
124 ## Process the file
125 ##
126 ###########################################################################
127 karl 1.1 ##
128 ## Get the first input file name and create the output file name based on
129 ## this name. If filename was already set, do not change. This is done once
130 ## on the first line input because the name on the input line does not exist
131 ## in FILENAME until after the first read.
132 FNR == NR && NR == 1 {
133 fileName = FILENAME
134
135 if (match(fileName,"\\.tmp$"))
136 {
137 ## remove the tmp suffix since we want only the main file name
138 ## for output
139 sub("\\.tmp$", "", fileName)
140 }
141
142 ## define the output file name and initialize it empty
143 outputFileName = fileName outputFileSuffix
144 print "" >outputFileName
145 print "output fileName= ", outputFileName
146 }
147
148 karl 1.1 ## Trace element name that acts as flag to turn on trace. Output is enabled
149 ## first time the element name defined by startElement is encountered.
150 ## Once turned on, there is no way to turn off the output
151 !(showOutput) && ($2 == startElement) {
152 showOutput = 1
153 print "Start Display at Line " FNR " in file " FILENAME " TraceComponent " startElement
154 }
155
156 ## Test to see if this line is to be marked by matching to the regexList
157 ## If it matches, set the markThisLine variable so it can be marked on output
158 (keywordsExist){
159 if (match($0,keywordRegex))
160 {
161 markThisLine = true
162 }
163 else
164 {
165 markThisLine = false
166 }
167 }
168
169 karl 1.1 ## Scan line for XML Line that is not indented. We will indent all
170 ## consecutive XML lines in accord with xml rules
171 ## TODO: Modify so we do not indent the first line.
172 { if($0 ~ /^</)
173 {
174 #### print "FOUND XML START"
175 inXMLState = true
176 if ($1 == "<?xml")
177 {
178 indent = 0
179 }
180 if ($1 == "</CIM>")
181 {
182 indent = -1
183 }
184 }
185 ## non-consecutive xml line found. turn off xml indent
186 else
187 {
188 #### print "FOUND XML END"
189 inXMLState = false
190 karl 1.1 }
191 }
192
193 ## Indent any unindented XML
194 ## If inXml state indent the line by indent variable
195 ## this indents all consecutive lines that start with the XML left caret
196 ## character in line with their xml indent level.
197 ## NOTE: It does not indent any case of xml on a single line
198 (inXMLState) {
199 #### DEBUG for xml displayprint "FOUND NEXT XML Line", "indent=" indent, " isComplete " isCompleteElement, $0
200 ## if this is not complete element (i.e start and end on single line
201 ## include </elementName> as possible terminator on same line
202 ### isue with element that terminates on same line. needs to indent and
203 ### then outdent after print.
204 ## <IMETHODCALL NAME="References">
205 ## <LOCALNAMESPACEPATH>
206 ## <NAMESPACE NAME="test"/>
207 ## <NAMESPACE NAME="TestProvider"/>
208 ## </LOCALNAMESPACEPATH>
209
210 elementName = substr($1,2)
211 karl 1.1 terminator = "</" elementName ">$"
212 ## if this line is an entity close element
213 if ($0 ~ "^</[A-Z]/>$")
214 {
215 type = 1
216 indent--
217 isCompleteElement = true
218 }
219 ## if this is a complete start and end pair
220 else if (($0 ~ "^<[A-Z]+>.+</[A-Z]+>$"))
221 {
222 type = 6
223 ## Really should match the element name begin and end
224 ## complete open and close of an element
225 if (!isCompleteElement)
226 {
227 indent++
228 }
229 isCompleteElement = true
230 }
231 ## else if this is a complete element
232 karl 1.1 ## <NAMESPACE NAME="PG_InterOp"/> This is NOT a 5. It is a 2
233 ## 11 2 <NAMESPACE NAME="root"/>
234 ## 11 5 <NAMESPACE NAME="PG_InterOp"/> This should be type 2
235 else if ($0 ~ "^<[A-Z]+a-z=\" ]*/>$")
236 {
237 type = 2
238 if (!isCompleteElement)
239 {
240 indent++
241 }
242 isCompleteElement = true
243 }
244
245 ## if there is a complete start element and end element
246 ## <KEYVALUE VALUETYPE="string">PG:127-0-1-1</KEYVALUE>
247 else if($0 ~ terminator)
248 {
249 type = 3
250 if (!isCompleteElement)
251 {
252 indent++
253 karl 1.1 isCompleteElement = true
254 }
255 }
256 else
257 {
258 ## if terminating element
259 if ($0 ~ "^</")
260 {
261 ##if ((indent != 0) && (type != 3))
262 if ((indent != 0) && isCompleteElement)
263 {
264 indent--
265 }
266 type = 4
267 isCompleteElement = true
268 }
269 else
270 {
271 type = 5
272 if (!isCompleteElement)
273 {
274 karl 1.1 indent++
275 }
276 isCompleteElement = false
277 }
278 }
279
280 ## create the indent string, (4 char indent per level)
281 indentStr = ""
282 for (x = 0; x < indent; x++)
283 {
284 indentStr = indentStr " "
285 }
286 #### print "indentStr" indentStr "EndIndentStr", "indent=" indent, $0
287
288 ## prepend the indent string to $0
289 $0 = indentStr $0
290 }
291
292 ## input record times not corrected yet. See consolidateTrace which
293 ## should do this if called. This allows direct use of this formatter
294 ## with .trc files without the format conversion.
295 karl 1.1 ## substitute the time into the time field to move from absolute seconds
296 ## to relative seconds. This must be the first field in the input variable
297 ## substitute time of yyyy.zzzzzz for relative sec and usec
298 # yyyyyyyyyys-zzzzzzs absolute sec and usec at BOL
299 ## NOTE: This function was from before we used the consolidate.awk function
300 ## to normalize the us time field. If you use the formattrace script
301 ## this should never be called.
302 /^[0-9]{10}s-[0-9]{1,6}us:/ {
303 if (showOutput)
304 {
305 ## Normalize the time field for a sort from seconds-microseconds
306 ## seconds.microseconds as a single decimal number of fixed length
307 split($1, timeFields,"-")
308 sub("s$","",timeFields[1])
309 sub("us:$","",timeFields[2])
310 while (length(timeFields[2]) < 6)
311 {
312 timeFields[2] = "0" timeFields[2]
313 }
314
315 ## if this is the first entry, save the current time as start time.
316 karl 1.1 if (startTime == 0)
317 {
318 startTime = timeFields[1]
319 }
320
321 time = timeFields[1] - startTime
322 newTime = sprintf("%.4u", time)
323 newTime = newTime "." timeFields[2] ":"
324
325 #### print "Time 1 " $1 " formatted to " newTime " starttime " startTime
326 $1 = newTime
327 }
328 }
329
330 ## If the file was already mapped to the new time format then we
331 ## want to adjust to relative time which is a slightly different format
332 ## than the original trace format. If you use the formattrace script
333 ## this is the reformatter for the log record time field. It assumes that
334 ## the second part of the field (us) is fixed length and the oveall format
335 ## of the time field is
336 ## ssssssssss.uuuuuu:
337 karl 1.1
338 /^[0-9]{10}.[0-9]{6}: / {
339 if (showOutput)
340 {
341 ## if this is the first entry, save the current time as start time.
342 split($1, timeFields,".")
343 if (startTime == 0)
344 {
345 startTime = timeFields[1]
346 }
347 time = timeFields[1] - startTime
348 newTime = sprintf("%.4u", time)
349 newTime = newTime "." timeFields[2]
350
351 #### print "Time 2 " $1 " formatted to " newTime " starttime " startTime
352 $1 = newTime
353
354 ## get the ProcessName, threadName, etc. from trace entry 3
355 ## Use the two expressions because the second one required for a very
356 ## few trace entries (ex. header files include complete path) but makes
357 ## the execution extremly long.
358 karl 1.1 if (($3 ~ /^\[[0-9]{2,5}:[0-9]{5,15}:[A-Za-z0-9_.]{1,40}:[0-9]{1,6}\]/ ) ||
359 ($3 ~ /^\[[0-9]{2,5}:[0-9]{5,15}:[[:print:]]{1,120}:[0-9]{1,6}\]/ ))
360 {
361 split($3, procFields, ":")
362
363 processName = substr(procFields[1],2) ## get and delete leading [
364 threadName = procFields[2]
365 srcFileName = procFields[3]
366 lineNum = procFields[4]
367
368 ##print "Processinfo = " " |" $3 "| processName= ", processName, " ThreadName= ", threadName
369 ## Display name for the thread. The unknown is for errors only
370 localProcessName = "Unknown"
371 localThreadName = "Unknown"
372
373 ## if this is new process, add to the array of process names
374 if (!(processName in processNameArray))
375 {
376 ## create the new process name
377 x = sprintf("%.2u", processID++)
378 processNameArray[processName] = ("P" x)
379 karl 1.1 }
380 localProcessName = processNameArray[processName]
381
382 ## if this is new thread, add to the array of thread names.
383 if (!(threadName in threadNameArray))
384 {
385 ## create the new thread name
386 x = sprintf("%.2u", threadID++)
387 threadNameArray[threadName] = ("TH" x)
388 }
389 localThreadName = threadNameArray[threadName]
390
391 ##print "ThreadElementFound ", $2, " thread info=" $3, " threadName=" threadName, "newThreadName=" newThreadName
392 ## substitute the new name into $3
393 $3 = "[" localProcessName ":" localThreadName ":" srcFileName ":" lineNum
394 }
395 else
396 {
397 print "No Match for Process:Thread:Source:LineNum field " $3
398 }
399 }
400 karl 1.1
401 ## Accumulate info an functions entered and exited.
402 ## We could actually accumulte time in function here. But more complex
403 ## because of multitask so we would have to match input to output
404 ## threads and process.
405 Action = $4 $5
406 if (Action ~ /^Enteringmethod$/)
407 {
408 functionArray[$6]++
409 }
410 else if (Action ~ /^Exitingmedhod$/ )
411 {
412 functionArray[$6]--
413 }
414
415 }
416
417 ## exclude from display any lines where the component name matches the
418 ## excludeFilter regex. Note that this is a match against arguement 2
419 ## directly. This is the last item so that all other processing is complete
420 ## before we decide to terminate. Also, this could be directly done
421 karl 1.1 ## in the next pattern action just as well.
422 { if ((excludeComponentFilter != "") && !inXmlState)
423 {
424 if (match($2,excludeComponentFilter))
425 {
426 next
427 }
428 }
429 }
430
431 ## print any resulting line so all lines get output.\ once showOutput has
432 ## been enabled. This function
433 ## inserts the marker in the line either a single + or minus
434 showOutput {
435 marker = ""
436 if (markThisLine != 0)
437 {
438 marker = "+"
439 markCount++
440 }
441 outputLineCount++
442 karl 1.1 ## debugging output
443 ##printf("%u %u%s%s\n", indent, type, marker, $0) >> outputFileName
444
445 printf("%s%s\n", marker, $0) >> outputFileName
446 }
447
448 function USAGE()
449 {
450 print "Usage: awk --posix -f formattrace.awk [variables] [inputTraceName]"
451
452 print "Formats and organizes the OpenPegasus trace files "
453 print "and produces an output file named CIM_Tr_*.fmtd"
454 print "That:"
455 print " - consolidates the multiple trace files into a single file, "
456 print " - modifies the time relative to the first displayed entry"
457 print " - renames the thread and process ids to something more readable"
458 print " - Marks selected log entries so they can be found easily"
459 print " - Excludes selected comonents"
460 print " - Cuts off log entries before the first instance of a defined"
461 print " component (default XmlIO) which also serves as the relative"
462 print " - start time."
463 karl 1.1 print "where --posix is required for awk compatibility"
464 print " -f defines this awk file name"
465 print " variables are any variables that may be used by the program"
466 print " in the form -v name=value"
467 print " startElement=ComponentName that will start display output"
468 print " where default is XmlIO"
469 }
470
471 ############################################################################
472 ## Close by outputting the summary information to the namesFile
473 ############################################################################
474 END{
475 if (!(showOutput))
476 {
477 print "ERROR: Never found trace component to start display"
478
479 print "ERROR: Never found trace component to start display" >> namesFile
480 }
481 print "Input " NR " lines from file " fileName "." >> namesFile
482 print "Original time for first record was " startTime
483 print markCount " lines marked" >> namesFile
484 karl 1.1 print "marks are: " keywordRegex >>namesFile
485 print "Output " outputLineCount " lines to " outputFileName >> namesFile
486 print "Thread IDs for ", arrayLength(threadNameArray) " threads" >> namesFile
487 for (item in threadNameArray)
488 {
489 print item, threadNameArray[item] >> namesFile
490 }
491 print "" >> namesFile
492 print "Process IDs for " arrayLength(processNameArray) " processes" >> namesFile
493 for (item in processNameArray)
494 {
495 print item, processNameArray[item] >> namesFile
496 }
497
498 print "Count of functions called = " arrayLength(functionArray) >> namesFile
499 for (item in funtionArray)
500 {
501 if ((functionArray[item] % 2) == 1)
502 {
503 print "No exit match for function" item >> namesFile
504 }
505 karl 1.1 }
506
507 for (item in funtionArray)
508 {
509 print item " "(functionArray[item] / 2) " " >> namesFile
510 }
511 }
512
513
514 ###########################################################################
515 ## Miscellaneous functions
516
517
518 ## function to get length of an array. This exists because awk inconsistent
519 ## between implementation about the existence of a size or length function
520 ## for arrays. One input, the array. Returns the count if items in the
521 ## array. The variable i and k are local
522 function arrayLength( a, i, k) {
523 for( i in a) k++
524 return k
525 }
526 karl 1.1
|