Deleted Added
sdiff udiff text old ( 268437 ) new ( 281812 )
full compact
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files. Specifically:
15
16'CWD' to initialize our notion.
17
18'C' to track chdir(2) on a per process basis
19
20'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
24 dir read.
25
26'W' files opened for write or read-write,
27 for filemon V3 and earlier.
28
29'E' files executed.
30
31'L' files linked
32
33'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40 $Id: meta2deps.py,v 1.17 2014/04/05 22:56:54 sjg Exp $
41
42 Copyright (c) 2011-2013, Juniper Networks, Inc.
43 All rights reserved.
44
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions
47 are met:
48 1. Redistributions of source code must retain the above copyright
49 notice, this list of conditions and the following disclaimer.
50 2. Redistributions in binary form must reproduce the above copyright
51 notice, this list of conditions and the following disclaimer in the
52 documentation and/or other materials provided with the distribution.
53
54 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65
66"""
67
68import os, re, sys
69
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d.

    dict -- mapping to look in (the name is part of the public signature).
    key  -- key to look up.
    d    -- value returned when key is absent (default None).
    """
    return dict[key] if key in dict else d
75
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path without resolving symlinks.

    A trailing '/.' is dropped first.  Absolute paths are returned as
    they are, '.' maps to cwd and './x' maps to cwd + '/x'.  Any other
    relative path is looked for under last_dir and then under cwd; the
    first candidate that exists on disk is returned.

    Returns None when no candidate exists.
    When debug > 2 each probe is reported on debug_out.
    """
    verbose = debug > 2

    if path.endswith('/.'):
        path = path[:-2]
    if path[:1] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # no point probing the same directory twice
    if last_dir == cwd:
        candidates = [cwd]
    else:
        candidates = [last_dir, cwd]

    for base in candidates:
        if not base:
            continue
        candidate = '/'.join([base, path])
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
105
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path for path, located via cwd or last_dir.

    This is called very often, so os.path.realpath() is only used
    when the resolved path contains './' past its first character,
    ends in '/..' or is itself a symlink.  If resolve() finds nothing
    the original path is used for those checks.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if resolved:
        path = resolved
    needs_realpath = (path.find('./') > 0
                      or path.endswith('/..')
                      or os.path.islink(path))
    if needs_realpath:
        return os.path.realpath(path)
    return path
120
def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without adjacent duplicates.

    list    -- the list to sort; it is sorted in place.
    cmp     -- optional old-style two-argument comparison function.
    key     -- optional one-argument key function.
    reverse -- sort in descending order when true.

    Returns a new list holding each distinct element once, in sorted order.
    """
    if cmp is not None:
        # list.sort() takes no cmp argument on Python 3, so express the
        # comparison as a key; it is applied to the key values when a key
        # function was also supplied.
        from functools import cmp_to_key
        cmp_key = cmp_to_key(cmp)
        if key is None:
            key = cmp_key
        else:
            extract = key
            key = lambda item: cmp_key(extract(item))
    list.sort(key=key, reverse=reverse)

    nl = []
    for e in list:
        # equal elements are adjacent after sorting, so comparing with the
        # last kept element is enough to drop duplicates
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl
130
def add_trims(x):
    """Return the four spellings of x used as trim suffixes.

    In order: '/x/', '/x', 'x/' and 'x'.
    """
    affixes = (('/', '/'), ('/', ''), ('', '/'), ('', ''))
    return [lead + x + trail for lead, trail in affixes]
136
class MetaFile:
    """Parse the meta files generated by bmake and collect dependencies.

    The containers below are class attributes on purpose: they are
    shared by every instance, so dependencies accumulate while one
    instance is created per meta file (this is how main() uses the
    class).  reset() gives an instance its own empty containers.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """If name is set we will parse it now.

        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                stored as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be written.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """
        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # NOTE: self.conf is assigned on the instance, so this block
            # runs for every new instance; the membership tests below keep
            # the shared lists free of duplicates.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing useful to match against
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            # Suffixes to strip from the objroots.  Unset names are
            # skipped: an empty suffix matches every objroot and would
            # blank it, and a missing HOST_TARGET cannot be concatenated.
            trim_names = [self.machine]
            if self.machine == 'host':
                trim_names.append(self.host_target)
            if self.target_spec:
                trim_names.append(self.target_spec)
            trim_list = []
            for trim_name in trim_names:
                if trim_name:
                    trim_list += add_trims(trim_name)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot:
                    continue            # trimmed away entirely
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            # try to work RELDIR out from CURDIR
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop, '')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized

        if name:
            self.try_parse()

    def reset(self):
        """Reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """Return DIRDEPS: the obj deps joined by sep, prefixed by sep.strip()."""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """Return SRC_DIRDEPS: the src deps joined by sep, prefixed by sep.strip()."""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Print DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        Does nothing when RELDIR is not known.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """Remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """Add data to list if it isn't already there.

        clue is only used to label the debug message.
        """
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """Return the first entry of list that path starts with, else None.

        The logical tree may be split across multiple trees.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """Return path within objroot, taking care of .dirdep files.

        objroot -- the object root that dir was found under.
        dir     -- directory holding the file.
        path    -- full path of the file.
        input   -- the raw path from the meta file record.

        The first line of path + '.dirdep' or dir + '/.dirdep' names the
        directory when such a file exists.  Otherwise the name is derived
        from dir relative to objroot, and input and dir are marked as seen.
        May return None.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as fh:
                    ddep = fh.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # Drop a trailing machine/target_spec and the character
                # in front of it.  An unset machine must not match here:
                # every string ends with '' and the name would lose its
                # last character.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '')
                return None
            # what follows objroot is "<machine>/<dir>"
            m = self.dirdep_re.match(dir.replace(objroot, ''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """Call parse(); on failure report the file and line number, then re-raise."""
        try:
            self.parse(name, file)
        except:
            # deliberately catches everything: we only add a useful clue
            # and the exception is re-raised untouched
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

            # Meta data file "path"
            CMD "command-line"
            CWD "cwd"
            TARGET "target"
            -- command output --
            -- filemon acquired metadata --
            # buildmon version 3
            V 3
            C "pid" "cwd"
            E "pid" "path"
            F "pid" "child"
            R "pid" "path"
            W "pid" "path"
            X "pid" "status"
            D "pid" "path"
            L "pid" "src" "target"
            M "pid" "old" "new"
            S "pid" "path"
            # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name -- if given, replaces self.name.
        file -- if given, an open file to read instead of opening self.name;
                it is left open for the caller.
        """
        if name:
            self.name = name
        if file:
            f = file
        else:
            f = open(self.name, 'r')
        try:
            self._parse_records(f)
        finally:
            # only close what we opened, even when a record blows up
            if not file:
                f.close()

    def _parse_records(self, f):
        """Process every record of the open meta file f (helper for parse)."""
        version = 0                     # unknown
        # start from our own idea of cwd until a CWD record says otherwise
        cwd = last_dir = self.cwd
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        for line in f:
            self.line += 1
            # ignore anything we don't care about
            if not line[0] in interesting:
                continue
            if self.debug > 2:
                print("input:", line, end=' ', file=self.debug_out)
            w = line.split()

            if skip:
                # header part: everything up to the 'V' record
                if w[0] == 'V':
                    skip = False
                    version = int(w[1])
                    # Disabled in the original:
                    # if version < 4:
                    #     # we cannot ignore 'W' records
                    #     # as they may be 'rw'
                    #     interesting += 'W'
                elif w[0] == 'CWD':
                    self.cwd = cwd = last_dir = w[1]
                    self.seenit(cwd)    # ignore this
                    if self.debug:
                        print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                continue

            pid = int(w[1])
            if pid != last_pid:
                # switching process: save the state of the previous one
                # and pick up whatever we know about this one
                if last_pid:
                    pid_cwd[last_pid] = cwd
                    pid_last_dir[last_pid] = last_dir
                cwd = getv(pid_cwd, pid, self.cwd)
                last_dir = getv(pid_last_dir, pid, self.cwd)
                last_pid = pid

            # process operations
            if w[0] == 'F':
                # the child starts out in the cwd of its parent
                npid = int(w[2])
                pid_cwd[npid] = cwd
                pid_last_dir[npid] = cwd
                last_pid = npid
                continue
            elif w[0] == 'C':
                cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                if cwd.endswith('/.'):
                    cwd = cwd[0:-2]
                last_dir = cwd
                if self.debug > 1:
                    print("cwd=", cwd, file=self.debug_out)
                continue

            if w[2] in self.seen:
                if self.debug > 2:
                    print("seen:", w[2], file=self.debug_out)
                continue
            # file operations
            if w[0] in 'ML':
                path = w[2].strip("'")
            else:
                path = w[2]
            # we are never interested in .dirdep files as dependencies
            if path.endswith('.dirdep'):
                continue
            # we don't want to resolve the last component if it is
            # a symlink
            path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
            if not path:
                continue
            dir, base = os.path.split(path)
            if dir in self.seen:
                if self.debug > 2:
                    print("seen:", dir, file=self.debug_out)
                continue
            # we can have a path in an objdir which is a link
            # to the src dir, we may need to add dependencies for each
            rdir = dir
            dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
            if rdir == dir or rdir.find('./') > 0:
                rdir = None
            # now put path back together
            path = '/'.join([dir, base])
            if self.debug > 1:
                print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
            if w[0] in 'SRWL':
                if w[0] == 'W' and path.endswith('.dirdep'):
                    continue
                if path in [last_dir, cwd, self.cwd, self.curdir]:
                    if self.debug > 1:
                        print("skipping:", path, file=self.debug_out)
                    continue
                if os.path.isdir(path):
                    # a directory is only a clue for later relative paths
                    if w[0] in 'RW':
                        last_dir = path
                    if self.debug > 1:
                        print("ldir=", last_dir, file=self.debug_out)
                    continue

            if w[0] in 'REWML':
                # finally, we get down to it
                if dir == self.cwd or dir == self.curdir:
                    continue
                srctop = self.find_top(path, self.srctops)
                if srctop:
                    if self.dpdeps:
                        self.add(self.file_deps, path.replace(srctop, ''), 'file')
                    self.add(self.src_deps, dir.replace(srctop, ''), 'src')
                    self.seenit(w[2])
                    self.seenit(dir)
                    if rdir and not rdir.startswith(srctop):
                        dir = rdir      # for below
                        rdir = None
                    else:
                        continue

                objroot = None
                # NOTE: the loop variable is deliberately 'dir'; the code
                # below uses whichever candidate the loop stopped on.
                for dir in [dir, rdir]:
                    if not dir:
                        continue
                    objroot = self.find_top(dir, self.objroots)
                    if objroot:
                        break
                if objroot:
                    ddep = self.find_obj(objroot, dir, path, w[2])
                    if ddep:
                        self.add(self.obj_deps, ddep, 'obj')
                else:
                    # don't waste time looking again
                    self.seenit(w[2])
                    self.seenit(dir)
526
527
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -d  bumps debug level

    -q  suppress the output

    Arguments ending in '.meta' are parsed directly; an argument of the
    form '@file' names a file listing meta files, separated by whitespace.

    klass -- class to instantiate for each meta file.
    xopts -- extra getopt option letters.
    xoptf -- called as xoptf(option, value, conf) for any option not
             handled here.

    Returns the last klass instance created, or None when no meta file
    was given.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely optional, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # Defaults from the environment.  As before, the lookups stop at
    # the first variable that is not set.
    try:
        machine = os.environ['MACHINE']
        if machine:
            conf['MACHINE'] = machine
        machine_arch = os.environ['MACHINE_ARCH']
        if machine_arch:
            conf['MACHINE_ARCH'] = machine_arch
        srctop = os.environ['SB_SRC']
        if srctop:
            conf['SRCTOPS'].append(srctop)
        objroot = os.environ['SB_OBJROOT']
        if objroot:
            conf['OBJROOTS'].append(objroot)
    except KeyError:
        pass

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments; they must come before the meta files
    nassigned = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        nassigned += 1
    args = args[nassigned:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in conf.items():
            print("%s=%s" % (k, v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually be multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.split():
                        m = klass(f, conf)

    if output and m is not None:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: file_depends() writes str via print()
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
671
if __name__ == '__main__':
    # Script entry point: run main() and make sure a failure is also
    # visible on stdout before the exception propagates.
    try:
        main(sys.argv)
    except:
        failure = sys.exc_info()[1]
        # yes, this goes to stdout
        print("ERROR: ", failure)
        raise
679