Deleted Added
sdiff udiff text old ( 268437 ) new ( 281812 )
full compact
1#!/usr/bin/env python
2
3from __future__ import print_function
4
"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion of the current working directory.

'C'     to track chdir(2) on a per process basis.

'R'     files read are what we really care about.
        Directories read provide a clue to resolving
        subsequent relative paths.  That is, if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'F'     forks, so that a child process inherits its parent's cwd.

'L'     files linked.

'V'     the filemon version; this record is used as a clue
        that we have reached the interesting bit.

"""
37
38"""
39RCSid:
40 $Id: meta2deps.py,v 1.18 2015/04/03 18:23:25 sjg Exp $
41
42 Copyright (c) 2011-2013, Juniper Networks, Inc.
43 All rights reserved.
44
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions
47 are met:
48 1. Redistributions of source code must retain the above copyright
49 notice, this list of conditions and the following disclaimer.
50 2. Redistributions in binary form must reproduce the above copyright
51 notice, this list of conditions and the following disclaimer in the
52 documentation and/or other materials provided with the distribution.
53
54 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65
66"""
67
68import os, re, sys
69
def getv(dict, key, d=None):
    """Return dict[key] when key is present in dict, otherwise d.

    dict -- the mapping to search
    key  -- the key to look up
    d    -- value handed back when key is absent (default None)
    """
    return dict[key] if key in dict else d
75
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path, or return None if it cannot be found.

    A trailing '/.' is dropped first.  Absolute paths are returned as is,
    '.' maps to cwd and './x' maps to cwd + '/x'; none of these are
    checked for existence.  Any other relative path is probed with
    os.path.exists() under last_dir (when it differs from cwd) and then
    under cwd, and the first hit is returned.

    When debug > 2 each probe is reported on debug_out.
    """
    if path.endswith('/.'):
        path = path[:-2]
    if path[:1] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # last_dir is only worth a separate probe when it is not cwd itself
    if last_dir == cwd:
        bases = [cwd]
    else:
        bases = [last_dir, cwd]

    for base in bases:
        if base:
            candidate = base + '/' + path
            if debug > 2:
                print("looking for:", candidate, end=' ', file=debug_out)
            if os.path.exists(candidate):
                if debug > 2:
                    print("found:", candidate, file=debug_out)
                return candidate
            if debug > 2:
                print("nope", file=debug_out)
    return None
105
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    resolve() does the cheap work; when it finds nothing the path is
    used as given.  This gets called a lot, so os.path.realpath() is
    only invoked when the result has no '/' at all, contains './' past
    the first character, ends in '/..', or is itself a symlink.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if resolved:
        path = resolved
    needs_realpath = ('/' not in path
                      or path.find('./') > 0
                      or path.endswith('/..')
                      or os.path.islink(path))
    return os.path.realpath(path) if needs_realpath else path
121
def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list with duplicates removed.

    list    -- the list to sort; it is sorted in place as a side effect
    cmp     -- optional old-style comparison function, applied to the
               key values when key is also given (as list.sort() did
               in Python 2)
    key     -- optional key function
    reverse -- sort in descending order when true

    Returns a new list holding the sorted elements with each run of
    equal neighbours collapsed to a single entry.
    """
    sort_key = key
    if cmp is not None:
        # list.sort() lost its cmp argument in Python 3; adapt it to a key
        from functools import cmp_to_key
        cmp_key = cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            sort_key = lambda e: cmp_key(key(e))
    list.sort(key=sort_key, reverse=reverse)

    nl = []
    for e in list:
        # after sorting, duplicates are adjacent: compare with the last kept
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl
131
def add_trims(x):
    """Return the four spellings of x used as trim suffixes.

    The result is, in order: '/x/', '/x', 'x/' and 'x'.
    """
    return [lead + x + trail for lead in ('/', '') for trail in ('/', '')]
137
class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: the containers below are class attributes on purpose.
    # main() creates one instance per meta file and only reports from
    # the last one, so results have to accumulate across instances.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf={}):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                # also remember the symlink-free spelling
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host':
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # an empty trim (MACHINE unset) must be skipped:
                    # objroot[0:-0] would wipe out the whole string
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if objroot[-1] == '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            # splits "machine/rel/dir" into machine and rel/dir
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            # DPDEPS output needs RELDIR; try to derive it from CURDIR
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        Does nothing (returns None) when RELDIR is not known."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug message."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot -- the object root that dir was found under
        dir     -- directory containing path
        path    -- the file being looked at
        input   -- the raw path from the meta file record

        Returns the directory dependency, or None if there is none.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # the first line names the dependency; close the file promptly
                with open(ddepf, 'r') as df:
                    ddep = df.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop a trailing ".MACHINE" or ".TARGET_SPEC"
                if ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        # built for another machine: qualify the dependency
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue, then let the exception propagate
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name -- meta file to read (defaults to self.name)
        file -- an already open file to read instead of opening name;
                it is left open for the caller
        """

        version = 0                     # unknown
        if name:
            self.name = name
        # make sure cwd is bound even if no CWD record shows up
        cwd = self.cwd
        if file:
            f = file
            self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # everything up to the 'V' record is header/output
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled upstream:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save our state, restore theirs
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the parent's cwd
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.  w[1] is the pid, so the two
                    # paths are w[2] and w[3].
                    if len(w) > 3:
                        self.parse_path(w[2].strip("'"), cwd, 'R', w)
                        self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened ourselves
            if not file:
                f.close()

    def parse_path(self, path, cwd, op=None, w=[]):
        """look at a path for the op specified

        path -- the path from the record
        cwd  -- current directory of the process that produced it
        op   -- record type to treat it as (defaults to w[0])
        w    -- the split record; w[2] is remembered as the raw input
        """

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if rdir == dir or rdir.find('./') > 0:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a hint for resolving later paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(w[2])
                self.seenit(dir)
                if rdir and not rdir.startswith(srctop):
                    dir = rdir      # for below
                    rdir = None
                else:
                    return

            objroot = None
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)
551
552
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -D "DPDEPS"

    -d  bumps debug level

    -q  suppress the normal output

    Arguments ending in '.meta' are parsed directly; an argument of
    the form '@file' names a file listing meta files, possibly
    several per line.  Meta files that do not exist are skipped.

    argv  -- full argument vector, argv[0] is ignored
    klass -- class to instantiate for each meta file
    xopts -- extra getopt option letters
    xoptf -- callable invoked as xoptf(opt, arg, conf) for options
             not handled here

    MACHINE, MACHINE_ARCH, SB_SRC and SB_OBJROOT are read from the
    environment as initial values.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely an optimisation, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is optional and independent of the others
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments; they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in conf.items():
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: file_depends() print()s str to this file
            with open(dpdeps, 'w') as dpout:
                m.file_depends(dpout)

    return m
705
# Script entry point: run the driver on the command line arguments.
if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # Report whatever went wrong, then re-raise so the traceback
        # is still produced.
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise
713