1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4#
5# Copyright 2017, Data61
6# Commonwealth Scientific and Industrial Research Organisation (CSIRO)
7# ABN 41 687 119 230.
8#
9# This software may be distributed and modified according to the terms of
10# the BSD 2-Clause license. Note that NO WARRANTY is provided.
11# See "LICENSE_BSD2.txt" for details.
12#
13# @TAG(DATA61_BSD)
14#
15
16from __future__ import absolute_import, division, print_function, \
17    unicode_literals
18from camkes.internal.seven import cmp, filter, map, zip
19
20import plyplus, re
21
# Matches a line marker as emitted by CPP to signal a change of source file
# and/or line number: optional leading whitespace, '#', an optional 'line'
# keyword, the line number (group 'lineno') and, optionally, a double-quoted
# file name (group 'filename'). Anything after that on the line is ignored.
LINE_DIRECTIVE = re.compile(
    r'\s*#\s*(?:line)?\s*(?P<lineno>\d+)(?:\s+"(?P<filename>[^"]*)")?.*$',
    re.UNICODE)
27
class SourceLocation(object):
    '''
    The location of a parsed term in its original source file.

    This class is primarily useful for showing users useful error messages and
    for debugging. Note that "original source" above means the post-processed
    source if we are using CPP. This class looks more complicated than you may
    have expected because it parses CPP line directives, which are *not*
    interpreted by the stage 1 parser, in order to give the user source
    locations that match their own interpretation.

    The location is resolved lazily: nothing is computed until one of the
    `filename`, `lineno`, `min_col` or `max_col` properties is first read.
    '''

    # The message format from which the position of a plyplus parse error is
    # recovered.
    _SYNTAX_ERROR = re.compile(
        r'^Syntax error in input at \'(?P<token>[^\']*)\' '
        r'\(type [^\)]*\) line\s+(?P<line>\d+)'
        r'(?:\s+col\s+(?P<col>\d+))?$',
        flags=re.MULTILINE)

    def __init__(self, filename, term, full_source):
        '''
        Create a location for `term`.

        filename - Name of the file the term was parsed from. This is what
          `filename` reports unless a line directive preceding the term names
          a different file.
        term - Either a plyplus syntax tree or a plyplus `ParseError`.
        full_source - Complete text that was handed to the parser, or `None`
          if it is unavailable, in which case line directives cannot be taken
          into account.
        '''
        assert plyplus.is_stree(term) or isinstance(term, plyplus.ParseError)
        self._filename = filename
        self._lineno = None
        self._min_col = None
        self._max_col = None
        self.term = term
        self.full_source = full_source
        # We defer narrowing the location because this can be expensive and
        # typically goes unused unless we hit a parse error.
        self.precise = False

    def _plyplus_position(self):
        '''
        Record the column range of the term, as far as it is known, and
        return the line number plyplus associates with it. Returns `None` if
        no line number can be determined.
        '''
        if plyplus.is_stree(self.term):
            # First, force plyplus to calculate what it thinks the term's
            # location is.
            self.term.calc_position()

            line = self.term.min_line
            self._min_col = self.term.min_col
            self._max_col = self.term.max_col
            return line

        assert isinstance(self.term, plyplus.ParseError)

        # Try to extract the position from the text of the plyplus error.
        m = self._SYNTAX_ERROR.search(str(self.term))
        if m is None:
            return None

        if m.group('col') is not None:
            self._min_col = int(m.group('col'))
            token = m.group('token')
            if token:
                # The error spans the offending token.
                self._max_col = self._min_col + len(token) - 1
        return int(m.group('line'))

    def _apply_line_directives(self, plyplus_line):
        '''
        Translate `plyplus_line`, a 1-based line index into `full_source`,
        into a (filename, line number) pair that honours every CPP line
        directive appearing before that line.

        Raises `AssertionError` if `plyplus_line` does not refer to a line of
        `full_source`.
        '''
        current_filename = self._filename
        current_lineno = 1

        # Only the lines *before* the target can influence the result, so
        # there is no need to split the remainder of the source. If the
        # source has fewer lines than `plyplus_line`, the split simply yields
        # all of them and we fall through to the error below. (A non-positive
        # `plyplus_line` never matches any line and ends up there as well.)
        lines = self.full_source.split('\n', plyplus_line - 1)

        for line_index, line in enumerate(lines):
            if line_index + 1 == plyplus_line:
                # We've reached the line this term is on.
                return current_filename, current_lineno
            m = LINE_DIRECTIVE.match(line)
            if m is not None:
                # The current line is a line directive; it states the file
                # name (optionally) and line number of the *following* line.
                if m.group('filename') is not None:
                    current_filename = m.group('filename')
                current_lineno = int(m.group('lineno'))
            else:
                # Standard (CAmkES) line.
                current_lineno += 1

        # Raised explicitly, rather than via `assert False`, so that the
        # check survives running under `python -O`.
        raise AssertionError(
            'term line number points outside its containing source '
            '(plyplus bug?)')

    def _locate(self):
        '''
        Find the exact location we are referencing.

        On return, `precise` is set and the private location fields hold the
        best information available. Columns are taken from plyplus as-is;
        only the file name and line number are adjusted for line directives.
        '''
        plyplus_line = self._plyplus_position()

        if plyplus_line is None:
            # We have no opportunity to find a more precise location.
            self.precise = True
            return

        if self.full_source is None:
            # Without the source we cannot look for line directives, so the
            # plyplus line number is the best we can report.
            self._lineno = plyplus_line
            self.precise = True
            return

        # Now parse the original source only looking for CPP line directives,
        # to adjust our understanding of the location if necessary.
        self._filename, self._lineno = \
            self._apply_line_directives(plyplus_line)
        self.precise = True

    def _ensure_located(self):
        '''
        Resolve the location on first use.
        '''
        if not self.precise:
            self._locate()

    @property
    def filename(self):
        '''
        Name of the file containing the term, after applying line directives.
        '''
        self._ensure_located()
        return self._filename

    @property
    def lineno(self):
        '''
        Line number of the term, after applying line directives, or `None` if
        it could not be determined.
        '''
        self._ensure_located()
        return self._lineno

    @property
    def min_col(self):
        '''
        First column of the term as reported by plyplus, or `None` if unknown.
        '''
        self._ensure_located()
        return self._min_col

    @property
    def max_col(self):
        '''
        Last column of the term as reported by plyplus, or `None` if unknown.
        '''
        self._ensure_located()
        return self._max_col
142