1##########################################################################
2# Copyright (c) 2013, 2014, University of Washington.
3# All rights reserved.
4#
5# This file is distributed under the terms in the attached LICENSE file.
6# If you do not find this file, copies can be found by writing to:
7# ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
8##########################################################################
9
10import sys, os, signal, time, getpass, subprocess, socket, pty
11import debug, machines, uw_machinedata
12from machines import Machine, MachineLockedError, MachineFactory
13
# Base directory under which the per-user harness directories and the
# per-IP "menu.lst.<ip>" links are created.
TFTP_PATH = '/var/lib/tftpboot'

# Directory containing the rack helper tools referenced below.
TOOLS_PATH = '/usr/local/bin'

# Full paths of the helper tools run by UWMachine.setup() (rackboot.sh)
# and UWMachine.reboot()/shutdown() (rackpower).
RACKBOOT = os.path.join(TOOLS_PATH, 'rackboot.sh')
RACKPOWER = os.path.join(TOOLS_PATH, 'rackpower')
18
class UWMachine(Machine):
    """Machine backend for the University of Washington rack machines.

    Static per-machine parameters are read from ``uw_machinedata.machines``
    and indexed by ``self.name``.  ``name`` is not assigned in this class;
    the per-machine subclasses created at the bottom of this module set it.

    The machine is locked by keeping a ``console`` client running on a PTY;
    the same PTY is used to read the machine's console output.
    """

    _uw_machines = uw_machinedata.machines

    # Maps a machine's hostname to the hostname whose address is used for
    # the per-IP menu.lst link (see get_ip()).  Presumably this is the
    # network interface the machine boots from -- confirm before extending.
    host2mgmt = {
        'bigfish.cs.washington.edu': 'bigfish-e1k1.cs.washington.edu',
        'swingout1.cs.washington.edu': 'swingout1-brcm1.cs.washington.edu',
        'swingout5.cs.washington.edu': 'swingout5-brcm1.cs.washington.edu'
        }

    def __init__(self, options):
        """Initialise the machine with no console lock held."""
        super(UWMachine, self).__init__(options)
        self.lockprocess = None   # Popen of the lock-holding console client
        self.masterfd = None      # master side of the console client's PTY

    def get_bootarch(self):
        """Return the boot architecture, which must be a build architecture."""
        b = self._uw_machines[self.name]['bootarch']
        assert(b in self.get_buildarchs())
        return b

    def get_machine_name(self):
        """Return the 'machine_name' entry of this machine's data record."""
        return self._uw_machines[self.name]['machine_name']

    def get_buildarchs(self):
        """Return the 'buildarchs' entry of this machine's data record."""
        return self._uw_machines[self.name]['buildarchs']

    def get_ncores(self):
        """Return the 'ncores' entry of this machine's data record."""
        return self._uw_machines[self.name]['ncores']

    def get_cores_per_socket(self):
        """Return the 'cores_per_socket' entry of this machine's record."""
        return self._uw_machines[self.name]['cores_per_socket']

    def get_tickrate(self):
        """Return the 'tickrate' entry of this machine's data record."""
        return self._uw_machines[self.name]['tickrate']

    def get_perfcount_type(self):
        """Return the 'perfcount_type' entry of this machine's data record."""
        return self._uw_machines[self.name]['perfcount_type']

    def get_kernel_args(self):
        """Return the optional 'kernel_args' entry, or None if absent."""
        return self._uw_machines[self.name].get('kernel_args')

    def get_pci_args(self):
        """Return the optional 'pci_args' entry, or None if absent."""
        return self._uw_machines[self.name].get('pci_args')

    def get_boot_timeout(self):
        """Return the optional 'boot_timeout' entry, or None if absent."""
        return self._uw_machines[self.name].get('boot_timeout')

    def get_hostname(self):
        """Return the machine name qualified with '.cs.washington.edu'."""
        return self.get_machine_name() + '.cs.washington.edu'

    def get_ip(self):
        """Resolve the address of this machine's host2mgmt entry.

        Raises KeyError if the machine's hostname has no entry in
        host2mgmt; errors from socket.gethostbyname() propagate.
        """
        return socket.gethostbyname(self.host2mgmt[self.get_hostname()])

    def get_tftp_dir(self):
        """Return TFTP_PATH/<current user>/<name>_harness."""
        user = getpass.getuser()
        return os.path.join(TFTP_PATH, user, self.name + "_harness")

    def _write_menu_lst(self, data, path):
        """Write the menu.lst contents 'data' to 'path', replacing the file."""
        debug.verbose('writing %s' % path)
        debug.debug(data)
        # 'with' guarantees the file is closed even if the write fails
        with open(path, 'w') as f:
            f.write(data)

    def _set_menu_lst(self, relpath):
        """Point TFTP_PATH/menu.lst.<ip> at 'relpath' via a symlink."""
        ip_menu_name = os.path.join(TFTP_PATH, "menu.lst." + self.get_ip())
        debug.verbose('relinking %s to %s' % (ip_menu_name, relpath))
        # The link does not exist the first time a machine is used; lexists()
        # (unlike exists()) is also true for a dangling symlink.
        if os.path.lexists(ip_menu_name):
            os.remove(ip_menu_name)
        os.symlink(relpath, ip_menu_name)

    def set_bootmodules(self, modules):
        """Write the menu.lst for 'modules' and make it this machine's menu.

        The menu data is generated by modules.get_menu_data(), which is
        given this machine's TFTP directory as a path relative to TFTP_PATH
        with a leading '/'.
        """
        tftp_dir = self.get_tftp_dir()
        fullpath = os.path.join(tftp_dir, 'menu.lst')
        relpath = os.path.relpath(fullpath, TFTP_PATH)
        tftppath = '/' + os.path.relpath(tftp_dir, TFTP_PATH)
        self._write_menu_lst(modules.get_menu_data(tftppath), fullpath)
        self._set_menu_lst(relpath)

    def lock(self):
        """Use conserver to lock the machine.

        Queries "console -i <machine>" and parses its colon-separated
        output; the fifth field is a comma-separated list of attached users,
        each of the form mode@user@host@port.  If any user other than the
        current one has a mode containing 'w', the machine is considered
        taken.  Otherwise a "console <machine>" client is started on a PTY
        and left running to hold the console; its output becomes available
        through get_output().

        Raises MachineLockedError if another user holds the console for
        writing.
        """
        machine_name = self.get_machine_name()

        # find out current status of console
        debug.verbose('executing "console -i %s" to check state' %
                      machine_name)
        # universal_newlines makes communicate() return text rather than
        # bytes on Python 3, so the str parsing below works on both versions
        proc = subprocess.Popen(["console", "-i", machine_name],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        line = proc.communicate()[0]
        assert(proc.returncode == 0)

        # check that nobody else has it open for writing
        myuser = getpass.getuser()
        parts = line.strip().split(':')
        _conname, _child, _contype, _details, users, _state = parts[:6]
        if users:
            for userinfo in users.split(','):
                mode, username, _host, _port = userinfo.split('@')[:4]
                if 'w' in mode and username != myuser:
                    raise MachineLockedError # Machine is not free

        # run a console in the background to 'hold' the lock and read output
        debug.verbose('starting "console %s"' % machine_name)
        # run on a PTY to work around terminal mangling code in console
        (self.masterfd, slavefd) = pty.openpty()
        try:
            self.lockprocess = subprocess.Popen(["console", machine_name],
                                                close_fds=True,
                                                stdout=slavefd, stdin=slavefd)
        except Exception:
            # don't leak the master side if the client could not be started
            os.close(self.masterfd)
            self.masterfd = None
            raise
        finally:
            # the child (if any) has its own copy of the slave side
            os.close(slavefd)
        # XXX: open in binary mode with no buffering
        # otherwise select.select() may block when there is data in the buffer
        self.console_out = os.fdopen(self.masterfd, 'rb', 0)

    def unlock(self):
        """Quit the lock-holding console client; no-op if none is running."""
        if self.lockprocess is None:
            return # noop
        debug.verbose('quitting console process (%d)' % self.lockprocess.pid)
        # Ask the client to exit by sending Ctrl-E, 'c', '.' on its terminal
        # (presumably conserver's disconnect escape sequence -- see
        # console(1)).  A bytes literal is required by os.write() on
        # Python 3 and is identical to the plain string on Python 2.
        os.write(self.masterfd, b"\x05c.")
        self.lockprocess.wait()
        self.lockprocess = None
        # NOTE(review): console_out (which wraps masterfd) is not closed
        # here, so the descriptor stays open until the file object is
        # collected; left as-is in case callers still read it after unlock.
        self.masterfd = None

    def __rackboot(self, args):
        """Run rackboot.sh with 'args' followed by the machine name."""
        debug.checkcmd([RACKBOOT] + args + [self.get_machine_name()])

    def setup(self):
        """Prepare the machine by running 'rackboot.sh -b -n <machine>'."""
        self.__rackboot(["-b", "-n"])

    def __rackpower(self, arg):
        """Run 'rackpower <arg> <machine>', trying at most three times.

        Stops as soon as one invocation succeeds.  A failure (reported by
        debug.checkcmd as subprocess.CalledProcessError) is logged as a
        warning; if all attempts fail the method returns without raising.
        """
        attempts = 3
        for attempt in range(attempts):
            try:
                debug.checkcmd([RACKPOWER, arg, self.get_machine_name()])
            except subprocess.CalledProcessError:
                debug.warning("rackpower %s %s failed" %
                              (arg, self.get_machine_name()))
                if attempt < attempts - 1:
                    debug.verbose("retrying...")
            else:
                # success: do not run the power command again
                return

    def reboot(self):
        """Reboot the machine via 'rackpower -r'."""
        self.__rackpower('-r')

    def shutdown(self):
        """Shut the machine down via 'rackpower -d'."""
        self.__rackpower('-d')

    def get_output(self):
        """Return the unbuffered binary file object for the console PTY.

        Only available after lock() has been called.
        """
        return self.console_out
168
169
# Register every machine of the UW machine table with the factory, in
# sorted name order.
for n in sorted(UWMachine._uw_machines):
    # Each machine gets its own UWMachine subclass whose 'name' attribute is
    # the key the UWMachine accessors use to look up the machine's record.
    class TmpMachine(UWMachine):
        name = n
    # The machine's data record is passed on as keyword arguments.
    MachineFactory.addMachine(n, TmpMachine, **UWMachine._uw_machines[n])
174