##########################################################################
# Copyright (c) 2013, 2014, University of Washington.
# All rights reserved.
#
# This file is distributed under the terms in the attached LICENSE file.
# If you do not find this file, copies can be found by writing to:
# ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
##########################################################################

import sys, os, signal, time, getpass, subprocess, socket, pty
import errno
import debug, machines, uw_machinedata
from machines import Machine, MachineLockedError, MachineFactory

TFTP_PATH='/var/lib/tftpboot'
TOOLS_PATH='/usr/local/bin'
RACKBOOT=os.path.join(TOOLS_PATH, 'rackboot.sh')
RACKPOWER=os.path.join(TOOLS_PATH, 'rackpower')


class UWMachine(Machine):
    """Machine implementation driven by the entries in uw_machinedata.

    Per-machine properties are looked up in ``_uw_machines`` under
    ``self.name``; ``name`` is supplied by the subclasses generated at the
    bottom of this module.  Console locking is done through the ``console``
    command, boot configuration through menu.lst files under TFTP_PATH, and
    power control through the RACKBOOT / RACKPOWER tools.
    """

    _uw_machines = uw_machinedata.machines

    # Maps a machine's hostname to the hostname that get_ip() resolves.
    # NOTE(review): presumably the name of the network interface used for
    # network boot -- confirm.
    host2mgmt = {
        'bigfish.cs.washington.edu': 'bigfish-e1k1.cs.washington.edu',
        'swingout1.cs.washington.edu': 'swingout1-brcm1.cs.washington.edu',
        'swingout5.cs.washington.edu': 'swingout5-brcm1.cs.washington.edu'
    }

    def __init__(self, options):
        """Initialise the machine; no console is held until lock() is called."""
        super(UWMachine, self).__init__(options)
        self.lockprocess = None  # background "console" process holding the lock
        self.masterfd = None     # master side of the PTY that process runs on

    def get_bootarch(self):
        """Return the configured boot architecture.

        Asserts that it is one of the architectures in get_buildarchs().
        """
        b = self._uw_machines[self.name]['bootarch']
        assert(b in self.get_buildarchs())
        return b

    def get_machine_name(self):
        """Return the 'machine_name' entry configured for this machine."""
        return self._uw_machines[self.name]['machine_name']

    def get_buildarchs(self):
        """Return the 'buildarchs' entry configured for this machine."""
        return self._uw_machines[self.name]['buildarchs']

    def get_ncores(self):
        """Return the 'ncores' entry configured for this machine."""
        return self._uw_machines[self.name]['ncores']

    def get_cores_per_socket(self):
        """Return the 'cores_per_socket' entry configured for this machine."""
        return self._uw_machines[self.name]['cores_per_socket']

    def get_tickrate(self):
        """Return the 'tickrate' entry configured for this machine."""
        return self._uw_machines[self.name]['tickrate']

    def get_perfcount_type(self):
        """Return the 'perfcount_type' entry configured for this machine."""
        return self._uw_machines[self.name]['perfcount_type']

    def get_kernel_args(self):
        """Return the optional 'kernel_args' entry, or None if it is absent."""
        return self._uw_machines[self.name].get('kernel_args')

    def get_pci_args(self):
        """Return the optional 'pci_args' entry, or None if it is absent."""
        return self._uw_machines[self.name].get('pci_args')

    def get_boot_timeout(self):
        """Return the optional 'boot_timeout' entry, or None if it is absent."""
        return self._uw_machines[self.name].get('boot_timeout')

    def get_hostname(self):
        """Return the machine name qualified with '.cs.washington.edu'."""
        return self.get_machine_name() + '.cs.washington.edu'

    def get_ip(self):
        """Resolve and return the IP address of this machine's host2mgmt entry.

        Raises KeyError if the hostname has no entry in host2mgmt.
        """
        return socket.gethostbyname(self.host2mgmt[self.get_hostname()])

    def get_tftp_dir(self):
        """Return this user's harness directory for the machine under TFTP_PATH."""
        user = getpass.getuser()
        return os.path.join(TFTP_PATH, user, self.name + "_harness")

    def _write_menu_lst(self, data, path):
        """Write the menu.lst contents in `data` to the file at `path`."""
        debug.verbose('writing %s' % path)
        debug.debug(data)
        # 'with' guarantees the file is closed even if the write fails
        with open(path, 'w') as f:
            f.write(data)

    def _set_menu_lst(self, relpath):
        """Point the per-IP menu.lst link in TFTP_PATH at `relpath`.

        Any existing link is replaced; a missing link is not an error.
        """
        ip_menu_name = os.path.join(TFTP_PATH, "menu.lst." + self.get_ip())
        debug.verbose('relinking %s to %s' % (ip_menu_name, relpath))
        try:
            os.remove(ip_menu_name)
        except OSError as e:
            # Nothing to replace the first time a machine is used; any other
            # failure (e.g. permissions) is still reported to the caller.
            if e.errno != errno.ENOENT:
                raise
        os.symlink(relpath, ip_menu_name)

    def set_bootmodules(self, modules):
        """Write the menu.lst for `modules` and make it the machine's boot menu.

        `modules` must provide get_menu_data(path), which is called with the
        harness directory expressed relative to TFTP_PATH (with a leading '/').
        """
        fullpath = os.path.join(self.get_tftp_dir(), 'menu.lst')
        relpath = os.path.relpath(fullpath, TFTP_PATH)
        tftppath = '/' + os.path.relpath(self.get_tftp_dir(), TFTP_PATH)
        self._write_menu_lst(modules.get_menu_data(tftppath), fullpath)
        self._set_menu_lst(relpath)

    def lock(self):
        """Use conserver to lock the machine.

        Queries the console state with "console -i", refuses to proceed if a
        different user has the console open for writing, and then starts a
        background "console" process on a PTY.  The PTY master is exposed as
        self.console_out for reading the machine's output.

        Raises:
            MachineLockedError: another user holds the console for writing.
            AssertionError: the "console -i" query exited with non-zero status.
        """
        machine_name = self.get_machine_name()

        # find out current status of console
        debug.verbose('executing "console -i %s" to check state' % machine_name)
        proc = subprocess.Popen(["console", "-i", machine_name],
                                stdout=subprocess.PIPE)
        line = proc.communicate()[0]
        assert proc.returncode == 0, \
            '"console -i %s" exited with status %s' % (machine_name,
                                                       proc.returncode)

        # check that nobody else has it open for writing
        myuser = getpass.getuser()
        parts = line.strip().split(':')
        # colon-separated fields; only the user list is inspected here
        conname, child, contype, details, users, state = parts[:6]
        if users:
            for userinfo in users.split(','):
                mode, username, host, port = userinfo.split('@')[:4]
                if 'w' in mode and username != myuser:
                    raise MachineLockedError # Machine is not free

        # run a console in the background to 'hold' the lock and read output
        debug.verbose('starting "console %s"' % machine_name)
        # run on a PTY to work around terminal mangling code in console
        (self.masterfd, slavefd) = pty.openpty()
        try:
            self.lockprocess = subprocess.Popen(["console", machine_name],
                                                close_fds=True,
                                                stdout=slavefd, stdin=slavefd)
        except Exception:
            # could not start console: do not leak the master side of the PTY
            os.close(self.masterfd)
            self.masterfd = None
            raise
        finally:
            # the child (if any) has its own copy of the slave side
            os.close(slavefd)
        # XXX: open in binary mode with no buffering
        # otherwise select.select() may block when there is data in the buffer
        self.console_out = os.fdopen(self.masterfd, 'rb', 0)

    def unlock(self):
        """Release the console lock taken by lock(); a no-op if not locked."""
        if self.lockprocess is None:
            return # noop
        debug.verbose('quitting console process (%d)' % self.lockprocess.pid)
        # Ask console to quit by writing to its terminal instead of killing it
        # (alternative: os.kill(self.lockprocess.pid, signal.SIGTERM)).
        # NOTE(review): "\x05c." is presumably conserver's Ctrl-E c . disconnect
        # escape sequence -- confirm against the console manual.
        os.write(self.masterfd, "\x05c.")
        self.lockprocess.wait()
        self.lockprocess = None
        # NOTE(review): the PTY master stays open through self.console_out,
        # which callers may still hold via get_output(); it is not closed here.
        self.masterfd = None

    def __rackboot(self, args):
        """Run RACKBOOT with `args` followed by the machine name."""
        debug.checkcmd([RACKBOOT] + args + [self.get_machine_name()])

    def setup(self):
        """Prepare the machine by running RACKBOOT with the "-b -n" options."""
        self.__rackboot(["-b", "-n"])

    def __rackpower(self, arg):
        """Run RACKPOWER with `arg` for this machine, trying up to 3 times.

        Stops after the first successful invocation so the power action is
        not repeated.  If every attempt fails, the failures have been logged
        as warnings and the method returns without raising (best effort).
        """
        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                debug.checkcmd([RACKPOWER, arg, self.get_machine_name()])
            except subprocess.CalledProcessError:
                debug.warning("rackpower %s %s failed" %
                              (arg, self.get_machine_name()))
                # only announce a retry when one will actually follow
                if attempt < max_attempts:
                    debug.verbose("retrying...")
            else:
                return

    def reboot(self):
        """Reboot the machine (RACKPOWER -r)."""
        self.__rackpower('-r')

    def shutdown(self):
        """Shut the machine down (RACKPOWER -d)."""
        self.__rackpower('-d')

    def get_output(self):
        """Return the console output file object opened by lock()."""
        return self.console_out


# Register one UWMachine subclass per configured machine.  `name = n` is
# evaluated when each class body runs, so every class keeps its own name.
for n in sorted(UWMachine._uw_machines.keys()):
    class TmpMachine(UWMachine):
        name = n
    MachineFactory.addMachine(n, TmpMachine, **UWMachine._uw_machines[n])