1/* 2 Copyright (c) 2014 Intel Corporation. All Rights Reserved. 3 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 8 * Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 * Redistributions in binary form must reproduce the above copyright 11 notice, this list of conditions and the following disclaimer in the 12 documentation and/or other materials provided with the distribution. 13 * Neither the name of Intel Corporation nor the names of its 14 contributors may be used to endorse or promote products derived 15 from this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28*/ 29 30 31#ifndef OFFLOAD_ENGINE_H_INCLUDED 32#define OFFLOAD_ENGINE_H_INCLUDED 33 34#include <limits.h> 35 36#include <list> 37#include <set> 38#include <map> 39#include "offload_common.h" 40#include "coi/coi_client.h" 41 42// Address range 43class MemRange { 44public: 45 MemRange() : m_start(0), m_length(0) {} 46 MemRange(const void *addr, uint64_t len) : m_start(addr), m_length(len) {} 47 48 const void* start() const { 49 return m_start; 50 } 51 52 const void* end() const { 53 return static_cast<const char*>(m_start) + m_length; 54 } 55 56 uint64_t length() const { 57 return m_length; 58 } 59 60 // returns true if given range overlaps with another one 61 bool overlaps(const MemRange &o) const { 62 // Two address ranges A[start, end) and B[start,end) overlap 63 // if A.start < B.end and A.end > B.start. 64 return start() < o.end() && end() > o.start(); 65 } 66 67 // returns true if given range contains the other range 68 bool contains(const MemRange &o) const { 69 return start() <= o.start() && o.end() <= end(); 70 } 71 72private: 73 const void* m_start; 74 uint64_t m_length; 75}; 76 77// Data associated with a pointer variable 78class PtrData { 79public: 80 PtrData(const void *addr, uint64_t len) : 81 cpu_addr(addr, len), cpu_buf(0), 82 mic_addr(0), alloc_disp(0), mic_buf(0), mic_offset(0), 83 ref_count(0), is_static(false) 84 {} 85 86 // 87 // Copy constructor 88 // 89 PtrData(const PtrData& ptr): 90 cpu_addr(ptr.cpu_addr), cpu_buf(ptr.cpu_buf), 91 mic_addr(ptr.mic_addr), alloc_disp(ptr.alloc_disp), 92 mic_buf(ptr.mic_buf), mic_offset(ptr.mic_offset), 93 ref_count(ptr.ref_count), is_static(ptr.is_static) 94 {} 95 96 bool operator<(const PtrData &o) const { 97 // Variables are sorted by the CPU start address. 98 // Overlapping memory ranges are considered equal. 99 return (cpu_addr.start() < o.cpu_addr.start()) && 100 !cpu_addr.overlaps(o.cpu_addr); 101 } 102 103 long add_reference() { 104 if (is_static) { 105 return LONG_MAX; 106 } 107#ifndef TARGET_WINNT 108 return __sync_fetch_and_add(&ref_count, 1); 109#else // TARGET_WINNT 110 return _InterlockedIncrement(&ref_count) - 1; 111#endif // TARGET_WINNT 112 } 113 114 long remove_reference() { 115 if (is_static) { 116 return LONG_MAX; 117 } 118#ifndef TARGET_WINNT 119 return __sync_sub_and_fetch(&ref_count, 1); 120#else // TARGET_WINNT 121 return _InterlockedDecrement(&ref_count); 122#endif // TARGET_WINNT 123 } 124 125 long get_reference() const { 126 if (is_static) { 127 return LONG_MAX; 128 } 129 return ref_count; 130 } 131 132public: 133 // CPU address range 134 const MemRange cpu_addr; 135 136 // CPU and MIC buffers 137 COIBUFFER cpu_buf; 138 COIBUFFER mic_buf; 139 140 // placeholder for buffer address on mic 141 uint64_t mic_addr; 142 143 uint64_t alloc_disp; 144 145 // additional offset to pointer data on MIC for improving bandwidth for 146 // data which is not 4K aligned 147 uint32_t mic_offset; 148 149 // if true buffers are created from static memory 150 bool is_static; 151 mutex_t alloc_ptr_data_lock; 152 153private: 154 // reference count for the entry 155 long ref_count; 156}; 157 158typedef std::list<PtrData*> PtrDataList; 159 160// Data associated with automatic variable 161class AutoData { 162public: 163 AutoData(const void *addr, uint64_t len) : 164 cpu_addr(addr, len), ref_count(0) 165 {} 166 167 bool operator<(const AutoData &o) const { 168 // Variables are sorted by the CPU start address. 169 // Overlapping memory ranges are considered equal. 170 return (cpu_addr.start() < o.cpu_addr.start()) && 171 !cpu_addr.overlaps(o.cpu_addr); 172 } 173 174 long add_reference() { 175#ifndef TARGET_WINNT 176 return __sync_fetch_and_add(&ref_count, 1); 177#else // TARGET_WINNT 178 return _InterlockedIncrement(&ref_count) - 1; 179#endif // TARGET_WINNT 180 } 181 182 long remove_reference() { 183#ifndef TARGET_WINNT 184 return __sync_sub_and_fetch(&ref_count, 1); 185#else // TARGET_WINNT 186 return _InterlockedDecrement(&ref_count); 187#endif // TARGET_WINNT 188 } 189 190 long get_reference() const { 191 return ref_count; 192 } 193 194public: 195 // CPU address range 196 const MemRange cpu_addr; 197 198private: 199 // reference count for the entry 200 long ref_count; 201}; 202 203// Set of autimatic variables 204typedef std::set<AutoData> AutoSet; 205 206// Target image data 207struct TargetImage 208{ 209 TargetImage(const char *_name, const void *_data, uint64_t _size, 210 const char *_origin, uint64_t _offset) : 211 name(_name), data(_data), size(_size), 212 origin(_origin), offset(_offset) 213 {} 214 215 // library name 216 const char* name; 217 218 // contents and size 219 const void* data; 220 uint64_t size; 221 222 // file of origin and offset within that file 223 const char* origin; 224 uint64_t offset; 225}; 226 227typedef std::list<TargetImage> TargetImageList; 228 229// Data associated with persistent auto objects 230struct PersistData 231{ 232 PersistData(const void *addr, uint64_t routine_num, uint64_t size) : 233 stack_cpu_addr(addr), routine_id(routine_num) 234 { 235 stack_ptr_data = new PtrData(0, size); 236 } 237 // 1-st key value - begining of the stack at CPU 238 const void * stack_cpu_addr; 239 // 2-nd key value - identifier of routine invocation at CPU 240 uint64_t routine_id; 241 // corresponded PtrData; only stack_ptr_data->mic_buf is used 242 PtrData * stack_ptr_data; 243 // used to get offset of the variable in stack buffer 244 char * cpu_stack_addr; 245}; 246 247typedef std::list<PersistData> PersistDataList; 248 249// class representing a single engine 250struct Engine { 251 friend void __offload_init_library_once(void); 252 friend void __offload_fini_library(void); 253 254#define check_result(res, tag, ...) \ 255 { \ 256 if (res == COI_PROCESS_DIED) { \ 257 fini_process(true); \ 258 exit(1); \ 259 } \ 260 if (res != COI_SUCCESS) { \ 261 __liboffload_error_support(tag, __VA_ARGS__); \ 262 exit(1); \ 263 } \ 264 } 265 266 int get_logical_index() const { 267 return m_index; 268 } 269 270 int get_physical_index() const { 271 return m_physical_index; 272 } 273 274 const COIPROCESS& get_process() const { 275 return m_process; 276 } 277 278 // initialize device 279 void init(void); 280 281 // add new library 282 void add_lib(const TargetImage &lib) 283 { 284 m_lock.lock(); 285 m_ready = false; 286 m_images.push_back(lib); 287 m_lock.unlock(); 288 } 289 290 COIRESULT compute( 291 const std::list<COIBUFFER> &buffers, 292 const void* data, 293 uint16_t data_size, 294 void* ret, 295 uint16_t ret_size, 296 uint32_t num_deps, 297 const COIEVENT* deps, 298 COIEVENT* event 299 ); 300 301#ifdef MYO_SUPPORT 302 // temporary workaround for blocking behavior for myoiLibInit/Fini calls 303 void init_myo(COIEVENT *event) { 304 COIRESULT res; 305 res = COI::PipelineRunFunction(get_pipeline(), 306 m_funcs[c_func_myo_init], 307 0, 0, 0, 0, 0, 0, 0, 0, 0, 308 event); 309 check_result(res, c_pipeline_run_func, m_index, res); 310 } 311 312 void fini_myo(COIEVENT *event) { 313 COIRESULT res; 314 res = COI::PipelineRunFunction(get_pipeline(), 315 m_funcs[c_func_myo_fini], 316 0, 0, 0, 0, 0, 0, 0, 0, 0, 317 event); 318 check_result(res, c_pipeline_run_func, m_index, res); 319 } 320#endif // MYO_SUPPORT 321 322 // 323 // Memory association table 324 // 325 PtrData* find_ptr_data(const void *ptr) { 326 m_ptr_lock.lock(); 327 PtrSet::iterator res = m_ptr_set.find(PtrData(ptr, 0)); 328 m_ptr_lock.unlock(); 329 if (res == m_ptr_set.end()) { 330 return 0; 331 } 332 return const_cast<PtrData*>(res.operator->()); 333 } 334 335 PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) { 336 m_ptr_lock.lock(); 337 std::pair<PtrSet::iterator, bool> res = 338 m_ptr_set.insert(PtrData(ptr, len)); 339 PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->()); 340 m_ptr_lock.unlock(); 341 342 is_new = res.second; 343 if (is_new) { 344 // It's necessary to lock as soon as possible. 345 // unlock must be done at call site of insert_ptr_data at 346 // branch for is_new 347 ptr_data->alloc_ptr_data_lock.lock(); 348 } 349 return ptr_data; 350 } 351 352 void remove_ptr_data(const void *ptr) { 353 m_ptr_lock.lock(); 354 m_ptr_set.erase(PtrData(ptr, 0)); 355 m_ptr_lock.unlock(); 356 } 357 358 // 359 // Automatic variables 360 // 361 AutoData* find_auto_data(const void *ptr) { 362 AutoSet &auto_vars = get_auto_vars(); 363 AutoSet::iterator res = auto_vars.find(AutoData(ptr, 0)); 364 if (res == auto_vars.end()) { 365 return 0; 366 } 367 return const_cast<AutoData*>(res.operator->()); 368 } 369 370 AutoData* insert_auto_data(const void *ptr, uint64_t len) { 371 AutoSet &auto_vars = get_auto_vars(); 372 std::pair<AutoSet::iterator, bool> res = 373 auto_vars.insert(AutoData(ptr, len)); 374 return const_cast<AutoData*>(res.first.operator->()); 375 } 376 377 void remove_auto_data(const void *ptr) { 378 get_auto_vars().erase(AutoData(ptr, 0)); 379 } 380 381 // 382 // Signals 383 // 384 void add_signal(const void *signal, OffloadDescriptor *desc) { 385 m_signal_lock.lock(); 386 m_signal_map[signal] = desc; 387 m_signal_lock.unlock(); 388 } 389 390 OffloadDescriptor* find_signal(const void *signal, bool remove) { 391 OffloadDescriptor *desc = 0; 392 393 m_signal_lock.lock(); 394 { 395 SignalMap::iterator it = m_signal_map.find(signal); 396 if (it != m_signal_map.end()) { 397 desc = it->second; 398 if (remove) { 399 m_signal_map.erase(it); 400 } 401 } 402 } 403 m_signal_lock.unlock(); 404 405 return desc; 406 } 407 408 // stop device process 409 void fini_process(bool verbose); 410 411 // list of stacks active at the engine 412 PersistDataList m_persist_list; 413 414private: 415 Engine() : m_index(-1), m_physical_index(-1), m_process(0), m_ready(false), 416 m_proc_number(0) 417 {} 418 419 ~Engine() { 420 if (m_process != 0) { 421 fini_process(false); 422 } 423 } 424 425 // set indexes 426 void set_indexes(int logical_index, int physical_index) { 427 m_index = logical_index; 428 m_physical_index = physical_index; 429 } 430 431 // start process on device 432 void init_process(); 433 434 void load_libraries(void); 435 void init_ptr_data(void); 436 437 // performs library intialization on the device side 438 pid_t init_device(void); 439 440private: 441 // get pipeline associated with a calling thread 442 COIPIPELINE get_pipeline(void); 443 444 // get automatic vars set associated with the calling thread 445 AutoSet& get_auto_vars(void); 446 447 // destructor for thread data 448 static void destroy_thread_data(void *data); 449 450private: 451 typedef std::set<PtrData> PtrSet; 452 typedef std::map<const void*, OffloadDescriptor*> SignalMap; 453 454 // device indexes 455 int m_index; 456 int m_physical_index; 457 458 // number of COI pipes created for the engine 459 long m_proc_number; 460 461 // process handle 462 COIPROCESS m_process; 463 464 // If false, device either has not been initialized or new libraries 465 // have been added. 466 bool m_ready; 467 mutex_t m_lock; 468 469 // List of libraries to be loaded 470 TargetImageList m_images; 471 472 // var table 473 PtrSet m_ptr_set; 474 mutex_t m_ptr_lock; 475 476 // signals 477 SignalMap m_signal_map; 478 mutex_t m_signal_lock; 479 480 // constants for accessing device function handles 481 enum { 482 c_func_compute = 0, 483#ifdef MYO_SUPPORT 484 c_func_myo_init, 485 c_func_myo_fini, 486#endif // MYO_SUPPORT 487 c_func_init, 488 c_func_var_table_size, 489 c_func_var_table_copy, 490 c_funcs_total 491 }; 492 static const char* m_func_names[c_funcs_total]; 493 494 // device function handles 495 COIFUNCTION m_funcs[c_funcs_total]; 496 497 // int -> name mapping for device signals 498 static const int c_signal_max = 32; 499 static const char* c_signal_names[c_signal_max]; 500}; 501 502#endif // OFFLOAD_ENGINE_H_INCLUDED 503