1/*
2    Copyright (c) 2014 Intel Corporation.  All Rights Reserved.
3
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7
8      * Redistributions of source code must retain the above copyright
9        notice, this list of conditions and the following disclaimer.
10      * Redistributions in binary form must reproduce the above copyright
11        notice, this list of conditions and the following disclaimer in the
12        documentation and/or other materials provided with the distribution.
13      * Neither the name of Intel Corporation nor the names of its
14        contributors may be used to endorse or promote products derived
15        from this software without specific prior written permission.
16
17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*/
29
30
31#include "offload_target.h"
32#include <stdlib.h>
33#include <unistd.h>
34#ifdef SEP_SUPPORT
35#include <fcntl.h>
36#include <sys/ioctl.h>
37#endif // SEP_SUPPORT
38#include <omp.h>
39#include <map>
40
// offload_func_with_parms:
// Pointer to a function that represents an offloaded entry point.
// The single void* parameter is a temporary fix for parameters on the
// stack; OffloadDescriptor::offload() passes the address of its local
// OffloadDescriptor through it.
typedef void (*offload_func_with_parms)(void *);
45
// Target console and file logging

// Message prefix; assigned in __offload_target_init() from
// report_get_message_str(c_report_mic).
const char *prefix;
// Both values below are overwritten at the start of every
// OffloadDescriptor::offload() call with the settings carried in the
// incoming FunctionDescriptor.
int console_enabled = 0;
int offload_report_level = 0;
50
// Trace information

// Printable names for a variable descriptor's direction; indexed with
// VarDesc::direction.bits by the trace output in merge_var_descs().
static const char* vardesc_direction_as_string[] = {
    "NOCOPY",
    "IN",
    "OUT",
    "INOUT"
};
// Printable names for a variable descriptor's type; indexed with
// VarDesc::type.src / VarDesc::type.dst by the trace output in
// merge_var_descs().
// NOTE(review): the order presumably mirrors the c_data, c_data_ptr, ...
// type enumeration declared elsewhere - confirm when adding a new type.
static const char* vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array"
};
75
// Value returned by _Offload_get_device_number(); -1 until assigned.
int mic_index = -1;
// Value returned by _Offload_number_of_devices(); -1 until assigned.
int mic_engines_total = -1;
// Filled in by __offload_target_init() from COIPerfGetCycleFrequency().
uint64_t mic_frequency = 0;
// Number of the most recently started offload; set in
// OffloadDescriptor::offload() from the incoming FunctionDescriptor.
int offload_number = 0;
// Per-buffer reference bookkeeping, keyed by buffer address.
// add_ref_count() and BufReleaseRef() access it while holding add_ref_lock.
static std::map<void*, RefInfo*> ref_data;
static mutex_t add_ref_lock;
82
83#ifdef SEP_SUPPORT
// Environment variable read by __offload_target_init(); its value is
// converted with atoi() and stored in sep_monitor.
static const char*  sep_monitor_env = "SEP_MONITOR";
// When true, offload() resumes/pauses sampling around offloaded code.
static bool         sep_monitor = false;
// Environment variable that, when set and non-empty, replaces sep_device.
static const char*  sep_device_env = "SEP_DEVICE";
// Path of the device node opened by VTPauseSampling()/VTResumeSampling().
static const char*  sep_device =  "/dev/sep3.8/c";
// Number of offloads currently in flight; offload() resumes sampling on the
// 0 -> 1 transition and pauses it again when the count drops back to 0.
static int          sep_counter = 0;

// ioctl request codes issued on sep_device
#define SEP_API_IOC_MAGIC   99
#define SEP_IOCTL_PAUSE     _IO (SEP_API_IOC_MAGIC, 31)
#define SEP_IOCTL_RESUME    _IO (SEP_API_IOC_MAGIC, 32)
93
94static void add_ref_count(void * buf, bool created)
95{
96    mutex_locker_t locker(add_ref_lock);
97    RefInfo * info = ref_data[buf];
98
99    if (info) {
100        info->count++;
101    }
102    else {
103        info = new RefInfo((int)created,(long)1);
104    }
105    info->is_added |= created;
106    ref_data[buf] = info;
107}
108
109static void BufReleaseRef(void * buf)
110{
111    mutex_locker_t locker(add_ref_lock);
112    RefInfo * info = ref_data[buf];
113
114    if (info) {
115        --info->count;
116        if (info->count == 0 && info->is_added) {
117            BufferReleaseRef(buf);
118            info->is_added = 0;
119        }
120    }
121}
122
123static int VTPauseSampling(void)
124{
125    int ret = -1;
126    int handle = open(sep_device, O_RDWR);
127    if (handle > 0) {
128        ret = ioctl(handle, SEP_IOCTL_PAUSE);
129        close(handle);
130    }
131    return ret;
132}
133
134static int VTResumeSampling(void)
135{
136    int ret = -1;
137    int handle = open(sep_device, O_RDWR);
138    if (handle > 0) {
139        ret = ioctl(handle, SEP_IOCTL_RESUME);
140        close(handle);
141    }
142    return ret;
143}
144#endif // SEP_SUPPORT
145
146void OffloadDescriptor::offload(
147    uint32_t  buffer_count,
148    void**    buffers,
149    void*     misc_data,
150    uint16_t  misc_data_len,
151    void*     return_data,
152    uint16_t  return_data_len
153)
154{
155    FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
156    const char *name = func->data;
157    OffloadDescriptor ofld;
158    char *in_data = 0;
159    char *out_data = 0;
160    char *timer_data = 0;
161
162    console_enabled = func->console_enabled;
163    timer_enabled = func->timer_enabled;
164    offload_report_level = func->offload_report_level;
165    offload_number = func->offload_number;
166    ofld.set_offload_number(func->offload_number);
167
168#ifdef SEP_SUPPORT
169    if (sep_monitor) {
170        if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
171            OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
172            VTResumeSampling();
173        }
174    }
175#endif // SEP_SUPPORT
176
177    OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
178                          c_offload_start_target_func,
179                          "Offload \"%s\" started\n", name);
180
181    // initialize timer data
182    OFFLOAD_TIMER_INIT();
183
184    OFFLOAD_TIMER_START(c_offload_target_total_time);
185
186    OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);
187
188    // get input/output buffer addresses
189    if (func->in_datalen > 0 || func->out_datalen > 0) {
190        if (func->data_offset != 0) {
191            in_data = (char*) misc_data + func->data_offset;
192            out_data = (char*) return_data;
193        }
194        else {
195            char *inout_buf = (char*) buffers[--buffer_count];
196            in_data = inout_buf;
197            out_data = inout_buf;
198        }
199    }
200
201    // assign variable descriptors
202    ofld.m_vars_total = func->vars_num;
203    if (ofld.m_vars_total > 0) {
204        uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);
205
206        ofld.m_vars = (VarDesc*) malloc(var_data_len);
207        if (ofld.m_vars == NULL)
208          LIBOFFLOAD_ERROR(c_malloc);
209        memcpy(ofld.m_vars, in_data, var_data_len);
210
211        in_data += var_data_len;
212        func->in_datalen -= var_data_len;
213    }
214
215    // timer data
216    if (func->timer_enabled) {
217        uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();
218
219        timer_data = out_data;
220        out_data += timer_data_len;
221        func->out_datalen -= timer_data_len;
222    }
223
224    // init Marshallers
225    ofld.m_in.init_buffer(in_data, func->in_datalen);
226    ofld.m_out.init_buffer(out_data, func->out_datalen);
227
228    // copy buffers to offload descriptor
229    std::copy(buffers, buffers + buffer_count,
230              std::back_inserter(ofld.m_buffers));
231
232    OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);
233
234    // find offload entry address
235    OFFLOAD_TIMER_START(c_offload_target_func_lookup);
236
237    offload_func_with_parms entry = (offload_func_with_parms)
238        __offload_entries.find_addr(name);
239
240    if (entry == NULL) {
241#if OFFLOAD_DEBUG > 0
242        if (console_enabled > 2) {
243            __offload_entries.dump();
244        }
245#endif
246        LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
247        exit(1);
248    }
249
250    OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);
251
252    OFFLOAD_TIMER_START(c_offload_target_func_time);
253
254    // execute offload entry
255    entry(&ofld);
256
257    OFFLOAD_TIMER_STOP(c_offload_target_func_time);
258
259    OFFLOAD_TIMER_STOP(c_offload_target_total_time);
260
261    // copy timer data to the buffer
262    OFFLOAD_TIMER_TARGET_DATA(timer_data);
263
264    OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);
265
266#ifdef SEP_SUPPORT
267    if (sep_monitor) {
268        if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
269            OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
270            VTPauseSampling();
271        }
272    }
273#endif // SEP_SUPPORT
274}
275
276void OffloadDescriptor::merge_var_descs(
277    VarDesc *vars,
278    VarDesc2 *vars2,
279    int vars_total
280)
281{
282    // number of variable descriptors received from host and generated
283    // locally should match
284    if (m_vars_total < vars_total) {
285        LIBOFFLOAD_ERROR(c_merge_var_descs1);
286        exit(1);
287    }
288
289    for (int i = 0; i < m_vars_total; i++) {
290        if (i < vars_total) {
291            // variable type must match
292            if (m_vars[i].type.bits != vars[i].type.bits) {
293                LIBOFFLOAD_ERROR(c_merge_var_descs2);
294                exit(1);
295            }
296
297            m_vars[i].ptr = vars[i].ptr;
298            m_vars[i].into = vars[i].into;
299
300            const char *var_sname = "";
301            if (vars2 != NULL) {
302                if (vars2[i].sname != NULL) {
303                    var_sname = vars2[i].sname;
304                }
305            }
306            OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
307                "   VarDesc %d, var=%s, %s, %s\n",
308                i, var_sname,
309                vardesc_direction_as_string[m_vars[i].direction.bits],
310                vardesc_type_as_string[m_vars[i].type.src]);
311            if (vars2 != NULL && vars2[i].dname != NULL) {
312                OFFLOAD_TRACE(2, "              into=%s, %s\n", vars2[i].dname,
313                    vardesc_type_as_string[m_vars[i].type.dst]);
314            }
315        }
316        OFFLOAD_TRACE(2,
317            "              type_src=%d, type_dstn=%d, direction=%d, "
318            "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
319            "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
320            m_vars[i].type.src,
321            m_vars[i].type.dst,
322            m_vars[i].direction.bits,
323            m_vars[i].alloc_if,
324            m_vars[i].free_if,
325            m_vars[i].align,
326            m_vars[i].mic_offset,
327            m_vars[i].flags.bits,
328            m_vars[i].offset,
329            m_vars[i].size,
330            m_vars[i].count,
331            m_vars[i].ptr,
332            m_vars[i].into);
333    }
334}
335
336void OffloadDescriptor::scatter_copyin_data()
337{
338    OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);
339
340    OFFLOAD_DEBUG_TRACE(2, "IN  buffer @ %p size %lld\n",
341                        m_in.get_buffer_start(),
342                        m_in.get_buffer_size());
343    OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
344                             m_in.get_buffer_size());
345
346    // receive data
347    for (int i = 0; i < m_vars_total; i++) {
348        bool src_is_for_mic = (m_vars[i].direction.out ||
349                               m_vars[i].into == NULL);
350        void** ptr_addr = src_is_for_mic ?
351                          static_cast<void**>(m_vars[i].ptr) :
352                          static_cast<void**>(m_vars[i].into);
353        int type = src_is_for_mic ? m_vars[i].type.src :
354                                    m_vars[i].type.dst;
355        bool is_static = src_is_for_mic ?
356                         m_vars[i].flags.is_static :
357                         m_vars[i].flags.is_static_dstn;
358        void *ptr = NULL;
359
360        if (m_vars[i].flags.alloc_disp) {
361            int64_t offset = 0;
362            m_in.receive_data(&offset, sizeof(offset));
363            m_vars[i].offset = -offset;
364        }
365        if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
366            VAR_TYPE_IS_DV_DATA(type)) {
367            ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
368                  reinterpret_cast<ArrDesc*>(ptr_addr) :
369                  *reinterpret_cast<ArrDesc**>(ptr_addr);
370            ptr_addr = reinterpret_cast<void**>(&dvp->Base);
371        }
372
373        // Set pointer values
374        switch (type) {
375            case c_data_ptr_array:
376                {
377                    int j = m_vars[i].ptr_arr_offset;
378                    int max_el = j + m_vars[i].count;
379                    char *dst_arr_ptr = (src_is_for_mic)?
380                        *(reinterpret_cast<char**>(m_vars[i].ptr)) :
381                        reinterpret_cast<char*>(m_vars[i].into);
382
383                    for (; j < max_el; j++) {
384                        if (src_is_for_mic) {
385                            m_vars[j].ptr =
386                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
387                        }
388                        else {
389                            m_vars[j].into =
390                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
391                        }
392                    }
393                }
394                break;
395            case c_data:
396            case c_void_ptr:
397            case c_cean_var:
398            case c_dv:
399                break;
400
401            case c_string_ptr:
402            case c_data_ptr:
403            case c_cean_var_ptr:
404            case c_dv_ptr:
405                if (m_vars[i].alloc_if) {
406                    void *buf;
407                    if (m_vars[i].flags.sink_addr) {
408                        m_in.receive_data(&buf, sizeof(buf));
409                    }
410                    else {
411                        buf = m_buffers.front();
412                        m_buffers.pop_front();
413                    }
414                    if (buf) {
415                        if (!is_static) {
416                            if (!m_vars[i].flags.sink_addr) {
417                                // increment buffer reference
418                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
419                                BufferAddRef(buf);
420                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
421                            }
422                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
423                        }
424                        ptr = static_cast<char*>(buf) +
425                                  m_vars[i].mic_offset +
426                                  (m_vars[i].flags.is_stack_buf ?
427                                   0 : m_vars[i].offset);
428                    }
429                    *ptr_addr = ptr;
430                }
431                else if (m_vars[i].flags.sink_addr) {
432                    void *buf;
433                    m_in.receive_data(&buf, sizeof(buf));
434                    void *ptr = static_cast<char*>(buf) +
435                                    m_vars[i].mic_offset +
436                                    (m_vars[i].flags.is_stack_buf ?
437                                     0 : m_vars[i].offset);
438                    *ptr_addr = ptr;
439                }
440                break;
441
442            case c_func_ptr:
443                break;
444
445            case c_dv_data:
446            case c_dv_ptr_data:
447            case c_dv_data_slice:
448            case c_dv_ptr_data_slice:
449                if (m_vars[i].alloc_if) {
450                    void *buf;
451                    if (m_vars[i].flags.sink_addr) {
452                        m_in.receive_data(&buf, sizeof(buf));
453                    }
454                    else {
455                        buf = m_buffers.front();
456                        m_buffers.pop_front();
457                    }
458                    if (buf) {
459                        if (!is_static) {
460                            if (!m_vars[i].flags.sink_addr) {
461                                // increment buffer reference
462                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
463                                BufferAddRef(buf);
464                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
465                            }
466                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
467                        }
468                        ptr = static_cast<char*>(buf) +
469                            m_vars[i].mic_offset + m_vars[i].offset;
470                    }
471                    *ptr_addr = ptr;
472                }
473                else if (m_vars[i].flags.sink_addr) {
474                    void *buf;
475                    m_in.receive_data(&buf, sizeof(buf));
476                    ptr = static_cast<char*>(buf) +
477                          m_vars[i].mic_offset + m_vars[i].offset;
478                    *ptr_addr = ptr;
479                }
480                break;
481
482            default:
483                LIBOFFLOAD_ERROR(c_unknown_var_type, type);
484                abort();
485        }
486        // Release obsolete buffers for stack of persistent objects
487        if (type = c_data_ptr &&
488            m_vars[i].flags.is_stack_buf &&
489            !m_vars[i].direction.bits &&
490            m_vars[i].alloc_if &&
491            m_vars[i].size != 0) {
492                for (int j=0; j < m_vars[i].size; j++) {
493                    void *buf;
494                    m_in.receive_data(&buf, sizeof(buf));
495                    BufferReleaseRef(buf);
496                    ref_data.erase(buf);
497                }
498        }
499        // Do copyin
500        switch (m_vars[i].type.dst) {
501            case c_data_ptr_array:
502                break;
503            case c_data:
504            case c_void_ptr:
505            case c_cean_var:
506                if (m_vars[i].direction.in &&
507                    !m_vars[i].flags.is_static_dstn) {
508                    int64_t size;
509                    int64_t disp;
510                    char* ptr = m_vars[i].into ?
511                                 static_cast<char*>(m_vars[i].into) :
512                                 static_cast<char*>(m_vars[i].ptr);
513                    if (m_vars[i].type.dst == c_cean_var) {
514                        m_in.receive_data((&size), sizeof(int64_t));
515                        m_in.receive_data((&disp), sizeof(int64_t));
516                    }
517                    else {
518                        size = m_vars[i].size;
519                        disp = 0;
520                    }
521                    m_in.receive_data(ptr + disp, size);
522                }
523                break;
524
525            case c_dv:
526                if (m_vars[i].direction.bits ||
527                    m_vars[i].alloc_if ||
528                    m_vars[i].free_if) {
529                    char* ptr = m_vars[i].into ?
530                                 static_cast<char*>(m_vars[i].into) :
531                                 static_cast<char*>(m_vars[i].ptr);
532                    m_in.receive_data(ptr + sizeof(uint64_t),
533                                      m_vars[i].size - sizeof(uint64_t));
534                }
535                break;
536
537            case c_string_ptr:
538            case c_data_ptr:
539            case c_cean_var_ptr:
540            case c_dv_ptr:
541            case c_dv_data:
542            case c_dv_ptr_data:
543            case c_dv_data_slice:
544            case c_dv_ptr_data_slice:
545                break;
546
547            case c_func_ptr:
548                if (m_vars[i].direction.in) {
549                    m_in.receive_func_ptr((const void**) m_vars[i].ptr);
550                }
551                break;
552
553            default:
554                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
555                abort();
556        }
557    }
558
559    OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
560                  m_in.get_tfr_size());
561
562    OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);
563
564    OFFLOAD_TIMER_START(c_offload_target_compute);
565}
566
567void OffloadDescriptor::gather_copyout_data()
568{
569    OFFLOAD_TIMER_STOP(c_offload_target_compute);
570
571    OFFLOAD_TIMER_START(c_offload_target_gather_outputs);
572
573    for (int i = 0; i < m_vars_total; i++) {
574        bool src_is_for_mic = (m_vars[i].direction.out ||
575                               m_vars[i].into == NULL);
576
577        switch (m_vars[i].type.src) {
578            case c_data_ptr_array:
579                break;
580            case c_data:
581            case c_void_ptr:
582            case c_cean_var:
583                if (m_vars[i].direction.out &&
584                    !m_vars[i].flags.is_static) {
585                    m_out.send_data(
586                        static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
587                        m_vars[i].size);
588                }
589                break;
590
591            case c_dv:
592                break;
593
594            case c_string_ptr:
595            case c_data_ptr:
596            case c_cean_var_ptr:
597            case c_dv_ptr:
598                if (m_vars[i].free_if &&
599                    src_is_for_mic &&
600                    !m_vars[i].flags.is_static) {
601                    void *buf = *static_cast<char**>(m_vars[i].ptr) -
602                                    m_vars[i].mic_offset -
603                                    (m_vars[i].flags.is_stack_buf?
604                                     0 : m_vars[i].offset);
605                    if (buf == NULL) {
606                        break;
607                    }
608                    // decrement buffer reference count
609                    OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
610                    BufReleaseRef(buf);
611                    OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
612                }
613                break;
614
615            case c_func_ptr:
616                if (m_vars[i].direction.out) {
617                    m_out.send_func_ptr(*((void**) m_vars[i].ptr));
618                }
619                break;
620
621            case c_dv_data:
622            case c_dv_ptr_data:
623            case c_dv_data_slice:
624            case c_dv_ptr_data_slice:
625                if (src_is_for_mic &&
626                    m_vars[i].free_if &&
627                    !m_vars[i].flags.is_static) {
628                    ArrDesc *dvp = (m_vars[i].type.src == c_dv_data ||
629                                    m_vars[i].type.src == c_dv_data_slice) ?
630                        static_cast<ArrDesc*>(m_vars[i].ptr) :
631                        *static_cast<ArrDesc**>(m_vars[i].ptr);
632
633                    void *buf = reinterpret_cast<char*>(dvp->Base) -
634                                m_vars[i].mic_offset -
635                                m_vars[i].offset;
636
637                    if (buf == NULL) {
638                        break;
639                    }
640
641                    // decrement buffer reference count
642                    OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
643                    BufReleaseRef(buf);
644                    OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
645                }
646                break;
647
648            default:
649                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
650                abort();
651        }
652
653        if (m_vars[i].into) {
654            switch (m_vars[i].type.dst) {
655                case c_data_ptr_array:
656                    break;
657                case c_data:
658                case c_void_ptr:
659                case c_cean_var:
660                case c_dv:
661                    break;
662
663                case c_string_ptr:
664                case c_data_ptr:
665                case c_cean_var_ptr:
666                case c_dv_ptr:
667                    if (m_vars[i].direction.in &&
668                        m_vars[i].free_if &&
669                        !m_vars[i].flags.is_static_dstn) {
670                        void *buf = *static_cast<char**>(m_vars[i].into) -
671                                    m_vars[i].mic_offset -
672                                    (m_vars[i].flags.is_stack_buf?
673                                     0 : m_vars[i].offset);
674
675                        if (buf == NULL) {
676                            break;
677                        }
678                        // decrement buffer reference count
679                        OFFLOAD_TIMER_START(
680                            c_offload_target_release_buffer_refs);
681                        BufReleaseRef(buf);
682                        OFFLOAD_TIMER_STOP(
683                            c_offload_target_release_buffer_refs);
684                    }
685                    break;
686
687                case c_func_ptr:
688                    break;
689
690                case c_dv_data:
691                case c_dv_ptr_data:
692                case c_dv_data_slice:
693                case c_dv_ptr_data_slice:
694                    if (m_vars[i].free_if &&
695                        m_vars[i].direction.in &&
696                        !m_vars[i].flags.is_static_dstn) {
697                        ArrDesc *dvp =
698                            (m_vars[i].type.dst == c_dv_data_slice ||
699                             m_vars[i].type.dst == c_dv_data) ?
700                            static_cast<ArrDesc*>(m_vars[i].into) :
701                            *static_cast<ArrDesc**>(m_vars[i].into);
702                        void *buf = reinterpret_cast<char*>(dvp->Base) -
703                              m_vars[i].mic_offset -
704                              m_vars[i].offset;
705
706                        if (buf == NULL) {
707                            break;
708                        }
709                        // decrement buffer reference count
710                        OFFLOAD_TIMER_START(
711                            c_offload_target_release_buffer_refs);
712                        BufReleaseRef(buf);
713                        OFFLOAD_TIMER_STOP(
714                            c_offload_target_release_buffer_refs);
715                    }
716                    break;
717
718                default:
719                    LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
720                    abort();
721            }
722        }
723    }
724
725    OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
726                        m_out.get_buffer_start(),
727                        m_out.get_buffer_size());
728
729    OFFLOAD_DEBUG_DUMP_BYTES(2,
730                             m_out.get_buffer_start(),
731                             m_out.get_buffer_size());
732
733    OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
734                  "Total copyout data sent to host: [%lld] bytes\n",
735                  m_out.get_tfr_size());
736
737    OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
738}
739
740void __offload_target_init(void)
741{
742#ifdef SEP_SUPPORT
743    const char* env_var = getenv(sep_monitor_env);
744    if (env_var != 0 && *env_var != '\0') {
745        sep_monitor = atoi(env_var);
746    }
747    env_var = getenv(sep_device_env);
748    if (env_var != 0 && *env_var != '\0') {
749        sep_device = env_var;
750    }
751#endif // SEP_SUPPORT
752
753    prefix = report_get_message_str(c_report_mic);
754
755    // init frequency
756    mic_frequency = COIPerfGetCycleFrequency();
757}
758
759// User-visible offload API
760
761int _Offload_number_of_devices(void)
762{
763    return mic_engines_total;
764}
765
766int _Offload_get_device_number(void)
767{
768    return mic_index;
769}
770
771int _Offload_get_physical_device_number(void)
772{
773    uint32_t index;
774    EngineGetIndex(&index);
775    return index;
776}
777