/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2008-2009, Intel Corporation. * All Rights Reserved. */ #define MAX_TAG 8 #define MAX_STACK 64 #pragma D option aggsize=8m #pragma D option bufsize=16m #pragma D option dynvarsize=16m #pragma D option aggrate=0 #pragma D option stackframes=MAX_STACK /* * Our D script needs to compile even if some of the TRANSLATE probes cannot * be found. Missing probes can be caused by older kernel, different * architecture, unloaded modules etc. */ #pragma D option zdefs #if defined(ENABLE_SCHED) #if defined(TRACE_PID) #define TRACE_FILTER / pid == 0 || pid == TRACE_PID / #define TRACE_FILTER_COND(a) / (pid == 0 || pid == TRACE_PID) && (a) / #elif defined(TRACE_PGID) #define TRACE_FILTER / pid == 0 || curpsinfo->pr_pgid == TRACE_PGID / #define TRACE_FILTER_COND(a) / (pid == 0 || curpsinfo->pr_pgid == TRACE_PGID) && (a) / #else #define TRACE_FILTER #define TRACE_FILTER_COND(a) / (a) / #endif #else /* ENABLE_SCHED */ #if defined(TRACE_PID) #define TRACE_FILTER / pid == TRACE_PID / #define TRACE_FILTER_COND(a) / (pid == TRACE_PID) && (a) / #elif defined(TRACE_PGID) #define TRACE_FILTER / curpsinfo->pr_pgid == TRACE_PGID / #define TRACE_FILTER_COND(a) / (curpsinfo->pr_pgid == TRACE_PGID) && (a) / #else #define TRACE_FILTER / pid != 0 / #define TRACE_FILTER_COND(a) / (pid != 0) && (a) / #endif #endif /* ENABLE_SCHED */ /* Threshold to filter WAKEABLE latencies. */ #define FILTER_THRESHOLD 5000000 /* From thread.h */ #define T_WAKEABLE 2 /* * This array is used to store timestamp of when threads are enqueued * to dispatch queue. * self-> is not accessible when enqueue happens. */ unsigned long long lt_timestamps[int, int]; self unsigned int lt_is_block_wakeable; self unsigned long long lt_sleep_start; self unsigned long long lt_sleep_duration; self unsigned long long lt_sch_delay; self unsigned int lt_counter; /* only used in low overhead */ self unsigned long long lt_timestamp; /* only used in low overhead */ self unsigned int lt_stackp; self unsigned int lt_prio[int]; self string lt_cause[int]; this unsigned int priority; this string cause; /* * Clean up everything, otherwise we will run out of memory. */ proc:::lwp-exit { lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0; self->lt_sleep_start = 0; self->lt_is_block_wakeable = 0; self->lt_counter = 0; self->lt_timestamp = 0; /* * Workaround: no way to clear associative array. * We have to manually clear 0 ~ (MAX_TAG-1). */ self->lt_prio[0] = 0; self->lt_prio[1] = 0; self->lt_prio[2] = 0; self->lt_prio[3] = 0; self->lt_prio[4] = 0; self->lt_prio[5] = 0; self->lt_prio[6] = 0; self->lt_prio[7] = 0; self->lt_cause[0] = 0; self->lt_cause[1] = 0; self->lt_cause[2] = 0; self->lt_cause[3] = 0; self->lt_cause[4] = 0; self->lt_cause[5] = 0; self->lt_cause[6] = 0; self->lt_cause[7] = 0; } #if !defined(ENABLE_LOW_OVERHEAD) /* * Log timestamp when a thread is taken off the CPU. */ sched::resume:off-cpu TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP) { self->lt_sleep_start = timestamp; self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE; lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = self->lt_sleep_start; } /* * Log timestamp when a thread is put on a dispatch queue and becomes runnable. */ sched:::enqueue /lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/ { lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] = timestamp; } /* * Calculate latency when the thread is actually on the CPU. * This is necessary in order to get the right stack. */ this unsigned long long end; this unsigned long long now; sched::resume:on-cpu /self->lt_sleep_start != 0/ { this->end = lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid]; this->now = timestamp; lt_timestamps[curpsinfo->pr_pid, curlwpsinfo->pr_lwpid] = 0; this->end = (this->end != 0 && this->end != self->lt_sleep_start) ? this->end : this->now; self->lt_sch_delay = this->now - this->end; self->lt_sleep_duration = this->end - self->lt_sleep_start; self->lt_sleep_start = 0; } /* * Filter: drop all "large" latency when it is interruptible, i.e., sleep() * etc. */ #if defined(ENABLE_FILTER) sched::resume:on-cpu /self->lt_sleep_duration > FILTER_THRESHOLD && self->lt_is_block_wakeable != 0/ { self->lt_sch_delay = 0; self->lt_sleep_duration = 0; self->lt_is_block_wakeable = 0; } #endif /* defined(ENABLE_FILTER) */ /* * Write sleep time to the aggregation. * lt_sleep_duration is the duration between the time when a thread is taken * off the CPU and the time when it is enqueued again. */ sched::resume:on-cpu /self->lt_sleep_duration != 0/ { this->cause = self->lt_stackp > 0 ? self->lt_cause[self->lt_stackp - 1] : ""; this->priority = self->lt_stackp > 0 ? self->lt_prio[self->lt_stackp - 1] : 0; @lt_call_count[pid, tid, stack(), this->cause, this->priority] = count(); @lt_call_sum[pid, tid, stack(), this->cause, this->priority] = sum(self->lt_sleep_duration); @lt_call_max[pid, tid, stack(), this->cause, this->priority] = max(self->lt_sleep_duration); self->lt_is_block_wakeable = 0; /* Clear the flag to avoid leak */ self->lt_sleep_duration = 0; } /* * Write time spent in queue to the aggregation. * lt_sch_delay is the interval between the time when a thread becomes * runnable and the time when it is actually on the CPU. */ sched::resume:on-cpu /self->lt_sch_delay != 0/ { @lt_named_count[pid, tid, "Wait for available CPU"] = count(); @lt_named_sum[pid, tid, "Wait for available CPU"] = sum(self->lt_sch_delay); @lt_named_max[pid, tid, "Wait for available CPU"] = max(self->lt_sch_delay); self->lt_sch_delay = 0; } /* * Probes to track latency caused by spinning on a lock. */ lockstat:::adaptive-spin TRACE_FILTER { @lt_named_count[pid, tid, "Adapt. lock spin"] = count(); @lt_named_sum[pid, tid, "Adapt. lock spin"] = sum(arg1); @lt_named_max[pid, tid, "Adapt. lock spin"] = max(arg1); } lockstat:::spin-spin TRACE_FILTER { @lt_named_count[pid, tid, "Spinlock spin"] = count(); @lt_named_sum[pid, tid, "Spinlock spin"] = sum(arg1); @lt_named_max[pid, tid, "Spinlock spin"] = max(arg1); } /* * Probes to track latency caused by blocking on a lock. */ lockstat:::adaptive-block TRACE_FILTER { @lt_named_count[pid, tid, "#Adapt. lock block"] = count(); @lt_named_sum[pid, tid, "#Adapt. lock block"] = sum(arg1); @lt_named_max[pid, tid, "#Adapt. lock block"] = max(arg1); } lockstat:::rw-block TRACE_FILTER { @lt_named_count[pid, tid, "#RW. lock block"] = count(); @lt_named_sum[pid, tid, "#RW. lock block"] = sum(arg1); @lt_named_max[pid, tid, "#RW. lock block"] = max(arg1); } #if defined(ENABLE_SYNCOBJ) /* * Probes to track latency caused by synchronization objects. */ this int stype; this unsigned long long wchan; this unsigned long long wtime; sched:::wakeup /* * Currently we are unable to track wakeup from sched, because all its LWP IDs * are zero when we trace it and that makes lt_timestamps unusable. */ /args[1]->pr_pid != 0 && lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid] != 0/ { this->stype = args[0]->pr_stype; this->wchan = args[0]->pr_wchan; /* * We can use lt_timestamps[] here, because * wakeup is always fired before enqueue. * After enqueue, lt_timestamps[] will be overwritten. */ this->wtime = timestamp - lt_timestamps[args[1]->pr_pid, args[0]->pr_lwpid]; @lt_sync_count[args[1]->pr_pid, args[0]->pr_lwpid, this->stype, this->wchan] = count(); @lt_sync_sum[args[1]->pr_pid, args[0]->pr_lwpid, this->stype, this->wchan] = sum(this->wtime); @lt_sync_max[args[1]->pr_pid, args[0]->pr_lwpid, this->stype, this->wchan] = max(this->wtime); } #endif /* defined(ENABLE_SYNCOBJ) */ #else /* !defined(ENABLE_LOW_OVERHEAD) */ /* * This is the low overhead mode. * In order to reduce the number of instructions executed during each * off-cpu and on-cpu event, we do the following: * * 1. Use sampling and update aggregations only roughly 1/100 times * (SAMPLE_TIMES). * 2. Do not track anything other than what is needed for "main" window. * 3. Use as few thread local variables as possible. */ #define SAMPLE_TIMES 100 #define SAMPLE_THRESHOLD 50000000 /* * Log timestamp when a thread is off CPU. */ sched::resume:off-cpu TRACE_FILTER_COND(curlwpsinfo->pr_state == SSLEEP) { self->lt_timestamp = timestamp; #if defined(ENABLE_FILTER) self->lt_is_block_wakeable = curthread->t_flag & T_WAKEABLE; #endif /* defined(ENABLE_FILTER) */ } /* * Calculate latency when a thread is actually on the CPU. */ this int need_skip; sched::resume:on-cpu /self->lt_timestamp != 0/ { self->lt_timestamp = timestamp - self->lt_timestamp; #if defined(ENABLE_FILTER) self->lt_timestamp = (self->lt_timestamp > FILTER_THRESHOLD && self->lt_is_block_wakeable != 0) ? 0 : self->lt_timestamp; self->lt_is_block_wakeable = 0; #endif /* defined(ENABLE_FILTER) */ this->need_skip = (self->lt_counter < (SAMPLE_TIMES - 1) && self->lt_timestamp <= SAMPLE_THRESHOLD) ? 1 : 0; self->lt_timestamp = this->need_skip ? 0 : self->lt_timestamp; self->lt_counter += this->need_skip; } /* * Track large latency first. */ sched::resume:on-cpu /self->lt_timestamp > SAMPLE_THRESHOLD/ { this->cause = self->lt_stackp > 0 ? self->lt_cause[self->lt_stackp - 1] : ""; this->priority = self->lt_stackp > 0 ? self->lt_prio[self->lt_stackp - 1] : 0; @lt_call_count[pid, tid, stack(), this->cause, this->priority] = sum(1); @lt_call_sum[pid, tid, stack(), this->cause, this->priority] = sum(self->lt_timestamp); @lt_call_max[pid, tid, stack(), this->cause, this->priority] = max(self->lt_timestamp); self->lt_timestamp = 0; } /* * If we fall back to this probe, that means the latency is small and counter * has reached SAMPLE_TIMES. */ sched::resume:on-cpu /self->lt_timestamp != 0/ { this->cause = self->lt_stackp > 0 ? self->lt_cause[self->lt_stackp - 1] : ""; this->priority = self->lt_stackp > 0 ? self->lt_prio[self->lt_stackp - 1] : 0; /* Need +1 because lt_counter has not been updated in this cycle. */ @lt_call_count[pid, tid, stack(), this->cause, this->priority] = sum(self->lt_counter + 1); @lt_call_sum[pid, tid, stack(), this->cause, this->priority] = sum((self->lt_counter + 1) * self->lt_timestamp); @lt_call_max[pid, tid, stack(), this->cause, this->priority] = max(self->lt_timestamp); self->lt_timestamp = 0; self->lt_counter = 0; } #endif /* !defined(ENABLE_LOW_OVERHEAD) */ #define TRANSLATE(entryprobe, returnprobe, cause, priority) \ entryprobe \ TRACE_FILTER_COND(self->lt_stackp == 0 || \ (self->lt_stackp < MAX_TAG && \ self->lt_prio[self->lt_stackp - 1] <= priority) ) \ { \ self->lt_prio[self->lt_stackp] = priority; \ self->lt_cause[self->lt_stackp] = cause; \ ++self->lt_stackp; \ } \ returnprobe \ TRACE_FILTER_COND(self->lt_stackp > 0 && \ self->lt_cause[self->lt_stackp - 1] == cause) \ { \ --self->lt_stackp; \ self->lt_cause[self->lt_stackp] = NULL; \ } /* * Syscalls have a priority of 10. This is to make sure that latency is * traced to one of the syscalls only if nothing else matches. * We put this special probe here because it uses "probefunc" variable instead * of a constant string. */ TRANSLATE(syscall:::entry, syscall:::return, probefunc, 10)