1215976Sjmallett/***********************license start***************
2215976Sjmallett * Copyright (c) 2003-2010  Cavium Networks (support@cavium.com). All rights
3215976Sjmallett * reserved.
4215976Sjmallett *
5215976Sjmallett *
6215976Sjmallett * Redistribution and use in source and binary forms, with or without
7215976Sjmallett * modification, are permitted provided that the following conditions are
8215976Sjmallett * met:
9215976Sjmallett *
10215976Sjmallett *   * Redistributions of source code must retain the above copyright
11215976Sjmallett *     notice, this list of conditions and the following disclaimer.
12215976Sjmallett *
13215976Sjmallett *   * Redistributions in binary form must reproduce the above
14215976Sjmallett *     copyright notice, this list of conditions and the following
15215976Sjmallett *     disclaimer in the documentation and/or other materials provided
16215976Sjmallett *     with the distribution.
17215976Sjmallett
18215976Sjmallett *   * Neither the name of Cavium Networks nor the names of
19215976Sjmallett *     its contributors may be used to endorse or promote products
20215976Sjmallett *     derived from this software without specific prior written
21215976Sjmallett *     permission.
22215976Sjmallett
23215976Sjmallett * This Software, including technical data, may be subject to U.S. export  control
24215976Sjmallett * laws, including the U.S. Export Administration Act and its  associated
25215976Sjmallett * regulations, and may be subject to export or import  regulations in other
26215976Sjmallett * countries.
27215976Sjmallett
28215976Sjmallett * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
29215976Sjmallett * AND WITH ALL FAULTS AND CAVIUM  NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR
30215976Sjmallett * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
31215976Sjmallett * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
32215976Sjmallett * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
33215976Sjmallett * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
34215976Sjmallett * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
35215976Sjmallett * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
36215976Sjmallett * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE  RISK ARISING OUT OF USE OR
37215976Sjmallett * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
38215976Sjmallett ***********************license end**************************************/
39215976Sjmallett
40215976Sjmallett
41215976Sjmallett
42215976Sjmallett
43215976Sjmallett
44215976Sjmallett
45215976Sjmallett
46215976Sjmallett//
47215976Sjmallett// The function defined here is called for every function as it is executed.
48215976Sjmallett// These calls are automatically inserted by GCC when the switch "-pg" is
49215976Sjmallett// used. This allows cvmx-log to add a PC entry as each function is executed.
50215976Sjmallett// This information, along with the timestamps can give the user a good idea
51215976Sjmallett// of the performance characteristics of their program. This function normally
52215976Sjmallett// takes about 22 cycles to execute.
53215976Sjmallett//
54215976Sjmallett
55215976Sjmallett#ifdef __linux__
56215976Sjmallett#include <asm/asm.h>
57215976Sjmallett#include <asm/regdef.h>
58215976Sjmallett#define LA dla
59215976Sjmallett#else
60215976Sjmallett#include <machine/asm.h>
61215976Sjmallett#include <machine/regdef.h>
62215976Sjmallett#define LA la
63215976Sjmallett#endif
64215976Sjmallett
.set noreorder
.set noat

// Size of the slow-path register save area. 27 registers * 8 bytes = 216,
// rounded up to 224 so that sp keeps its 16-byte alignment across the call
// into C (offsets 216..223 are unused padding).
#define CVMX_MCOUNT_SLOW_FRAME	224

//
// _mcount - profiling hook: append one 16 byte PC record to the cvmx-log
// buffer, or hand off to cvmx_log_pc() when fewer than 16 bytes remain.
//
// On entry (set up by the compiler-inserted call sequence):
//	ra	  return address inside the function being profiled; this is
//		  the value logged as the PC
//	at ($1)	  the profiled function's own ra, copied back into ra on exit
//	sp	  on non-Linux builds already lowered by 16 by the compiler.
//		  The Linux build reserves all 32 bytes itself, so presumably
//		  that toolchain does not pre-adjust sp - TODO confirm.
// On exit:
//	every register this routine touched is reloaded from the stack,
//	ra = at, and sp is 32 above the bottom of the fast-path frame.
//
// Fast-path frame (32 bytes): 0=s0, 8=s1, 16=s2, 24=s3.
//
// Log record written on the fast path:
//	+0	cycle counter with its top 11 bits replaced by 0x001
//	+8	ra
//
LEAF(_mcount)
	//
	// All registers we use must be saved since calls are added by gcc
	// after register allocation. The at register ($1) will contain the
	// original ra register before the _mcount call. Also the compiler
	// automatically performs a "dsubu sp, sp, 16" before we're called.
	// At the end of this function all registers must have their original
	// values and the stack pointer must be adjusted by 16. This code is
	// pretty unreadable since it has been arranged to promote dual issue.
	//
#ifdef __linux__
	dsubu	sp, sp, 32				// Reserve the whole 32 byte save area ourselves
#else
	dsubu	sp, sp, 16				// 16 more bytes on top of the 16 reserved by gcc
#endif
	sd	s3, 24(sp)				// Save register
	rdhwr	s3, $31					// Read the cycle count
	sd	s0, 0(sp)				// Save register
	LA	s0, cvmx_log_buffer_end_ptr		// Load the address of the end of the log buffer
	sd	s1, 8(sp)				// Save register
	LA	s1, cvmx_log_buffer_write_ptr		// Load the address of the location in the log buffer
	sd	s2, 16(sp)				// Save register
	ld	s0, 0(s0)				// Get value of the current log buffer end location
	ld	s2, 0(s1)				// Get value of the current log buffer location
	dsubu	s0, s0, s2				// s0 = bytes left between the write pointer and the end
	sltiu	s0, s0, 16				// s0 = 1 when fewer than 16 bytes are left
	bne	s0, $0, call_c_pc			// Call the slow C function if we don't have room in the log
	li	s0, 0x001				// (delay slot, runs on both paths) 11 bit constant that matches the first 11 bits of a CVMX_LOG_TYPE_PC header
	sd	ra, 8(s2)				// Write the pc to the log
	dins	s3, s0, 53, 11				// Overwrite the upper cycle count bits with the CVMX_LOG_TYPE_PC header
	sd	s3, 0(s2)				// Write the log header
	daddu	s2, s2, 16				// Increment the write location ptr
	sd	s2, 0(s1)				// Store the write location ptr
return_c_pc:
	// Common exit for the fast path and the slow path.
	ld	s0, 0(sp)				// Restore register
	ld	s1, 8(sp)				// Restore register
	ld	s2, 16(sp)				// Restore register
	ld	s3, 24(sp)				// Restore register
	daddu	sp, sp, 32				// Pop everything off the stack, even the 16 bytes done by gcc
	jr	ra					// Return to the caller and
	or	ra, $1, $1				// (delay slot) make sure the ra is back to its original value

call_c_pc:
	// The registers used by the C code may change based on optimizations. To be
	// safe, I'll save all registers. We're in the slow path case anyway.
	//
	// The frame must be a multiple of 16 bytes: sp is 16 byte aligned when we
	// get here and the 64 bit ABI requires it to still be aligned at the jal.
	dsubu	sp, sp, CVMX_MCOUNT_SLOW_FRAME
	sd	$1, 0(sp)
	sd	$2, 8(sp)
	sd	$3, 16(sp)
	sd	$4, 24(sp)
	sd	$5, 32(sp)
	sd	$6, 40(sp)
	sd	$7, 48(sp)
	sd	$8, 56(sp)
	sd	$9, 64(sp)
	sd	$10, 72(sp)
	sd	$11, 80(sp)
	sd	$12, 88(sp)
	sd	$13, 96(sp)
	sd	$14, 104(sp)
	sd	$15, 112(sp)
	// s0, s1, s2, s3 are already saved
	sd	$20, 120(sp)
	sd	$21, 128(sp)
	sd	$22, 136(sp)
	sd	$23, 144(sp)
	sd	$24, 152(sp)
	sd	$25, 160(sp)
	sd	$26, 168(sp)
	sd	$27, 176(sp)
	sd	$28, 184(sp)
	sd	$29, 192(sp)
	sd	$30, 200(sp)
	sd	$31, 208(sp)

	or	a0, ra, ra				// First argument: the pc to log
	jal	cvmx_log_pc
	nop						// (delay slot)

	ld	$1, 0(sp)
	ld	$2, 8(sp)
	ld	$3, 16(sp)
	ld	$4, 24(sp)
	ld	$5, 32(sp)
	ld	$6, 40(sp)
	ld	$7, 48(sp)
	ld	$8, 56(sp)
	ld	$9, 64(sp)
	ld	$10, 72(sp)
	ld	$11, 80(sp)
	ld	$12, 88(sp)
	ld	$13, 96(sp)
	ld	$14, 104(sp)
	ld	$15, 112(sp)
	// s0, s1, s2, s3 will be restored later
	ld	$20, 120(sp)
	ld	$21, 128(sp)
	ld	$22, 136(sp)
	ld	$23, 144(sp)
	ld	$24, 152(sp)
	ld	$25, 160(sp)
	ld	$26, 168(sp)
	ld	$27, 176(sp)
	ld	$28, 184(sp)
	ld	$29, 192(sp)				// Reloads the value sp already has (saved after the dsubu above)
	ld	$30, 200(sp)
	ld	$31, 208(sp)				// ra is again the return address into the profiled function
	b	return_c_pc
	daddu	sp, sp, CVMX_MCOUNT_SLOW_FRAME		// (delay slot) drop the slow-path frame before the common exit

END(_mcount)
178215976Sjmallett
179