1293034Sdes/*- 2293034Sdes * Copyright (c) 2015 Dag-Erling Sm��rgrav 3293034Sdes * All rights reserved. 4293034Sdes * 5293034Sdes * Redistribution and use in source and binary forms, with or without 6293034Sdes * modification, are permitted provided that the following conditions 7293034Sdes * are met: 8293034Sdes * 1. Redistributions of source code must retain the above copyright 9293034Sdes * notice, this list of conditions and the following disclaimer. 10293034Sdes * 2. Redistributions in binary form must reproduce the above copyright 11293034Sdes * notice, this list of conditions and the following disclaimer in the 12293034Sdes * documentation and/or other materials provided with the distribution. 13293034Sdes * 14293034Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15293034Sdes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16293034Sdes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17293034Sdes * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18293034Sdes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19293034Sdes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20293034Sdes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21293034Sdes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22293034Sdes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23293034Sdes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24293034Sdes * SUCH DAMAGE. 25293034Sdes * 26293034Sdes * $FreeBSD$ 27293034Sdes */ 28293034Sdes 29293034Sdes#ifdef _KERNEL 30293034Sdes#include <sys/libkern.h> 31293034Sdes#else 32293034Sdes#include <stdio.h> 33293034Sdes#include <strings.h> 34293034Sdes#endif 35293034Sdes 36293034Sdes#include "fp16.h" 37293034Sdes 38293034Sdes/* 39293034Sdes * Compute the quare root of x, using Newton's method with 2^(log2(x)/2) 40293034Sdes * as the initial estimate. 41293034Sdes */ 42293034Sdesfp16_t 43293034Sdesfp16_sqrt(fp16_t x) 44293034Sdes{ 45293034Sdes fp16_t y, delta; 46293034Sdes signed int log2x; 47293034Sdes 48293034Sdes /* special case */ 49293034Sdes if (x == 0) 50293034Sdes return (0); 51293034Sdes 52293034Sdes /* shift toward 0 by half the logarithm */ 53293034Sdes log2x = flsl(x) - 1; 54293034Sdes if (log2x >= 16) { 55293034Sdes y = x >> (log2x - 16) / 2; 56293034Sdes } else { 57293034Sdes#if 0 58293034Sdes y = x << (16 - log2x) / 2; 59293034Sdes#else 60293034Sdes /* XXX for now, return 0 for anything < 1 */ 61293034Sdes return (0); 62293034Sdes#endif 63293034Sdes } 64293034Sdes while (y > 0) { 65293034Sdes /* delta = y^2 / 2y */ 66293034Sdes delta = fp16_div(fp16_sub(fp16_mul(y, y), x), y * 2); 67293034Sdes if (delta == 0) 68293034Sdes break; 69293034Sdes y = fp16_sub(y, delta); 70293034Sdes } 71293034Sdes return (y); 72293034Sdes} 73293034Sdes 74294783Sdesstatic fp16_t fp16_sin_table[256] = { 75294783Sdes 0, 402, 804, 1206, 1608, 2010, 2412, 2814, 76294783Sdes 3215, 3617, 4018, 4420, 4821, 5222, 5622, 6023, 77294783Sdes 6423, 6823, 7223, 7623, 8022, 8421, 8819, 9218, 78294783Sdes 9616, 10013, 10410, 10807, 11204, 11600, 11995, 12390, 79294783Sdes 12785, 13179, 13573, 13966, 14359, 14751, 15142, 15533, 80294783Sdes 15923, 16313, 16702, 17091, 17479, 17866, 18253, 18638, 81294783Sdes 19024, 19408, 19792, 20175, 20557, 20938, 21319, 21699, 82294783Sdes 22078, 22456, 22833, 23210, 23586, 23960, 24334, 24707, 83294783Sdes 25079, 25450, 25820, 26189, 26557, 26925, 27291, 27656, 84294783Sdes 28020, 28383, 28745, 29105, 29465, 29824, 30181, 30538, 85294783Sdes 30893, 31247, 31600, 31952, 32302, 32651, 32999, 33346, 86294783Sdes 33692, 34036, 34379, 34721, 35061, 35400, 35738, 36074, 87294783Sdes 36409, 36743, 37075, 37406, 37736, 38064, 38390, 38716, 88294783Sdes 39039, 39362, 39682, 40002, 40319, 40636, 40950, 41263, 89294783Sdes 41575, 41885, 42194, 42501, 42806, 43110, 43412, 43712, 90294783Sdes 44011, 44308, 44603, 44897, 45189, 45480, 45768, 46055, 91294783Sdes 46340, 46624, 46906, 47186, 47464, 47740, 48015, 48288, 92294783Sdes 48558, 48828, 49095, 49360, 49624, 49886, 50146, 50403, 93294783Sdes 50660, 50914, 51166, 51416, 51665, 51911, 52155, 52398, 94294783Sdes 52639, 52877, 53114, 53348, 53581, 53811, 54040, 54266, 95294783Sdes 54491, 54713, 54933, 55152, 55368, 55582, 55794, 56004, 96294783Sdes 56212, 56417, 56621, 56822, 57022, 57219, 57414, 57606, 97294783Sdes 57797, 57986, 58172, 58356, 58538, 58718, 58895, 59070, 98294783Sdes 59243, 59414, 59583, 59749, 59913, 60075, 60235, 60392, 99294783Sdes 60547, 60700, 60850, 60998, 61144, 61288, 61429, 61568, 100294783Sdes 61705, 61839, 61971, 62100, 62228, 62353, 62475, 62596, 101294783Sdes 62714, 62829, 62942, 63053, 63162, 63268, 63371, 63473, 102294783Sdes 63571, 63668, 63762, 63854, 63943, 64030, 64115, 64197, 103294783Sdes 64276, 64353, 64428, 64501, 64571, 64638, 64703, 64766, 104294783Sdes 64826, 64884, 64939, 64992, 65043, 65091, 65136, 65179, 105294783Sdes 65220, 65258, 65294, 65327, 65358, 65386, 65412, 65436, 106294783Sdes 65457, 65475, 65491, 65505, 65516, 65524, 65531, 65534, 107293034Sdes}; 108293034Sdes 109293034Sdes/* 110294783Sdes * Compute the sine of theta. 111294783Sdes */ 112294783Sdesfp16_t 113294783Sdesfp16_sin(fp16_t theta) 114294783Sdes{ 115294783Sdes unsigned int i; 116294783Sdes 117294783Sdes i = 1024 * (theta % FP16_2PI) / FP16_2PI; 118294783Sdes switch (i / 256) { 119294783Sdes case 0: 120294783Sdes return (fp16_sin_table[i % 256]); 121294783Sdes case 1: 122294783Sdes return (fp16_sin_table[255 - i % 256]); 123294783Sdes case 2: 124294783Sdes return (-fp16_sin_table[i % 256]); 125294783Sdes case 3: 126294783Sdes return (-fp16_sin_table[255 - i % 256]); 127294783Sdes default: 128294783Sdes /* inconceivable! */ 129294783Sdes return (0); 130294783Sdes } 131294783Sdes} 132294783Sdes 133294783Sdes/* 134293034Sdes * Compute the cosine of theta. 135293034Sdes */ 136293034Sdesfp16_t 137293034Sdesfp16_cos(fp16_t theta) 138293034Sdes{ 139293034Sdes unsigned int i; 140293034Sdes 141293034Sdes i = 1024 * (theta % FP16_2PI) / FP16_2PI; 142293034Sdes switch (i / 256) { 143293034Sdes case 0: 144294783Sdes return (fp16_sin_table[255 - i % 256]); 145293034Sdes case 1: 146294783Sdes return (-fp16_sin_table[i % 256]); 147293034Sdes case 2: 148294783Sdes return (-fp16_sin_table[255 - i % 256]); 149293034Sdes case 3: 150294783Sdes return (fp16_sin_table[i % 256]); 151293034Sdes default: 152293034Sdes /* inconceivable! */ 153293034Sdes return (0); 154293034Sdes } 155293034Sdes} 156