Cross Reference: /netbsd-current/crypto/external/bsd/openssl/dist/crypto/perlasm/sparcv9

Deleted Added

sdiff udiff text old ( 1.1.1.1 ) new ( 1.1.1.2 )

full compact

sparcv9_modes.pl (1.1.1.1)	sparcv9_modes.pl (1.1.1.2)
1#!/usr/bin/env perl	1#! /usr/bin/env perl 2# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the OpenSSL license (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html
2	8
	9
3# Specific modes implementations for SPARC Architecture 2011. There 4# is T4 dependency though, an ASI value that is not specified in the 5# Architecture Manual. But as SPARC universe is rather monocultural, 6# we imply that processor capable of executing crypto instructions 7# can handle the ASI in question as well. This means that we ought to 8# keep eyes open when new processors emerge... 9# 10# As for above mentioned ASI. It's so called "block initializing 11# store" which cancels "read" in "read-update-write" on cache lines. 12# This is "cooperative" optimization, as it reduces overall pressure 13# on memory interface. Benefits can't be observed/quantified with 14# usual benchmarks, on the contrary you can notice that single-thread 15# performance for parallelizable modes is ~1.5% worse for largest 16# block sizes [though few percent better for not so long ones]. All 17# this based on suggestions from David Miller. 18	10# Specific modes implementations for SPARC Architecture 2011. There 11# is T4 dependency though, an ASI value that is not specified in the 12# Architecture Manual. But as SPARC universe is rather monocultural, 13# we imply that processor capable of executing crypto instructions 14# can handle the ASI in question as well. This means that we ought to 15# keep eyes open when new processors emerge... 16# 17# As for above mentioned ASI. It's so called "block initializing 18# store" which cancels "read" in "read-update-write" on cache lines. 19# This is "cooperative" optimization, as it reduces overall pressure 20# on memory interface. Benefits can't be observed/quantified with 21# usual benchmarks, on the contrary you can notice that single-thread 22# performance for parallelizable modes is ~1.5% worse for largest 23# block sizes [though few percent better for not so long ones]. All 24# this based on suggestions from David Miller. 25
	26$::bias="STACK_BIAS"; 27$::frame="STACK_FRAME"; 28$::size_t_cc="SIZE_T_CC"; 29
19sub asm_init { # to be called with @ARGV as argument 20 for (@_) { $::abibits=64 if (/\-m64/ || /\-xarch\=v9/); } 21 if ($::abibits==64) { $::bias=2047; $::frame=192; $::size_t_cc="%xcc"; } 22 else { $::bias=0; $::frame=112; $::size_t_cc="%icc"; } 23} 24 25# unified interface 26my ($inp,$out,$len,$key,$ivec)=map("%i$_",(0..5)); --- 1355 unchanged lines hidden (view full) --- 1382 restore 1383.type ${alg}${bits}_t4_xts_${dir}crypt,#function 1384.size ${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt 1385___ 1386} 1387 1388# Purpose of these subroutines is to explicitly encode VIS instructions, 1389# so that one can compile the module without having to specify VIS	30sub asm_init { # to be called with @ARGV as argument 31 for (@_) { $::abibits=64 if (/\-m64/ || /\-xarch\=v9/); } 32 if ($::abibits==64) { $::bias=2047; $::frame=192; $::size_t_cc="%xcc"; } 33 else { $::bias=0; $::frame=112; $::size_t_cc="%icc"; } 34} 35 36# unified interface 37my ($inp,$out,$len,$key,$ivec)=map("%i$_",(0..5)); --- 1355 unchanged lines hidden (view full) --- 1393 restore 1394.type ${alg}${bits}_t4_xts_${dir}crypt,#function 1395.size ${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt 1396___ 1397} 1398 1399# Purpose of these subroutines is to explicitly encode VIS instructions, 1400# so that one can compile the module without having to specify VIS
1390# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.	1401# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
1391# Idea is to reserve for option to produce "universal" binary and let 1392# programmer detect if current CPU is VIS capable at run-time. 1393sub unvis { 1394my ($mnemonic,$rs1,$rs2,$rd)=@_; 1395my ($ref,$opf); 1396my %visopf = ( "faligndata" => 0x048, 1397 "bshuffle" => 0x04c, 1398 "fnot2" => 0x066, --- 293 unchanged lines hidden ---	1402# Idea is to reserve for option to produce "universal" binary and let 1403# programmer detect if current CPU is VIS capable at run-time. 1404sub unvis { 1405my ($mnemonic,$rs1,$rs2,$rd)=@_; 1406my ($ref,$opf); 1407my %visopf = ( "faligndata" => 0x048, 1408 "bshuffle" => 0x04c, 1409 "fnot2" => 0x066, --- 293 unchanged lines hidden ---