Deleted Added
full compact
sparcv9_modes.pl (1.1.1.1) sparcv9_modes.pl (1.1.1.2)
1#!/usr/bin/env perl
1#! /usr/bin/env perl
2# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
2
8
9
3# Specific modes implementations for SPARC Architecture 2011. There
4# is T4 dependency though, an ASI value that is not specified in the
5# Architecture Manual. But as SPARC universe is rather monocultural,
6# we imply that processor capable of executing crypto instructions
7# can handle the ASI in question as well. This means that we ought to
8# keep eyes open when new processors emerge...
9#
10# As for above mentioned ASI. It's so called "block initializing
11# store" which cancels "read" in "read-update-write" on cache lines.
12# This is "cooperative" optimization, as it reduces overall pressure
13# on memory interface. Benefits can't be observed/quantified with
14# usual benchmarks, on the contrary you can notice that single-thread
15# performance for parallelizable modes is ~1.5% worse for largest
16# block sizes [though few percent better for not so long ones]. All
17# this based on suggestions from David Miller.
18
10# Specific modes implementations for SPARC Architecture 2011. There
11# is T4 dependency though, an ASI value that is not specified in the
12# Architecture Manual. But as SPARC universe is rather monocultural,
13# we imply that processor capable of executing crypto instructions
14# can handle the ASI in question as well. This means that we ought to
15# keep eyes open when new processors emerge...
16#
17# As for above mentioned ASI. It's so called "block initializing
18# store" which cancels "read" in "read-update-write" on cache lines.
19# This is "cooperative" optimization, as it reduces overall pressure
20# on memory interface. Benefits can't be observed/quantified with
21# usual benchmarks, on the contrary you can notice that single-thread
22# performance for parallelizable modes is ~1.5% worse for largest
23# block sizes [though few percent better for not so long ones]. All
24# this based on suggestions from David Miller.
25
26$::bias="STACK_BIAS";
27$::frame="STACK_FRAME";
28$::size_t_cc="SIZE_T_CC";
29
19sub asm_init { # to be called with @ARGV as argument
20 for (@_) { $::abibits=64 if (/\-m64/ || /\-xarch\=v9/); }
21 if ($::abibits==64) { $::bias=2047; $::frame=192; $::size_t_cc="%xcc"; }
22 else { $::bias=0; $::frame=112; $::size_t_cc="%icc"; }
23}
24
25# unified interface
26my ($inp,$out,$len,$key,$ivec)=map("%i$_",(0..5));

--- 1355 unchanged lines hidden (view full) ---

1382 restore
1383.type ${alg}${bits}_t4_xts_${dir}crypt,#function
1384.size ${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt
1385___
1386}
1387
1388# Purpose of these subroutines is to explicitly encode VIS instructions,
1389# so that one can compile the module without having to specify VIS
30sub asm_init { # to be called with @ARGV as argument
31 for (@_) { $::abibits=64 if (/\-m64/ || /\-xarch\=v9/); }
32 if ($::abibits==64) { $::bias=2047; $::frame=192; $::size_t_cc="%xcc"; }
33 else { $::bias=0; $::frame=112; $::size_t_cc="%icc"; }
34}
35
36# unified interface
37my ($inp,$out,$len,$key,$ivec)=map("%i$_",(0..5));

--- 1355 unchanged lines hidden (view full) ---

1393 restore
1394.type ${alg}${bits}_t4_xts_${dir}crypt,#function
1395.size ${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt
1396___
1397}
1398
1399# Purpose of these subroutines is to explicitly encode VIS instructions,
1400# so that one can compile the module without having to specify VIS
1390# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
1401# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
1391# Idea is to reserve for option to produce "universal" binary and let
1392# programmer detect if current CPU is VIS capable at run-time.
1393sub unvis {
1394my ($mnemonic,$rs1,$rs2,$rd)=@_;
1395my ($ref,$opf);
1396my %visopf = ( "faligndata" => 0x048,
1397 "bshuffle" => 0x04c,
1398 "fnot2" => 0x066,

--- 293 unchanged lines hidden ---
1402# Idea is to reserve for option to produce "universal" binary and let
1403# programmer detect if current CPU is VIS capable at run-time.
1404sub unvis {
1405my ($mnemonic,$rs1,$rs2,$rd)=@_;
1406my ($ref,$opf);
1407my %visopf = ( "faligndata" => 0x048,
1408 "bshuffle" => 0x04c,
1409 "fnot2" => 0x066,

--- 293 unchanged lines hidden ---