TestByteVect.java revision 11707:ad7af1afda7a
172017Scg/*
272017Scg * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
372017Scg * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
472017Scg *
572017Scg * This code is free software; you can redistribute it and/or modify it
672017Scg * under the terms of the GNU General Public License version 2 only, as
772017Scg * published by the Free Software Foundation.
872017Scg *
972017Scg * This code is distributed in the hope that it will be useful, but WITHOUT
1072017Scg * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1172017Scg * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1272017Scg * version 2 for more details (a copy is included in the LICENSE file that
1372017Scg * accompanied this code).
1472017Scg *
1572017Scg * You should have received a copy of the GNU General Public License version
1672017Scg * 2 along with this work; if not, write to the Free Software Foundation,
1772017Scg * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1872017Scg *
1972017Scg * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2072017Scg * or visit www.oracle.com if you need additional information or have any
2172017Scg * questions.
2272017Scg *
2372017Scg */
2472017Scg
/**
 * @test
 * @bug 6340864
 * @summary Implement vectorization optimizations in hotspot-server
 *
 * @run main/othervm/timeout=400 -Xbatch -Xmx64m compiler.c2.cr6340864.TestByteVect
 */
3272017Scg
3372017Scgpackage compiler.c2.cr6340864;
3472017Scg
35119287Simppublic class TestByteVect {
36119287Simp  private static final int ARRLEN = 997;
3772017Scg  private static final int ITERS  = 11000;
3872017Scg  private static final int ADD_INIT = 63;
3972017Scg  private static final int BIT_MASK = 0xB7;
4082180Scg  private static final int VALUE = 3;
4182180Scg  private static final int SHIFT = 8;
4284771Sorion
4372017Scg  public static void main(String args[]) {
4472017Scg    System.out.println("Testing Byte vectors");
4572017Scg    int errn = test();
4672017Scg    if (errn > 0) {
4772017Scg      System.err.println("FAILED: " + errn + " errors");
4872017Scg      System.exit(97);
4972017Scg    }
5072017Scg    System.out.println("PASSED");
5172017Scg  }
5272017Scg
5372017Scg  static int test() {
5472017Scg    byte[] a0 = new byte[ARRLEN];
5572017Scg    byte[] a1 = new byte[ARRLEN];
5672017Scg    byte[] a2 = new byte[ARRLEN];
5772017Scg    byte[] a3 = new byte[ARRLEN];
5872017Scg    byte[] a4 = new byte[ARRLEN];
5972017Scg    short[] p2 = new short[ARRLEN/2];
6072017Scg      int[] p4 = new   int[ARRLEN/4];
6172017Scg     long[] p8 = new  long[ARRLEN/8];
6272017Scg    // Initialize
6372017Scg    int gold_sum = 0;
6472017Scg    for (int i=0; i<ARRLEN; i++) {
6572017Scg      byte val = (byte)(ADD_INIT+i);
6672017Scg      gold_sum += val;
6772017Scg      a1[i] = val;
6874763Scg      a2[i] = (byte)VALUE;
6974763Scg      a3[i] = (byte)-VALUE;
7072017Scg      a4[i] = (byte)BIT_MASK;
7172455Scg    }
7272455Scg    System.out.println("Warmup");
7372455Scg    for (int i=0; i<ITERS; i++) {
7472017Scg      test_sum(a1);
7572017Scg      test_addc(a0, a1);
7672017Scg      test_addv(a0, a1, (byte)VALUE);
7772017Scg      test_adda(a0, a1, a2);
7872017Scg      test_subc(a0, a1);
7972017Scg      test_subv(a0, a1, (byte)VALUE);
8072017Scg      test_suba(a0, a1, a2);
8172017Scg
8272017Scg      test_mulc(a0, a1);
8372017Scg      test_mulv(a0, a1, (byte)VALUE);
8472017Scg      test_mula(a0, a1, a2);
8572017Scg      test_divc(a0, a1);
8672017Scg      test_divv(a0, a1, (byte)VALUE);
8772017Scg      test_diva(a0, a1, a2);
8872017Scg      test_mulc_n(a0, a1);
8972017Scg      test_mulv(a0, a1, (byte)-VALUE);
9084771Sorion      test_mula(a0, a1, a3);
9172017Scg      test_divc_n(a0, a1);
9272017Scg      test_divv(a0, a1, (byte)-VALUE);
9372017Scg      test_diva(a0, a1, a3);
9472017Scg
9572017Scg      test_andc(a0, a1);
9672017Scg      test_andv(a0, a1, (byte)BIT_MASK);
9772017Scg      test_anda(a0, a1, a4);
9872017Scg      test_orc(a0, a1);
9972017Scg      test_orv(a0, a1, (byte)BIT_MASK);
10072017Scg      test_ora(a0, a1, a4);
10172017Scg      test_xorc(a0, a1);
10272455Scg      test_xorv(a0, a1, (byte)BIT_MASK);
10372017Scg      test_xora(a0, a1, a4);
10472017Scg
10572017Scg      test_sllc(a0, a1);
10672017Scg      test_sllv(a0, a1, VALUE);
10772017Scg      test_srlc(a0, a1);
10872017Scg      test_srlv(a0, a1, VALUE);
10972017Scg      test_srac(a0, a1);
11072017Scg      test_srav(a0, a1, VALUE);
11172017Scg
11272017Scg      test_sllc_n(a0, a1);
11372017Scg      test_sllv(a0, a1, -VALUE);
11472017Scg      test_srlc_n(a0, a1);
11572017Scg      test_srlv(a0, a1, -VALUE);
11672017Scg      test_srac_n(a0, a1);
11772017Scg      test_srav(a0, a1, -VALUE);
11872017Scg
11972017Scg      test_sllc_o(a0, a1);
12072017Scg      test_sllv(a0, a1, SHIFT);
12172017Scg      test_srlc_o(a0, a1);
12272017Scg      test_srlv(a0, a1, SHIFT);
12372017Scg      test_srac_o(a0, a1);
12472017Scg      test_srav(a0, a1, SHIFT);
12572017Scg
12672017Scg      test_sllc_on(a0, a1);
12772017Scg      test_sllv(a0, a1, -SHIFT);
12872017Scg      test_srlc_on(a0, a1);
12972017Scg      test_srlv(a0, a1, -SHIFT);
13072017Scg      test_srac_on(a0, a1);
13172017Scg      test_srav(a0, a1, -SHIFT);
13272017Scg
13372017Scg      test_sllc_add(a0, a1);
13472017Scg      test_sllv_add(a0, a1, ADD_INIT);
13574763Scg      test_srlc_add(a0, a1);
13672017Scg      test_srlv_add(a0, a1, ADD_INIT);
13772017Scg      test_srac_add(a0, a1);
13872017Scg      test_srav_add(a0, a1, ADD_INIT);
13972017Scg
14072017Scg      test_sllc_and(a0, a1);
14172017Scg      test_sllv_and(a0, a1, BIT_MASK);
14272017Scg      test_srlc_and(a0, a1);
14372017Scg      test_srlv_and(a0, a1, BIT_MASK);
14472017Scg      test_srac_and(a0, a1);
14572017Scg      test_srav_and(a0, a1, BIT_MASK);
14672017Scg
14772017Scg      test_pack2(p2, a1);
14872017Scg      test_unpack2(a0, p2);
14972017Scg      test_pack2_swap(p2, a1);
15072017Scg      test_unpack2_swap(a0, p2);
15172017Scg      test_pack4(p4, a1);
15272017Scg      test_unpack4(a0, p4);
15372017Scg      test_pack4_swap(p4, a1);
15472017Scg      test_unpack4_swap(a0, p4);
15572017Scg      test_pack8(p8, a1);
15672017Scg      test_unpack8(a0, p8);
15772017Scg      test_pack8_swap(p8, a1);
15872017Scg      test_unpack8_swap(a0, p8);
15972017Scg    }
16072017Scg    // Test and verify results
16172017Scg    System.out.println("Verification");
16272017Scg    int errn = 0;
16372017Scg    {
16472017Scg      int sum = test_sum(a1);
16572017Scg      if (sum != gold_sum) {
16672017Scg        System.err.println("test_sum:  " + sum + " != " + gold_sum);
16772017Scg        errn++;
16872017Scg      }
16972017Scg
17072017Scg      test_addc(a0, a1);
17172017Scg      for (int i=0; i<ARRLEN; i++) {
17272017Scg        errn += verify("test_addc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
17372017Scg      }
17472017Scg      test_addv(a0, a1, (byte)VALUE);
17572017Scg      for (int i=0; i<ARRLEN; i++) {
17672017Scg        errn += verify("test_addv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
17772017Scg      }
17872017Scg      test_adda(a0, a1, a2);
17972017Scg      for (int i=0; i<ARRLEN; i++) {
18072017Scg        errn += verify("test_adda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)+VALUE));
18172017Scg      }
18272017Scg
18372017Scg      test_subc(a0, a1);
18472017Scg      for (int i=0; i<ARRLEN; i++) {
18572017Scg        errn += verify("test_subc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
18672017Scg      }
18772017Scg      test_subv(a0, a1, (byte)VALUE);
18872017Scg      for (int i=0; i<ARRLEN; i++) {
18972017Scg        errn += verify("test_subv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
19072017Scg      }
19172017Scg      test_suba(a0, a1, a2);
19272017Scg      for (int i=0; i<ARRLEN; i++) {
19372017Scg        errn += verify("test_suba: ", i, a0[i], (byte)((byte)(ADD_INIT+i)-VALUE));
19472017Scg      }
19572017Scg
19672017Scg      test_mulc(a0, a1);
19772017Scg      for (int i=0; i<ARRLEN; i++) {
19872017Scg        errn += verify("test_mulc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
19972017Scg      }
20072017Scg      test_mulv(a0, a1, (byte)VALUE);
20172017Scg      for (int i=0; i<ARRLEN; i++) {
20272017Scg        errn += verify("test_mulv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
20372455Scg      }
20472017Scg      test_mula(a0, a1, a2);
20572017Scg      for (int i=0; i<ARRLEN; i++) {
20672017Scg        errn += verify("test_mula: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*VALUE));
20772017Scg      }
20872017Scg
20972017Scg      test_divc(a0, a1);
21072017Scg      for (int i=0; i<ARRLEN; i++) {
21172017Scg        errn += verify("test_divc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
21272017Scg      }
21372017Scg      test_divv(a0, a1, (byte)VALUE);
21472017Scg      for (int i=0; i<ARRLEN; i++) {
21572017Scg        errn += verify("test_divv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
21672017Scg      }
21772017Scg      test_diva(a0, a1, a2);
21872017Scg      for (int i=0; i<ARRLEN; i++) {
21972017Scg        errn += verify("test_diva: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/VALUE));
22072017Scg      }
22172017Scg
22272017Scg      test_mulc_n(a0, a1);
22372017Scg      for (int i=0; i<ARRLEN; i++) {
22472017Scg        errn += verify("test_mulc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
22572017Scg      }
22672017Scg      test_mulv(a0, a1, (byte)-VALUE);
22772017Scg      for (int i=0; i<ARRLEN; i++) {
22872455Scg        errn += verify("test_mulv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
22972017Scg      }
23072017Scg      test_mula(a0, a1, a3);
23172017Scg      for (int i=0; i<ARRLEN; i++) {
23272017Scg        errn += verify("test_mula_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)*(-VALUE)));
23372017Scg      }
23472017Scg
23572017Scg      test_divc_n(a0, a1);
23672455Scg      for (int i=0; i<ARRLEN; i++) {
23772017Scg        errn += verify("test_divc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
23872017Scg      }
23972455Scg      test_divv(a0, a1, (byte)-VALUE);
24072017Scg      for (int i=0; i<ARRLEN; i++) {
24172017Scg        errn += verify("test_divv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
24272017Scg      }
24372017Scg      test_diva(a0, a1, a3);
24472017Scg      for (int i=0; i<ARRLEN; i++) {
24572017Scg        errn += verify("test_diva_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)/(-VALUE)));
24672017Scg      }
24772017Scg
24872017Scg      test_andc(a0, a1);
24972017Scg      for (int i=0; i<ARRLEN; i++) {
25072017Scg        errn += verify("test_andc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
25172017Scg      }
25272455Scg      test_andv(a0, a1, (byte)BIT_MASK);
25372017Scg      for (int i=0; i<ARRLEN; i++) {
25472017Scg        errn += verify("test_andv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
25572017Scg      }
25672017Scg      test_anda(a0, a1, a4);
25772455Scg      for (int i=0; i<ARRLEN; i++) {
25872017Scg        errn += verify("test_anda: ", i, a0[i], (byte)((byte)(ADD_INIT+i)&BIT_MASK));
25972017Scg      }
26072017Scg
26172017Scg      test_orc(a0, a1);
26272455Scg      for (int i=0; i<ARRLEN; i++) {
26372455Scg        errn += verify("test_orc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
26472017Scg      }
26572017Scg      test_orv(a0, a1, (byte)BIT_MASK);
26672017Scg      for (int i=0; i<ARRLEN; i++) {
26772017Scg        errn += verify("test_orv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
26872017Scg      }
26972017Scg      test_ora(a0, a1, a4);
27072017Scg      for (int i=0; i<ARRLEN; i++) {
27172017Scg        errn += verify("test_ora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)|BIT_MASK));
27272017Scg      }
27372017Scg
27472017Scg      test_xorc(a0, a1);
27572017Scg      for (int i=0; i<ARRLEN; i++) {
27672017Scg        errn += verify("test_xorc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
27772455Scg      }
27872455Scg      test_xorv(a0, a1, (byte)BIT_MASK);
27972017Scg      for (int i=0; i<ARRLEN; i++) {
28072017Scg        errn += verify("test_xorv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
28172017Scg      }
28272017Scg      test_xora(a0, a1, a4);
28372017Scg      for (int i=0; i<ARRLEN; i++) {
28472017Scg        errn += verify("test_xora: ", i, a0[i], (byte)((byte)(ADD_INIT+i)^BIT_MASK));
28572017Scg      }
28672455Scg
28772017Scg      test_sllc(a0, a1);
28872017Scg      for (int i=0; i<ARRLEN; i++) {
28972017Scg        errn += verify("test_sllc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
29072017Scg      }
29172017Scg      test_sllv(a0, a1, VALUE);
29272455Scg      for (int i=0; i<ARRLEN; i++) {
29372017Scg        errn += verify("test_sllv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<VALUE));
29472455Scg      }
29572017Scg
29672017Scg      test_srlc(a0, a1);
29772017Scg      for (int i=0; i<ARRLEN; i++) {
29872017Scg        errn += verify("test_srlc: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
29972017Scg      }
30072017Scg      test_srlv(a0, a1, VALUE);
30172017Scg      for (int i=0; i<ARRLEN; i++) {
30272017Scg        errn += verify("test_srlv: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>VALUE));
30372017Scg      }
30472017Scg
30572017Scg      test_srac(a0, a1);
30672017Scg      for (int i=0; i<ARRLEN; i++) {
30772017Scg        errn += verify("test_srac: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
30872017Scg      }
30972017Scg      test_srav(a0, a1, VALUE);
31072017Scg      for (int i=0; i<ARRLEN; i++) {
31174763Scg        errn += verify("test_srav: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>VALUE));
31272017Scg      }
31372017Scg
31472017Scg      test_sllc_n(a0, a1);
31572017Scg      for (int i=0; i<ARRLEN; i++) {
31672017Scg        errn += verify("test_sllc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
31784771Sorion      }
31872017Scg      test_sllv(a0, a1, -VALUE);
31972017Scg      for (int i=0; i<ARRLEN; i++) {
32072017Scg        errn += verify("test_sllv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-VALUE)));
32172017Scg      }
32272017Scg
32372017Scg      test_srlc_n(a0, a1);
32472017Scg      for (int i=0; i<ARRLEN; i++) {
32572017Scg        errn += verify("test_srlc_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
32672455Scg      }
32772017Scg      test_srlv(a0, a1, -VALUE);
32872017Scg      for (int i=0; i<ARRLEN; i++) {
32972017Scg        errn += verify("test_srlv_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-VALUE)));
33072017Scg      }
33172017Scg
33272017Scg      test_srac_n(a0, a1);
33372017Scg      for (int i=0; i<ARRLEN; i++) {
33472017Scg        errn += verify("test_srac_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
33572017Scg      }
33672017Scg      test_srav(a0, a1, -VALUE);
33772017Scg      for (int i=0; i<ARRLEN; i++) {
33872017Scg        errn += verify("test_srav_n: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-VALUE)));
33972017Scg      }
34072017Scg
34184771Sorion      test_sllc_o(a0, a1);
34272455Scg      for (int i=0; i<ARRLEN; i++) {
34372017Scg        errn += verify("test_sllc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
34472017Scg      }
34572017Scg      test_sllv(a0, a1, SHIFT);
34672017Scg      for (int i=0; i<ARRLEN; i++) {
34772017Scg        errn += verify("test_sllv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<SHIFT));
34884771Sorion      }
34972455Scg
35072017Scg      test_srlc_o(a0, a1);
35172017Scg      for (int i=0; i<ARRLEN; i++) {
35272017Scg        errn += verify("test_srlc_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
35372017Scg      }
35484771Sorion      test_srlv(a0, a1, SHIFT);
35572017Scg      for (int i=0; i<ARRLEN; i++) {
35682837Sorion        errn += verify("test_srlv_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>SHIFT));
35772017Scg      }
35872017Scg
35972017Scg      test_srac_o(a0, a1);
36072017Scg      for (int i=0; i<ARRLEN; i++) {
36172017Scg        errn += verify("test_srac_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
36272017Scg      }
36372017Scg      test_srav(a0, a1, SHIFT);
36472017Scg      for (int i=0; i<ARRLEN; i++) {
36572017Scg        errn += verify("test_srav_o: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>SHIFT));
36672017Scg      }
36772017Scg
36872017Scg      test_sllc_on(a0, a1);
36972017Scg      for (int i=0; i<ARRLEN; i++) {
37072017Scg        errn += verify("test_sllc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
37172017Scg      }
37272017Scg      test_sllv(a0, a1, -SHIFT);
37372017Scg      for (int i=0; i<ARRLEN; i++) {
37472017Scg        errn += verify("test_sllv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)<<(-SHIFT)));
37572017Scg      }
37672017Scg
37772017Scg      test_srlc_on(a0, a1);
37872017Scg      for (int i=0; i<ARRLEN; i++) {
37972017Scg        errn += verify("test_srlc_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
38072017Scg      }
38172017Scg      test_srlv(a0, a1, -SHIFT);
38272017Scg      for (int i=0; i<ARRLEN; i++) {
38372017Scg        errn += verify("test_srlv_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>>(-SHIFT)));
38472017Scg      }
38572017Scg
38672017Scg      test_srac_on(a0, a1);
38772455Scg      for (int i=0; i<ARRLEN; i++) {
38872017Scg        errn += verify("test_srac_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
38972017Scg      }
39072017Scg      test_srav(a0, a1, -SHIFT);
39172017Scg      for (int i=0; i<ARRLEN; i++) {
39272017Scg        errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
39372017Scg      }
39472017Scg
39572017Scg      test_sllc_add(a0, a1);
39672017Scg      for (int i=0; i<ARRLEN; i++) {
39772017Scg        errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
39872017Scg      }
39972017Scg      test_sllv_add(a0, a1, ADD_INIT);
40072017Scg      for (int i=0; i<ARRLEN; i++) {
40172017Scg        errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
40272017Scg      }
40372017Scg
40472017Scg      test_srlc_add(a0, a1);
40572017Scg      for (int i=0; i<ARRLEN; i++) {
40672017Scg        errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
40772017Scg      }
40872017Scg      test_srlv_add(a0, a1, ADD_INIT);
40972017Scg      for (int i=0; i<ARRLEN; i++) {
41072017Scg        errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
41172017Scg      }
41272017Scg
41372017Scg      test_srac_add(a0, a1);
41472017Scg      for (int i=0; i<ARRLEN; i++) {
41572017Scg        errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
41672017Scg      }
41772017Scg      test_srav_add(a0, a1, ADD_INIT);
41872017Scg      for (int i=0; i<ARRLEN; i++) {
41972017Scg        errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
42072017Scg      }
42172017Scg
42272017Scg      test_sllc_and(a0, a1);
42372017Scg      for (int i=0; i<ARRLEN; i++) {
42472017Scg        errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
42572017Scg      }
42672017Scg      test_sllv_and(a0, a1, BIT_MASK);
42772017Scg      for (int i=0; i<ARRLEN; i++) {
42872017Scg        errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
42972017Scg      }
43072017Scg
43172017Scg      test_srlc_and(a0, a1);
43272017Scg      for (int i=0; i<ARRLEN; i++) {
43372017Scg        errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
43472017Scg      }
43572017Scg      test_srlv_and(a0, a1, BIT_MASK);
43672017Scg      for (int i=0; i<ARRLEN; i++) {
43772017Scg        errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
43872017Scg      }
43974763Scg
44072017Scg      test_srac_and(a0, a1);
44172017Scg      for (int i=0; i<ARRLEN; i++) {
44272017Scg        errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
44372017Scg      }
44472017Scg      test_srav_and(a0, a1, BIT_MASK);
44572017Scg      for (int i=0; i<ARRLEN; i++) {
44672017Scg        errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
44772017Scg      }
44872017Scg
44972017Scg      test_pack2(p2, a1);
45072017Scg      for (int i=0; i<ARRLEN/2; i++) {
45172017Scg        errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
45272017Scg      }
45372017Scg      for (int i=0; i<ARRLEN; i++) {
45472017Scg        a0[i] = -1;
45572017Scg      }
45672017Scg      test_unpack2(a0, p2);
45772017Scg      for (int i=0; i<(ARRLEN&(-2)); i++) {
45872017Scg        errn += verify("test_unpack2: ", i, a0[i], (byte)(ADD_INIT+i));
45972017Scg      }
46072017Scg
46172017Scg      test_pack2_swap(p2, a1);
46272017Scg      for (int i=0; i<ARRLEN/2; i++) {
46372017Scg        errn += verify("test_pack2_swap: ", i, p2[i], (short)(((short)(ADD_INIT+2*i+1) & 0xFF) | ((short)(ADD_INIT+2*i) << 8)));
46472017Scg      }
46572017Scg      for (int i=0; i<ARRLEN; i++) {
46672017Scg        a0[i] = -1;
46772017Scg      }
46872455Scg      test_unpack2_swap(a0, p2);
46972017Scg      for (int i=0; i<(ARRLEN&(-2)); i++) {
47072017Scg        errn += verify("test_unpack2_swap: ", i, a0[i], (byte)(ADD_INIT+i));
47172455Scg      }
47272017Scg
47372455Scg      test_pack4(p4, a1);
47472017Scg      for (int i=0; i<ARRLEN/4; i++) {
47572017Scg        errn += verify("test_pack4: ", i, p4[i],  ((int)(ADD_INIT+4*i+0) & 0xFF) |
47672017Scg                                                 (((int)(ADD_INIT+4*i+1) & 0xFF) <<  8)  |
47772017Scg                                                 (((int)(ADD_INIT+4*i+2) & 0xFF) << 16)  |
47872017Scg                                                 (((int)(ADD_INIT+4*i+3) & 0xFF) << 24));
47972017Scg      }
48072017Scg      for (int i=0; i<ARRLEN; i++) {
48172017Scg        a0[i] = -1;
48272455Scg      }
48372017Scg      test_unpack4(a0, p4);
48472017Scg      for (int i=0; i<(ARRLEN&(-4)); i++) {
48572017Scg        errn += verify("test_unpack4: ", i, a0[i], (byte)(ADD_INIT+i));
48672017Scg      }
48772017Scg
48872017Scg      test_pack4_swap(p4, a1);
48972017Scg      for (int i=0; i<ARRLEN/4; i++) {
490111183Scognet        errn += verify("test_pack4_swap: ", i, p4[i],  ((int)(ADD_INIT+4*i+3) & 0xFF) |
49172455Scg                                                      (((int)(ADD_INIT+4*i+2) & 0xFF) <<  8)  |
49272017Scg                                                      (((int)(ADD_INIT+4*i+1) & 0xFF) << 16)  |
49372017Scg                                                      (((int)(ADD_INIT+4*i+0) & 0xFF) << 24));
49472017Scg      }
49572017Scg      for (int i=0; i<ARRLEN; i++) {
49672017Scg        a0[i] = -1;
49772017Scg      }
49872017Scg      test_unpack4_swap(a0, p4);
49972017Scg      for (int i=0; i<(ARRLEN&(-4)); i++) {
50072017Scg        errn += verify("test_unpack4_swap: ", i, a0[i], (byte)(ADD_INIT+i));
50172017Scg      }
50272017Scg
50372017Scg      test_pack8(p8, a1);
50472017Scg      for (int i=0; i<ARRLEN/8; i++) {
50572017Scg        errn += verify("test_pack8: ", i, p8[i],  ((long)(ADD_INIT+8*i+0) & 0xFFl) |
50672017Scg                                                 (((long)(ADD_INIT+8*i+1) & 0xFFl) <<  8)  |
50772017Scg                                                 (((long)(ADD_INIT+8*i+2) & 0xFFl) << 16)  |
50872455Scg                                                 (((long)(ADD_INIT+8*i+3) & 0xFFl) << 24)  |
50972017Scg                                                 (((long)(ADD_INIT+8*i+4) & 0xFFl) << 32)  |
51072017Scg                                                 (((long)(ADD_INIT+8*i+5) & 0xFFl) << 40)  |
51172017Scg                                                 (((long)(ADD_INIT+8*i+6) & 0xFFl) << 48)  |
51272017Scg                                                 (((long)(ADD_INIT+8*i+7) & 0xFFl) << 56));
51372017Scg      }
51472017Scg      for (int i=0; i<ARRLEN; i++) {
51572017Scg        a0[i] = -1;
51672017Scg      }
51772017Scg      test_unpack8(a0, p8);
51872017Scg      for (int i=0; i<(ARRLEN&(-8)); i++) {
51972017Scg        errn += verify("test_unpack8: ", i, a0[i], (byte)(ADD_INIT+i));
52072017Scg      }
52172017Scg
52272455Scg      test_pack8_swap(p8, a1);
52372017Scg      for (int i=0; i<ARRLEN/8; i++) {
52472017Scg        errn += verify("test_pack8_swap: ", i, p8[i],  ((long)(ADD_INIT+8*i+7) & 0xFFl) |
52572017Scg                                                      (((long)(ADD_INIT+8*i+6) & 0xFFl) <<  8)  |
52672455Scg                                                      (((long)(ADD_INIT+8*i+5) & 0xFFl) << 16)  |
52772017Scg                                                      (((long)(ADD_INIT+8*i+4) & 0xFFl) << 24)  |
52872017Scg                                                      (((long)(ADD_INIT+8*i+3) & 0xFFl) << 32)  |
52972017Scg                                                      (((long)(ADD_INIT+8*i+2) & 0xFFl) << 40)  |
53072017Scg                                                      (((long)(ADD_INIT+8*i+1) & 0xFFl) << 48)  |
53172455Scg                                                      (((long)(ADD_INIT+8*i+0) & 0xFFl) << 56));
53272017Scg      }
53372455Scg      for (int i=0; i<ARRLEN; i++) {
53472455Scg        a0[i] = -1;
53572455Scg      }
53672455Scg      test_unpack8_swap(a0, p8);
53772455Scg      for (int i=0; i<(ARRLEN&(-8)); i++) {
53872017Scg        errn += verify("test_unpack8_swap: ", i, a0[i], (byte)(ADD_INIT+i));
53972455Scg      }
54072455Scg
54172455Scg    }
54272455Scg
54372017Scg    if (errn > 0)
54472017Scg      return errn;
54572455Scg
54672455Scg    System.out.println("Time");
54772017Scg    long start, end;
54872017Scg
54972017Scg    start = System.currentTimeMillis();
55072017Scg    for (int i=0; i<ITERS; i++) {
55172017Scg      test_sum(a1);
55272017Scg    }
55372017Scg    end = System.currentTimeMillis();
55472017Scg    System.out.println("test_sum: " + (end - start));
55572017Scg
55672017Scg    start = System.currentTimeMillis();
55772017Scg    for (int i=0; i<ITERS; i++) {
55872017Scg      test_addc(a0, a1);
55972017Scg    }
56072455Scg    end = System.currentTimeMillis();
56172017Scg    System.out.println("test_addc: " + (end - start));
56272017Scg    start = System.currentTimeMillis();
56372017Scg    for (int i=0; i<ITERS; i++) {
56472017Scg      test_addv(a0, a1, (byte)VALUE);
56572017Scg    }
56672017Scg    end = System.currentTimeMillis();
56772017Scg    System.out.println("test_addv: " + (end - start));
56872017Scg    start = System.currentTimeMillis();
56972017Scg    for (int i=0; i<ITERS; i++) {
57072455Scg      test_adda(a0, a1, a2);
57172017Scg    }
57272017Scg    end = System.currentTimeMillis();
57372455Scg    System.out.println("test_adda: " + (end - start));
57472017Scg
57572455Scg    start = System.currentTimeMillis();
57672455Scg    for (int i=0; i<ITERS; i++) {
57772017Scg      test_subc(a0, a1);
57872017Scg    }
57972455Scg    end = System.currentTimeMillis();
58072017Scg    System.out.println("test_subc: " + (end - start));
58172017Scg    start = System.currentTimeMillis();
58272017Scg    for (int i=0; i<ITERS; i++) {
58372017Scg      test_subv(a0, a1, (byte)VALUE);
58472455Scg    }
58572455Scg    end = System.currentTimeMillis();
58672455Scg    System.out.println("test_subv: " + (end - start));
58772017Scg    start = System.currentTimeMillis();
58872017Scg    for (int i=0; i<ITERS; i++) {
58972017Scg      test_suba(a0, a1, a2);
59072017Scg    }
59172017Scg    end = System.currentTimeMillis();
59272017Scg    System.out.println("test_suba: " + (end - start));
59372455Scg
59472017Scg    start = System.currentTimeMillis();
59572017Scg    for (int i=0; i<ITERS; i++) {
59672017Scg      test_mulc(a0, a1);
59772017Scg    }
59872017Scg    end = System.currentTimeMillis();
59972017Scg    System.out.println("test_mulc: " + (end - start));
60072017Scg    start = System.currentTimeMillis();
60172017Scg    for (int i=0; i<ITERS; i++) {
60272017Scg      test_mulv(a0, a1, (byte)VALUE);
60372017Scg    }
60472017Scg    end = System.currentTimeMillis();
60572455Scg    System.out.println("test_mulv: " + (end - start));
60672017Scg    start = System.currentTimeMillis();
60772017Scg    for (int i=0; i<ITERS; i++) {
60872017Scg      test_mula(a0, a1, a2);
60972455Scg    }
61072455Scg    end = System.currentTimeMillis();
61172017Scg    System.out.println("test_mula: " + (end - start));
61272017Scg
61372017Scg    start = System.currentTimeMillis();
61472017Scg    for (int i=0; i<ITERS; i++) {
61572017Scg      test_divc(a0, a1);
61672017Scg    }
61772017Scg    end = System.currentTimeMillis();
61872017Scg    System.out.println("test_divc: " + (end - start));
61972017Scg    start = System.currentTimeMillis();
62072017Scg    for (int i=0; i<ITERS; i++) {
62172017Scg      test_divv(a0, a1, (byte)VALUE);
62272017Scg    }
62372017Scg    end = System.currentTimeMillis();
62472017Scg    System.out.println("test_divv: " + (end - start));
62572017Scg    start = System.currentTimeMillis();
62672017Scg    for (int i=0; i<ITERS; i++) {
62772017Scg      test_diva(a0, a1, a2);
62872017Scg    }
62972017Scg    end = System.currentTimeMillis();
63072455Scg    System.out.println("test_diva: " + (end - start));
63172455Scg
63272455Scg    start = System.currentTimeMillis();
63372017Scg    for (int i=0; i<ITERS; i++) {
63472017Scg      test_mulc_n(a0, a1);
63572017Scg    }
63672455Scg    end = System.currentTimeMillis();
63772017Scg    System.out.println("test_mulc_n: " + (end - start));
63872017Scg    start = System.currentTimeMillis();
63972455Scg    for (int i=0; i<ITERS; i++) {
64072455Scg      test_mulv(a0, a1, (byte)-VALUE);
64172017Scg    }
64272017Scg    end = System.currentTimeMillis();
64372017Scg    System.out.println("test_mulv_n: " + (end - start));
644102302Sorion    start = System.currentTimeMillis();
64572017Scg    for (int i=0; i<ITERS; i++) {
64672017Scg      test_mula(a0, a1, a3);
64772017Scg    }
64872017Scg    end = System.currentTimeMillis();
64972455Scg    System.out.println("test_mula_n: " + (end - start));
65072017Scg
65172017Scg    start = System.currentTimeMillis();
65272017Scg    for (int i=0; i<ITERS; i++) {
65372017Scg      test_divc_n(a0, a1);
65472017Scg    }
65572017Scg    end = System.currentTimeMillis();
65672017Scg    System.out.println("test_divc_n: " + (end - start));
65772017Scg    start = System.currentTimeMillis();
65872017Scg    for (int i=0; i<ITERS; i++) {
65972017Scg      test_divv(a0, a1, (byte)-VALUE);
66072455Scg    }
66172017Scg    end = System.currentTimeMillis();
66272017Scg    System.out.println("test_divv_n: " + (end - start));
66372017Scg    start = System.currentTimeMillis();
66472017Scg    for (int i=0; i<ITERS; i++) {
66572017Scg      test_diva(a0, a1, a3);
66672017Scg    }
66772017Scg    end = System.currentTimeMillis();
66872017Scg    System.out.println("test_diva_n: " + (end - start));
66972017Scg
67072017Scg    start = System.currentTimeMillis();
67172017Scg    for (int i=0; i<ITERS; i++) {
67272017Scg      test_andc(a0, a1);
67372017Scg    }
67472017Scg    end = System.currentTimeMillis();
67572017Scg    System.out.println("test_andc: " + (end - start));
67672017Scg    start = System.currentTimeMillis();
67772017Scg    for (int i=0; i<ITERS; i++) {
67872017Scg      test_andv(a0, a1, (byte)BIT_MASK);
67972017Scg    }
68072017Scg    end = System.currentTimeMillis();
68172017Scg    System.out.println("test_andv: " + (end - start));
68272017Scg    start = System.currentTimeMillis();
68372017Scg    for (int i=0; i<ITERS; i++) {
68472017Scg      test_anda(a0, a1, a4);
68572017Scg    }
68672017Scg    end = System.currentTimeMillis();
68772017Scg    System.out.println("test_anda: " + (end - start));
68872017Scg
68972017Scg    start = System.currentTimeMillis();
69072017Scg    for (int i=0; i<ITERS; i++) {
69172017Scg      test_orc(a0, a1);
69272017Scg    }
69372017Scg    end = System.currentTimeMillis();
69472017Scg    System.out.println("test_orc: " + (end - start));
69572017Scg    start = System.currentTimeMillis();
69672017Scg    for (int i=0; i<ITERS; i++) {
69772017Scg      test_orv(a0, a1, (byte)BIT_MASK);
69872017Scg    }
69972017Scg    end = System.currentTimeMillis();
70072017Scg    System.out.println("test_orv: " + (end - start));
70172017Scg    start = System.currentTimeMillis();
70272017Scg    for (int i=0; i<ITERS; i++) {
70372017Scg      test_ora(a0, a1, a4);
70472017Scg    }
70572017Scg    end = System.currentTimeMillis();
70672017Scg    System.out.println("test_ora: " + (end - start));
70772017Scg
70872017Scg    start = System.currentTimeMillis();
70972017Scg    for (int i=0; i<ITERS; i++) {
71072017Scg      test_xorc(a0, a1);
71172017Scg    }
71272017Scg    end = System.currentTimeMillis();
71372017Scg    System.out.println("test_xorc: " + (end - start));
71472455Scg    start = System.currentTimeMillis();
71572017Scg    for (int i=0; i<ITERS; i++) {
71672455Scg      test_xorv(a0, a1, (byte)BIT_MASK);
71772455Scg    }
71872017Scg    end = System.currentTimeMillis();
71972017Scg    System.out.println("test_xorv: " + (end - start));
72072017Scg    start = System.currentTimeMillis();
72172017Scg    for (int i=0; i<ITERS; i++) {
72272017Scg      test_xora(a0, a1, a4);
72372017Scg    }
72472017Scg    end = System.currentTimeMillis();
72572017Scg    System.out.println("test_xora: " + (end - start));
72672017Scg
72772017Scg    start = System.currentTimeMillis();
72872017Scg    for (int i=0; i<ITERS; i++) {
72972017Scg      test_sllc(a0, a1);
73072017Scg    }
73172017Scg    end = System.currentTimeMillis();
73272017Scg    System.out.println("test_sllc: " + (end - start));
73372017Scg    start = System.currentTimeMillis();
73472017Scg    for (int i=0; i<ITERS; i++) {
73572017Scg      test_sllv(a0, a1, VALUE);
73672017Scg    }
73772017Scg    end = System.currentTimeMillis();
73872017Scg    System.out.println("test_sllv: " + (end - start));
73972017Scg
74072017Scg    start = System.currentTimeMillis();
74172017Scg    for (int i=0; i<ITERS; i++) {
74272017Scg      test_srlc(a0, a1);
74372017Scg    }
74472017Scg    end = System.currentTimeMillis();
74572455Scg    System.out.println("test_srlc: " + (end - start));
74672017Scg    start = System.currentTimeMillis();
74772017Scg    for (int i=0; i<ITERS; i++) {
74872017Scg      test_srlv(a0, a1, VALUE);
74972017Scg    }
75072017Scg    end = System.currentTimeMillis();
75172017Scg    System.out.println("test_srlv: " + (end - start));
75272017Scg
75372017Scg    start = System.currentTimeMillis();
75472017Scg    for (int i=0; i<ITERS; i++) {
75572017Scg      test_srac(a0, a1);
75678564Sgreid    }
75772017Scg    end = System.currentTimeMillis();
75872017Scg    System.out.println("test_srac: " + (end - start));
75972017Scg    start = System.currentTimeMillis();
76072017Scg    for (int i=0; i<ITERS; i++) {
76172017Scg      test_srav(a0, a1, VALUE);
76272017Scg    }
76372017Scg    end = System.currentTimeMillis();
76472017Scg    System.out.println("test_srav: " + (end - start));
76572017Scg
76672017Scg    start = System.currentTimeMillis();
76772017Scg    for (int i=0; i<ITERS; i++) {
76873770Scg      test_sllc_n(a0, a1);
76972455Scg    }
77072455Scg    end = System.currentTimeMillis();
77172455Scg    System.out.println("test_sllc_n: " + (end - start));
77272455Scg    start = System.currentTimeMillis();
77372455Scg    for (int i=0; i<ITERS; i++) {
77472455Scg      test_sllv(a0, a1, -VALUE);
77572455Scg    }
77675702Sorion    end = System.currentTimeMillis();
77775702Sorion    System.out.println("test_sllv_n: " + (end - start));
77875702Sorion
77975702Sorion    start = System.currentTimeMillis();
78075702Sorion    for (int i=0; i<ITERS; i++) {
78178362Scg      test_srlc_n(a0, a1);
78275702Sorion    }
78375702Sorion    end = System.currentTimeMillis();
78475702Sorion    System.out.println("test_srlc_n: " + (end - start));
78575702Sorion    start = System.currentTimeMillis();
78673770Scg    for (int i=0; i<ITERS; i++) {
78775702Sorion      test_srlv(a0, a1, -VALUE);
788119690Sjhb    }
78972017Scg    end = System.currentTimeMillis();
79072017Scg    System.out.println("test_srlv_n: " + (end - start));
79172017Scg
79272017Scg    start = System.currentTimeMillis();
79372017Scg    for (int i=0; i<ITERS; i++) {
79472017Scg      test_srac_n(a0, a1);
79572017Scg    }
79672017Scg    end = System.currentTimeMillis();
79772017Scg    System.out.println("test_srac_n: " + (end - start));
79872017Scg    start = System.currentTimeMillis();
79972017Scg    for (int i=0; i<ITERS; i++) {
80072017Scg      test_srav(a0, a1, -VALUE);
80172017Scg    }
80272017Scg    end = System.currentTimeMillis();
80372017Scg    System.out.println("test_srav_n: " + (end - start));
804119690Sjhb
80572455Scg    start = System.currentTimeMillis();
80672017Scg    for (int i=0; i<ITERS; i++) {
80772017Scg      test_sllc_o(a0, a1);
80872017Scg    }
80972017Scg    end = System.currentTimeMillis();
81072017Scg    System.out.println("test_sllc_o: " + (end - start));
81172017Scg    start = System.currentTimeMillis();
81272017Scg    for (int i=0; i<ITERS; i++) {
81372017Scg      test_sllv(a0, a1, SHIFT);
81472017Scg    }
81572017Scg    end = System.currentTimeMillis();
81672017Scg    System.out.println("test_sllv_o: " + (end - start));
81772017Scg
81872017Scg    start = System.currentTimeMillis();
81972017Scg    for (int i=0; i<ITERS; i++) {
82074763Scg      test_srlc_o(a0, a1);
82172017Scg    }
82272017Scg    end = System.currentTimeMillis();
82372017Scg    System.out.println("test_srlc_o: " + (end - start));
82472017Scg    start = System.currentTimeMillis();
82584771Sorion    for (int i=0; i<ITERS; i++) {
82684771Sorion      test_srlv(a0, a1, SHIFT);
82772017Scg    }
82872017Scg    end = System.currentTimeMillis();
82972017Scg    System.out.println("test_srlv_o: " + (end - start));
83072017Scg
83184771Sorion    start = System.currentTimeMillis();
83272017Scg    for (int i=0; i<ITERS; i++) {
833117126Sscottl      test_srac_o(a0, a1);
834117126Sscottl    }
83572017Scg    end = System.currentTimeMillis();
83672017Scg    System.out.println("test_srac_o: " + (end - start));
83772017Scg    start = System.currentTimeMillis();
83872017Scg    for (int i=0; i<ITERS; i++) {
83972017Scg      test_srav(a0, a1, SHIFT);
84072017Scg    }
84172017Scg    end = System.currentTimeMillis();
84272017Scg    System.out.println("test_srav_o: " + (end - start));
84372017Scg
84472017Scg    start = System.currentTimeMillis();
84572017Scg    for (int i=0; i<ITERS; i++) {
84672017Scg      test_sllc_on(a0, a1);
84772017Scg    }
84872017Scg    end = System.currentTimeMillis();
84972017Scg    System.out.println("test_sllc_on: " + (end - start));
85072017Scg    start = System.currentTimeMillis();
85172017Scg    for (int i=0; i<ITERS; i++) {
85272017Scg      test_sllv(a0, a1, -SHIFT);
85372017Scg    }
85472017Scg    end = System.currentTimeMillis();
85572017Scg    System.out.println("test_sllv_on: " + (end - start));
85672017Scg
85772017Scg    start = System.currentTimeMillis();
85872017Scg    for (int i=0; i<ITERS; i++) {
85972017Scg      test_srlc_on(a0, a1);
86072017Scg    }
86172017Scg    end = System.currentTimeMillis();
86272017Scg    System.out.println("test_srlc_on: " + (end - start));
86372017Scg    start = System.currentTimeMillis();
86472017Scg    for (int i=0; i<ITERS; i++) {
86572017Scg      test_srlv(a0, a1, -SHIFT);
86672017Scg    }
86772017Scg    end = System.currentTimeMillis();
86872017Scg    System.out.println("test_srlv_on: " + (end - start));
86972017Scg
87072017Scg    start = System.currentTimeMillis();
87172017Scg    for (int i=0; i<ITERS; i++) {
87272017Scg      test_srac_on(a0, a1);
87372017Scg    }
87472017Scg    end = System.currentTimeMillis();
87572017Scg    System.out.println("test_srac_on: " + (end - start));
87672017Scg    start = System.currentTimeMillis();
87772017Scg    for (int i=0; i<ITERS; i++) {
87872017Scg      test_srav(a0, a1, -SHIFT);
87972017Scg    }
88072017Scg    end = System.currentTimeMillis();
88172017Scg    System.out.println("test_srav_on: " + (end - start));
88272017Scg
88372017Scg    start = System.currentTimeMillis();
88472017Scg    for (int i=0; i<ITERS; i++) {
88572017Scg      test_sllc_add(a0, a1);
88672017Scg    }
88772017Scg    end = System.currentTimeMillis();
88872017Scg    System.out.println("test_sllc_add: " + (end - start));
88972017Scg    start = System.currentTimeMillis();
89072017Scg    for (int i=0; i<ITERS; i++) {
89172017Scg      test_sllv_add(a0, a1, ADD_INIT);
89272017Scg    }
89372017Scg    end = System.currentTimeMillis();
89472017Scg    System.out.println("test_sllv_add: " + (end - start));
89572017Scg
89672017Scg    start = System.currentTimeMillis();
89772017Scg    for (int i=0; i<ITERS; i++) {
89872017Scg      test_srlc_add(a0, a1);
89972017Scg    }
90072017Scg    end = System.currentTimeMillis();
90172017Scg    System.out.println("test_srlc_add: " + (end - start));
90272017Scg    start = System.currentTimeMillis();
90372017Scg    for (int i=0; i<ITERS; i++) {
90472017Scg      test_srlv_add(a0, a1, ADD_INIT);
90572017Scg    }
90672017Scg    end = System.currentTimeMillis();
90772017Scg    System.out.println("test_srlv_add: " + (end - start));
90872017Scg
90972017Scg    start = System.currentTimeMillis();
91072017Scg    for (int i=0; i<ITERS; i++) {
91172017Scg      test_srac_add(a0, a1);
91272017Scg    }
91372017Scg    end = System.currentTimeMillis();
91472017Scg    System.out.println("test_srac_add: " + (end - start));
91572017Scg    start = System.currentTimeMillis();
91672017Scg    for (int i=0; i<ITERS; i++) {
91772017Scg      test_srav_add(a0, a1, ADD_INIT);
91872455Scg    }
91972455Scg    end = System.currentTimeMillis();
92072017Scg    System.out.println("test_srav_add: " + (end - start));
92172455Scg
92272455Scg    start = System.currentTimeMillis();
92372017Scg    for (int i=0; i<ITERS; i++) {
92472017Scg      test_sllc_and(a0, a1);
92572017Scg    }
92672017Scg    end = System.currentTimeMillis();
92772017Scg    System.out.println("test_sllc_and: " + (end - start));
92872017Scg    start = System.currentTimeMillis();
92972017Scg    for (int i=0; i<ITERS; i++) {
93072017Scg      test_sllv_and(a0, a1, BIT_MASK);
93172017Scg    }
93272017Scg    end = System.currentTimeMillis();
93372017Scg    System.out.println("test_sllv_and: " + (end - start));
93472455Scg
93572017Scg    start = System.currentTimeMillis();
93672455Scg    for (int i=0; i<ITERS; i++) {
93772455Scg      test_srlc_and(a0, a1);
93872455Scg    }
93972455Scg    end = System.currentTimeMillis();
94072017Scg    System.out.println("test_srlc_and: " + (end - start));
94172017Scg    start = System.currentTimeMillis();
94272017Scg    for (int i=0; i<ITERS; i++) {
94372017Scg      test_srlv_and(a0, a1, BIT_MASK);
94472017Scg    }
94572017Scg    end = System.currentTimeMillis();
94672017Scg    System.out.println("test_srlv_and: " + (end - start));
94772017Scg
94872455Scg    start = System.currentTimeMillis();
94972455Scg    for (int i=0; i<ITERS; i++) {
95072455Scg      test_srac_and(a0, a1);
95172455Scg    }
95272455Scg    end = System.currentTimeMillis();
95372455Scg    System.out.println("test_srac_and: " + (end - start));
95472455Scg    start = System.currentTimeMillis();
95572455Scg    for (int i=0; i<ITERS; i++) {
95672455Scg      test_srav_and(a0, a1, BIT_MASK);
95772455Scg    }
95872455Scg    end = System.currentTimeMillis();
95972017Scg    System.out.println("test_srav_and: " + (end - start));
96072017Scg
96172017Scg    start = System.currentTimeMillis();
96272017Scg    for (int i=0; i<ITERS; i++) {
96372017Scg      test_pack2(p2, a1);
96472017Scg    }
96572017Scg    end = System.currentTimeMillis();
96672017Scg    System.out.println("test_pack2: " + (end - start));
96772017Scg    start = System.currentTimeMillis();
96872017Scg    for (int i=0; i<ITERS; i++) {
96972017Scg      test_unpack2(a0, p2);
97072017Scg    }
97172017Scg    end = System.currentTimeMillis();
97272017Scg    System.out.println("test_unpack2: " + (end - start));
97372017Scg    start = System.currentTimeMillis();
97472017Scg    for (int i=0; i<ITERS; i++) {
97582180Scg      test_pack2_swap(p2, a1);
97672017Scg    }
97772017Scg    end = System.currentTimeMillis();
97872017Scg    System.out.println("test_pack2_swap: " + (end - start));
97972017Scg    start = System.currentTimeMillis();
98072017Scg    for (int i=0; i<ITERS; i++) {
981      test_unpack2_swap(a0, p2);
982    }
983    end = System.currentTimeMillis();
984    System.out.println("test_unpack2_swap: " + (end - start));
985
986    start = System.currentTimeMillis();
987    for (int i=0; i<ITERS; i++) {
988      test_pack4(p4, a1);
989    }
990    end = System.currentTimeMillis();
991    System.out.println("test_pack4: " + (end - start));
992    start = System.currentTimeMillis();
993    for (int i=0; i<ITERS; i++) {
994      test_unpack4(a0, p4);
995    }
996    end = System.currentTimeMillis();
997    System.out.println("test_unpack4: " + (end - start));
998    start = System.currentTimeMillis();
999    for (int i=0; i<ITERS; i++) {
1000      test_pack4_swap(p4, a1);
1001    }
1002    end = System.currentTimeMillis();
1003    System.out.println("test_pack4_swap: " + (end - start));
1004    start = System.currentTimeMillis();
1005    for (int i=0; i<ITERS; i++) {
1006      test_unpack4_swap(a0, p4);
1007    }
1008    end = System.currentTimeMillis();
1009    System.out.println("test_unpack4_swap: " + (end - start));
1010
1011    start = System.currentTimeMillis();
1012    for (int i=0; i<ITERS; i++) {
1013      test_pack8(p8, a1);
1014    }
1015    end = System.currentTimeMillis();
1016    System.out.println("test_pack8: " + (end - start));
1017    start = System.currentTimeMillis();
1018    for (int i=0; i<ITERS; i++) {
1019      test_unpack8(a0, p8);
1020    }
1021    end = System.currentTimeMillis();
1022    System.out.println("test_unpack8: " + (end - start));
1023    start = System.currentTimeMillis();
1024    for (int i=0; i<ITERS; i++) {
1025      test_pack8_swap(p8, a1);
1026    }
1027    end = System.currentTimeMillis();
1028    System.out.println("test_pack8_swap: " + (end - start));
1029    start = System.currentTimeMillis();
1030    for (int i=0; i<ITERS; i++) {
1031      test_unpack8_swap(a0, p8);
1032    }
1033    end = System.currentTimeMillis();
1034    System.out.println("test_unpack8_swap: " + (end - start));
1035
1036    return errn;
1037  }
1038
1039  static int test_sum(byte[] a1) {
1040    int sum = 0;
1041    for (int i = 0; i < a1.length; i+=1) {
1042      sum += a1[i];
1043    }
1044    return sum;
1045  }
1046
1047  static void test_addc(byte[] a0, byte[] a1) {
1048    for (int i = 0; i < a0.length; i+=1) {
1049      a0[i] = (byte)(a1[i]+VALUE);
1050    }
1051  }
1052  static void test_addv(byte[] a0, byte[] a1, byte b) {
1053    for (int i = 0; i < a0.length; i+=1) {
1054      a0[i] = (byte)(a1[i]+b);
1055    }
1056  }
1057  static void test_adda(byte[] a0, byte[] a1, byte[] a2) {
1058    for (int i = 0; i < a0.length; i+=1) {
1059      a0[i] = (byte)(a1[i]+a2[i]);
1060    }
1061  }
1062
1063  static void test_subc(byte[] a0, byte[] a1) {
1064    for (int i = 0; i < a0.length; i+=1) {
1065      a0[i] = (byte)(a1[i]-VALUE);
1066    }
1067  }
1068  static void test_subv(byte[] a0, byte[] a1, byte b) {
1069    for (int i = 0; i < a0.length; i+=1) {
1070      a0[i] = (byte)(a1[i]-b);
1071    }
1072  }
1073  static void test_suba(byte[] a0, byte[] a1, byte[] a2) {
1074    for (int i = 0; i < a0.length; i+=1) {
1075      a0[i] = (byte)(a1[i]-a2[i]);
1076    }
1077  }
1078
1079  static void test_mulc(byte[] a0, byte[] a1) {
1080    for (int i = 0; i < a0.length; i+=1) {
1081      a0[i] = (byte)(a1[i]*VALUE);
1082    }
1083  }
1084  static void test_mulc_n(byte[] a0, byte[] a1) {
1085    for (int i = 0; i < a0.length; i+=1) {
1086      a0[i] = (byte)(a1[i]*(-VALUE));
1087    }
1088  }
1089  static void test_mulv(byte[] a0, byte[] a1, byte b) {
1090    for (int i = 0; i < a0.length; i+=1) {
1091      a0[i] = (byte)(a1[i]*b);
1092    }
1093  }
1094  static void test_mula(byte[] a0, byte[] a1, byte[] a2) {
1095    for (int i = 0; i < a0.length; i+=1) {
1096      a0[i] = (byte)(a1[i]*a2[i]);
1097    }
1098  }
1099
1100  static void test_divc(byte[] a0, byte[] a1) {
1101    for (int i = 0; i < a0.length; i+=1) {
1102      a0[i] = (byte)(a1[i]/VALUE);
1103    }
1104  }
1105  static void test_divc_n(byte[] a0, byte[] a1) {
1106    for (int i = 0; i < a0.length; i+=1) {
1107      a0[i] = (byte)(a1[i]/(-VALUE));
1108    }
1109  }
1110  static void test_divv(byte[] a0, byte[] a1, byte b) {
1111    for (int i = 0; i < a0.length; i+=1) {
1112      a0[i] = (byte)(a1[i]/b);
1113    }
1114  }
1115  static void test_diva(byte[] a0, byte[] a1, byte[] a2) {
1116    for (int i = 0; i < a0.length; i+=1) {
1117      a0[i] = (byte)(a1[i]/a2[i]);
1118    }
1119  }
1120
1121  static void test_andc(byte[] a0, byte[] a1) {
1122    for (int i = 0; i < a0.length; i+=1) {
1123      a0[i] = (byte)(a1[i]&BIT_MASK);
1124    }
1125  }
1126  static void test_andv(byte[] a0, byte[] a1, byte b) {
1127    for (int i = 0; i < a0.length; i+=1) {
1128      a0[i] = (byte)(a1[i]&b);
1129    }
1130  }
1131  static void test_anda(byte[] a0, byte[] a1, byte[] a2) {
1132    for (int i = 0; i < a0.length; i+=1) {
1133      a0[i] = (byte)(a1[i]&a2[i]);
1134    }
1135  }
1136
1137  static void test_orc(byte[] a0, byte[] a1) {
1138    for (int i = 0; i < a0.length; i+=1) {
1139      a0[i] = (byte)(a1[i]|BIT_MASK);
1140    }
1141  }
1142  static void test_orv(byte[] a0, byte[] a1, byte b) {
1143    for (int i = 0; i < a0.length; i+=1) {
1144      a0[i] = (byte)(a1[i]|b);
1145    }
1146  }
1147  static void test_ora(byte[] a0, byte[] a1, byte[] a2) {
1148    for (int i = 0; i < a0.length; i+=1) {
1149      a0[i] = (byte)(a1[i]|a2[i]);
1150    }
1151  }
1152
1153  static void test_xorc(byte[] a0, byte[] a1) {
1154    for (int i = 0; i < a0.length; i+=1) {
1155      a0[i] = (byte)(a1[i]^BIT_MASK);
1156    }
1157  }
1158  static void test_xorv(byte[] a0, byte[] a1, byte b) {
1159    for (int i = 0; i < a0.length; i+=1) {
1160      a0[i] = (byte)(a1[i]^b);
1161    }
1162  }
1163  static void test_xora(byte[] a0, byte[] a1, byte[] a2) {
1164    for (int i = 0; i < a0.length; i+=1) {
1165      a0[i] = (byte)(a1[i]^a2[i]);
1166    }
1167  }
1168
1169  static void test_sllc(byte[] a0, byte[] a1) {
1170    for (int i = 0; i < a0.length; i+=1) {
1171      a0[i] = (byte)(a1[i]<<VALUE);
1172    }
1173  }
1174  static void test_sllc_n(byte[] a0, byte[] a1) {
1175    for (int i = 0; i < a0.length; i+=1) {
1176      a0[i] = (byte)(a1[i]<<(-VALUE));
1177    }
1178  }
1179  static void test_sllc_o(byte[] a0, byte[] a1) {
1180    for (int i = 0; i < a0.length; i+=1) {
1181      a0[i] = (byte)(a1[i]<<SHIFT);
1182    }
1183  }
1184  static void test_sllc_on(byte[] a0, byte[] a1) {
1185    for (int i = 0; i < a0.length; i+=1) {
1186      a0[i] = (byte)(a1[i]<<(-SHIFT));
1187    }
1188  }
1189  static void test_sllv(byte[] a0, byte[] a1, int b) {
1190    for (int i = 0; i < a0.length; i+=1) {
1191      a0[i] = (byte)(a1[i]<<b);
1192    }
1193  }
1194  static void test_sllc_add(byte[] a0, byte[] a1) {
1195    for (int i = 0; i < a0.length; i+=1) {
1196      a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE);
1197    }
1198  }
1199  static void test_sllv_add(byte[] a0, byte[] a1, int b) {
1200    for (int i = 0; i < a0.length; i+=1) {
1201      a0[i] = (byte)((a1[i] + b)<<VALUE);
1202    }
1203  }
1204  static void test_sllc_and(byte[] a0, byte[] a1) {
1205    for (int i = 0; i < a0.length; i+=1) {
1206      a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE);
1207    }
1208  }
1209  static void test_sllv_and(byte[] a0, byte[] a1, int b) {
1210    for (int i = 0; i < a0.length; i+=1) {
1211      a0[i] = (byte)((a1[i] & b)<<VALUE);
1212    }
1213  }
1214
1215  static void test_srlc(byte[] a0, byte[] a1) {
1216    for (int i = 0; i < a0.length; i+=1) {
1217      a0[i] = (byte)(a1[i]>>>VALUE);
1218    }
1219  }
1220  static void test_srlc_n(byte[] a0, byte[] a1) {
1221    for (int i = 0; i < a0.length; i+=1) {
1222      a0[i] = (byte)(a1[i]>>>(-VALUE));
1223    }
1224  }
1225  static void test_srlc_o(byte[] a0, byte[] a1) {
1226    for (int i = 0; i < a0.length; i+=1) {
1227      a0[i] = (byte)(a1[i]>>>SHIFT);
1228    }
1229  }
1230  static void test_srlc_on(byte[] a0, byte[] a1) {
1231    for (int i = 0; i < a0.length; i+=1) {
1232      a0[i] = (byte)(a1[i]>>>(-SHIFT));
1233    }
1234  }
1235  static void test_srlv(byte[] a0, byte[] a1, int b) {
1236    for (int i = 0; i < a0.length; i+=1) {
1237      a0[i] = (byte)(a1[i]>>>b);
1238    }
1239  }
1240  static void test_srlc_add(byte[] a0, byte[] a1) {
1241    for (int i = 0; i < a0.length; i+=1) {
1242      a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE);
1243    }
1244  }
1245  static void test_srlv_add(byte[] a0, byte[] a1, int b) {
1246    for (int i = 0; i < a0.length; i+=1) {
1247      a0[i] = (byte)((a1[i] + b)>>>VALUE);
1248    }
1249  }
1250  static void test_srlc_and(byte[] a0, byte[] a1) {
1251    for (int i = 0; i < a0.length; i+=1) {
1252      a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE);
1253    }
1254  }
1255  static void test_srlv_and(byte[] a0, byte[] a1, int b) {
1256    for (int i = 0; i < a0.length; i+=1) {
1257      a0[i] = (byte)((a1[i] & b)>>>VALUE);
1258    }
1259  }
1260
1261  static void test_srac(byte[] a0, byte[] a1) {
1262    for (int i = 0; i < a0.length; i+=1) {
1263      a0[i] = (byte)(a1[i]>>VALUE);
1264    }
1265  }
1266  static void test_srac_n(byte[] a0, byte[] a1) {
1267    for (int i = 0; i < a0.length; i+=1) {
1268      a0[i] = (byte)(a1[i]>>(-VALUE));
1269    }
1270  }
1271  static void test_srac_o(byte[] a0, byte[] a1) {
1272    for (int i = 0; i < a0.length; i+=1) {
1273      a0[i] = (byte)(a1[i]>>SHIFT);
1274    }
1275  }
1276  static void test_srac_on(byte[] a0, byte[] a1) {
1277    for (int i = 0; i < a0.length; i+=1) {
1278      a0[i] = (byte)(a1[i]>>(-SHIFT));
1279    }
1280  }
1281  static void test_srav(byte[] a0, byte[] a1, int b) {
1282    for (int i = 0; i < a0.length; i+=1) {
1283      a0[i] = (byte)(a1[i]>>b);
1284    }
1285  }
1286  static void test_srac_add(byte[] a0, byte[] a1) {
1287    for (int i = 0; i < a0.length; i+=1) {
1288      a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE);
1289    }
1290  }
1291  static void test_srav_add(byte[] a0, byte[] a1, int b) {
1292    for (int i = 0; i < a0.length; i+=1) {
1293      a0[i] = (byte)((a1[i] + b)>>VALUE);
1294    }
1295  }
1296  static void test_srac_and(byte[] a0, byte[] a1) {
1297    for (int i = 0; i < a0.length; i+=1) {
1298      a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE);
1299    }
1300  }
1301  static void test_srav_and(byte[] a0, byte[] a1, int b) {
1302    for (int i = 0; i < a0.length; i+=1) {
1303      a0[i] = (byte)((a1[i] & b)>>VALUE);
1304    }
1305  }
1306
1307  static void test_pack2(short[] p2, byte[] a1) {
1308    if (p2.length*2 > a1.length) return;
1309    for (int i = 0; i < p2.length; i+=1) {
1310      short l0 = (short)a1[i*2+0];
1311      short l1 = (short)a1[i*2+1];
1312      p2[i] = (short)((l1 << 8) | (l0 & 0xFF));
1313    }
1314  }
1315  static void test_unpack2(byte[] a0, short[] p2) {
1316    if (p2.length*2 > a0.length) return;
1317    for (int i = 0; i < p2.length; i+=1) {
1318      short l = p2[i];
1319      a0[i*2+0] = (byte)(l & 0xFF);
1320      a0[i*2+1] = (byte)(l >> 8);
1321    }
1322  }
1323  static void test_pack2_swap(short[] p2, byte[] a1) {
1324    if (p2.length*2 > a1.length) return;
1325    for (int i = 0; i < p2.length; i+=1) {
1326      short l0 = (short)a1[i*2+0];
1327      short l1 = (short)a1[i*2+1];
1328      p2[i] = (short)((l0 << 8) | (l1 & 0xFF));
1329    }
1330  }
1331  static void test_unpack2_swap(byte[] a0, short[] p2) {
1332    if (p2.length*2 > a0.length) return;
1333    for (int i = 0; i < p2.length; i+=1) {
1334      short l = p2[i];
1335      a0[i*2+0] = (byte)(l >> 8);
1336      a0[i*2+1] = (byte)(l & 0xFF);
1337    }
1338  }
1339
1340  static void test_pack4(int[] p4, byte[] a1) {
1341    if (p4.length*4 > a1.length) return;
1342    for (int i = 0; i < p4.length; i+=1) {
1343      int l0 = (int)a1[i*4+0];
1344      int l1 = (int)a1[i*4+1];
1345      int l2 = (int)a1[i*4+2];
1346      int l3 = (int)a1[i*4+3];
1347      p4[i] = (l0 & 0xFF) |
1348             ((l1 & 0xFF) <<  8) |
1349             ((l2 & 0xFF) << 16) |
1350             ((l3 & 0xFF) << 24);
1351    }
1352  }
1353  static void test_unpack4(byte[] a0, int[] p4) {
1354    if (p4.length*4 > a0.length) return;
1355    for (int i = 0; i < p4.length; i+=1) {
1356      int l = p4[i];
1357      a0[i*4+0] = (byte)(l & 0xFF);
1358      a0[i*4+1] = (byte)(l >>  8);
1359      a0[i*4+2] = (byte)(l >> 16);
1360      a0[i*4+3] = (byte)(l >> 24);
1361    }
1362  }
1363  static void test_pack4_swap(int[] p4, byte[] a1) {
1364    if (p4.length*4 > a1.length) return;
1365    for (int i = 0; i < p4.length; i+=1) {
1366      int l0 = (int)a1[i*4+0];
1367      int l1 = (int)a1[i*4+1];
1368      int l2 = (int)a1[i*4+2];
1369      int l3 = (int)a1[i*4+3];
1370      p4[i] = (l3 & 0xFF) |
1371             ((l2 & 0xFF) <<  8) |
1372             ((l1 & 0xFF) << 16) |
1373             ((l0 & 0xFF) << 24);
1374    }
1375  }
1376  static void test_unpack4_swap(byte[] a0, int[] p4) {
1377    if (p4.length*4 > a0.length) return;
1378    for (int i = 0; i < p4.length; i+=1) {
1379      int l = p4[i];
1380      a0[i*4+0] = (byte)(l >> 24);
1381      a0[i*4+1] = (byte)(l >> 16);
1382      a0[i*4+2] = (byte)(l >>  8);
1383      a0[i*4+3] = (byte)(l & 0xFF);
1384    }
1385  }
1386
1387  static void test_pack8(long[] p8, byte[] a1) {
1388    if (p8.length*8 > a1.length) return;
1389    for (int i = 0; i < p8.length; i+=1) {
1390      long l0 = (long)a1[i*8+0];
1391      long l1 = (long)a1[i*8+1];
1392      long l2 = (long)a1[i*8+2];
1393      long l3 = (long)a1[i*8+3];
1394      long l4 = (long)a1[i*8+4];
1395      long l5 = (long)a1[i*8+5];
1396      long l6 = (long)a1[i*8+6];
1397      long l7 = (long)a1[i*8+7];
1398      p8[i] = (l0 & 0xFFl) |
1399             ((l1 & 0xFFl) <<  8) |
1400             ((l2 & 0xFFl) << 16) |
1401             ((l3 & 0xFFl) << 24) |
1402             ((l4 & 0xFFl) << 32) |
1403             ((l5 & 0xFFl) << 40) |
1404             ((l6 & 0xFFl) << 48) |
1405             ((l7 & 0xFFl) << 56);
1406    }
1407  }
1408  static void test_unpack8(byte[] a0, long[] p8) {
1409    if (p8.length*8 > a0.length) return;
1410    for (int i = 0; i < p8.length; i+=1) {
1411      long l = p8[i];
1412      a0[i*8+0] = (byte)(l & 0xFFl);
1413      a0[i*8+1] = (byte)(l >>  8);
1414      a0[i*8+2] = (byte)(l >> 16);
1415      a0[i*8+3] = (byte)(l >> 24);
1416      a0[i*8+4] = (byte)(l >> 32);
1417      a0[i*8+5] = (byte)(l >> 40);
1418      a0[i*8+6] = (byte)(l >> 48);
1419      a0[i*8+7] = (byte)(l >> 56);
1420    }
1421  }
1422  static void test_pack8_swap(long[] p8, byte[] a1) {
1423    if (p8.length*8 > a1.length) return;
1424    for (int i = 0; i < p8.length; i+=1) {
1425      long l0 = (long)a1[i*8+0];
1426      long l1 = (long)a1[i*8+1];
1427      long l2 = (long)a1[i*8+2];
1428      long l3 = (long)a1[i*8+3];
1429      long l4 = (long)a1[i*8+4];
1430      long l5 = (long)a1[i*8+5];
1431      long l6 = (long)a1[i*8+6];
1432      long l7 = (long)a1[i*8+7];
1433      p8[i] = (l7 & 0xFFl) |
1434             ((l6 & 0xFFl) <<  8) |
1435             ((l5 & 0xFFl) << 16) |
1436             ((l4 & 0xFFl) << 24) |
1437             ((l3 & 0xFFl) << 32) |
1438             ((l2 & 0xFFl) << 40) |
1439             ((l1 & 0xFFl) << 48) |
1440             ((l0 & 0xFFl) << 56);
1441    }
1442  }
1443  static void test_unpack8_swap(byte[] a0, long[] p8) {
1444    if (p8.length*8 > a0.length) return;
1445    for (int i = 0; i < p8.length; i+=1) {
1446      long l = p8[i];
1447      a0[i*8+0] = (byte)(l >> 56);
1448      a0[i*8+1] = (byte)(l >> 48);
1449      a0[i*8+2] = (byte)(l >> 40);
1450      a0[i*8+3] = (byte)(l >> 32);
1451      a0[i*8+4] = (byte)(l >> 24);
1452      a0[i*8+5] = (byte)(l >> 16);
1453      a0[i*8+6] = (byte)(l >>  8);
1454      a0[i*8+7] = (byte)(l & 0xFFl);
1455    }
1456  }
1457
1458  static int verify(String text, int i, byte elem, byte val) {
1459    if (elem != val) {
1460      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1461      return 1;
1462    }
1463    return 0;
1464  }
1465
1466  static int verify(String text, int i, short elem, short val) {
1467    if (elem != val) {
1468      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
1469      return 1;
1470    }
1471    return 0;
1472  }
1473
1474  static int verify(String text, int i, int elem, int val) {
1475    if (elem != val) {
1476      System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val));
1477      return 1;
1478    }
1479    return 0;
1480  }
1481
1482  static int verify(String text, int i, long elem, long val) {
1483    if (elem != val) {
1484      System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
1485      return 1;
1486    }
1487    return 0;
1488  }
1489}
1490