1/*
2 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23package org.graalvm.compiler.asm.amd64;
24
25import static jdk.vm.ci.amd64.AMD64.rax;
26import static jdk.vm.ci.amd64.AMD64.rcx;
27import static jdk.vm.ci.amd64.AMD64.rdx;
28import static jdk.vm.ci.amd64.AMD64.rsp;
29import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIncDec;
30import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmLoadAndClearUpper;
31import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmRegToRegMoveAll;
32
33import org.graalvm.compiler.asm.Label;
34import org.graalvm.compiler.core.common.NumUtil;
35import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
36
37import jdk.vm.ci.amd64.AMD64;
38import jdk.vm.ci.amd64.AMD64Kind;
39import jdk.vm.ci.code.Register;
40import jdk.vm.ci.code.TargetDescription;
41
42/**
43 * This class implements commonly used X86 code patterns.
44 */
45public class AMD64MacroAssembler extends AMD64Assembler {
46
47    public AMD64MacroAssembler(TargetDescription target) {
48        super(target);
49    }
50
51    public final void decrementq(Register reg, int value) {
52        if (value == Integer.MIN_VALUE) {
53            subq(reg, value);
54            return;
55        }
56        if (value < 0) {
57            incrementq(reg, -value);
58            return;
59        }
60        if (value == 0) {
61            return;
62        }
63        if (value == 1 && UseIncDec) {
64            decq(reg);
65        } else {
66            subq(reg, value);
67        }
68    }
69
70    public final void decrementq(AMD64Address dst, int value) {
71        if (value == Integer.MIN_VALUE) {
72            subq(dst, value);
73            return;
74        }
75        if (value < 0) {
76            incrementq(dst, -value);
77            return;
78        }
79        if (value == 0) {
80            return;
81        }
82        if (value == 1 && UseIncDec) {
83            decq(dst);
84        } else {
85            subq(dst, value);
86        }
87    }
88
89    public void incrementq(Register reg, int value) {
90        if (value == Integer.MIN_VALUE) {
91            addq(reg, value);
92            return;
93        }
94        if (value < 0) {
95            decrementq(reg, -value);
96            return;
97        }
98        if (value == 0) {
99            return;
100        }
101        if (value == 1 && UseIncDec) {
102            incq(reg);
103        } else {
104            addq(reg, value);
105        }
106    }
107
108    public final void incrementq(AMD64Address dst, int value) {
109        if (value == Integer.MIN_VALUE) {
110            addq(dst, value);
111            return;
112        }
113        if (value < 0) {
114            decrementq(dst, -value);
115            return;
116        }
117        if (value == 0) {
118            return;
119        }
120        if (value == 1 && UseIncDec) {
121            incq(dst);
122        } else {
123            addq(dst, value);
124        }
125    }
126
127    public final void movptr(Register dst, AMD64Address src) {
128        movq(dst, src);
129    }
130
131    public final void movptr(AMD64Address dst, Register src) {
132        movq(dst, src);
133    }
134
135    public final void movptr(AMD64Address dst, int src) {
136        movslq(dst, src);
137    }
138
139    public final void cmpptr(Register src1, Register src2) {
140        cmpq(src1, src2);
141    }
142
143    public final void cmpptr(Register src1, AMD64Address src2) {
144        cmpq(src1, src2);
145    }
146
147    public final void decrementl(Register reg) {
148        decrementl(reg, 1);
149    }
150
151    public final void decrementl(Register reg, int value) {
152        if (value == Integer.MIN_VALUE) {
153            subl(reg, value);
154            return;
155        }
156        if (value < 0) {
157            incrementl(reg, -value);
158            return;
159        }
160        if (value == 0) {
161            return;
162        }
163        if (value == 1 && UseIncDec) {
164            decl(reg);
165        } else {
166            subl(reg, value);
167        }
168    }
169
170    public final void decrementl(AMD64Address dst, int value) {
171        if (value == Integer.MIN_VALUE) {
172            subl(dst, value);
173            return;
174        }
175        if (value < 0) {
176            incrementl(dst, -value);
177            return;
178        }
179        if (value == 0) {
180            return;
181        }
182        if (value == 1 && UseIncDec) {
183            decl(dst);
184        } else {
185            subl(dst, value);
186        }
187    }
188
189    public final void incrementl(Register reg, int value) {
190        if (value == Integer.MIN_VALUE) {
191            addl(reg, value);
192            return;
193        }
194        if (value < 0) {
195            decrementl(reg, -value);
196            return;
197        }
198        if (value == 0) {
199            return;
200        }
201        if (value == 1 && UseIncDec) {
202            incl(reg);
203        } else {
204            addl(reg, value);
205        }
206    }
207
208    public final void incrementl(AMD64Address dst, int value) {
209        if (value == Integer.MIN_VALUE) {
210            addl(dst, value);
211            return;
212        }
213        if (value < 0) {
214            decrementl(dst, -value);
215            return;
216        }
217        if (value == 0) {
218            return;
219        }
220        if (value == 1 && UseIncDec) {
221            incl(dst);
222        } else {
223            addl(dst, value);
224        }
225    }
226
227    public void movflt(Register dst, Register src) {
228        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
229        if (UseXmmRegToRegMoveAll) {
230            movaps(dst, src);
231        } else {
232            movss(dst, src);
233        }
234    }
235
236    public void movflt(Register dst, AMD64Address src) {
237        assert dst.getRegisterCategory().equals(AMD64.XMM);
238        movss(dst, src);
239    }
240
241    public void movflt(AMD64Address dst, Register src) {
242        assert src.getRegisterCategory().equals(AMD64.XMM);
243        movss(dst, src);
244    }
245
246    public void movdbl(Register dst, Register src) {
247        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
248        if (UseXmmRegToRegMoveAll) {
249            movapd(dst, src);
250        } else {
251            movsd(dst, src);
252        }
253    }
254
255    public void movdbl(Register dst, AMD64Address src) {
256        assert dst.getRegisterCategory().equals(AMD64.XMM);
257        if (UseXmmLoadAndClearUpper) {
258            movsd(dst, src);
259        } else {
260            movlpd(dst, src);
261        }
262    }
263
264    public void movdbl(AMD64Address dst, Register src) {
265        assert src.getRegisterCategory().equals(AMD64.XMM);
266        movsd(dst, src);
267    }
268
269    /**
270     * Non-atomic write of a 64-bit constant to memory. Do not use if the address might be a
271     * volatile field!
272     */
273    public final void movlong(AMD64Address dst, long src) {
274        if (NumUtil.isInt(src)) {
275            AMD64MIOp.MOV.emit(this, OperandSize.QWORD, dst, (int) src);
276        } else {
277            AMD64Address high = new AMD64Address(dst.getBase(), dst.getIndex(), dst.getScale(), dst.getDisplacement() + 4);
278            movl(dst, (int) (src & 0xFFFFFFFF));
279            movl(high, (int) (src >> 32));
280        }
281
282    }
283
284    public final void flog(Register dest, Register value, boolean base10) {
285        if (base10) {
286            fldlg2();
287        } else {
288            fldln2();
289        }
290        AMD64Address tmp = trigPrologue(value);
291        fyl2x();
292        trigEpilogue(dest, tmp);
293    }
294
295    public final void fsin(Register dest, Register value) {
296        AMD64Address tmp = trigPrologue(value);
297        fsin();
298        trigEpilogue(dest, tmp);
299    }
300
301    public final void fcos(Register dest, Register value) {
302        AMD64Address tmp = trigPrologue(value);
303        fcos();
304        trigEpilogue(dest, tmp);
305    }
306
307    public final void ftan(Register dest, Register value) {
308        AMD64Address tmp = trigPrologue(value);
309        fptan();
310        fstp(0); // ftan pushes 1.0 in addition to the actual result, pop
311        trigEpilogue(dest, tmp);
312    }
313
314    public final void fpop() {
315        ffree(0);
316        fincstp();
317    }
318
319    private AMD64Address trigPrologue(Register value) {
320        assert value.getRegisterCategory().equals(AMD64.XMM);
321        AMD64Address tmp = new AMD64Address(AMD64.rsp);
322        subq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes());
323        movdbl(tmp, value);
324        fldd(tmp);
325        return tmp;
326    }
327
328    private void trigEpilogue(Register dest, AMD64Address tmp) {
329        assert dest.getRegisterCategory().equals(AMD64.XMM);
330        fstpd(tmp);
331        movdbl(dest, tmp);
332        addq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes());
333    }
334
335    // IndexOf for constant substrings with size >= 8 chars
336    // which don't need to be loaded through stack.
337    public void stringIndexofC8(Register str1, Register str2,
338                    Register cnt1, Register cnt2,
339                    int intCnt2, Register result,
340                    Register vec, Register tmp) {
341        // assert(UseSSE42Intrinsics, "SSE4.2 is required");
342
343        // This method uses pcmpestri inxtruction with bound registers
344        // inputs:
345        // xmm - substring
346        // rax - substring length (elements count)
347        // mem - scanned string
348        // rdx - string length (elements count)
349        // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
350        // outputs:
351        // rcx - matched index in string
352        assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri";
353
354        Label reloadSubstr = new Label();
355        Label scanToSubstr = new Label();
356        Label scanSubstr = new Label();
357        Label retFound = new Label();
358        Label retNotFound = new Label();
359        Label exit = new Label();
360        Label foundSubstr = new Label();
361        Label matchSubstrHead = new Label();
362        Label reloadStr = new Label();
363        Label foundCandidate = new Label();
364
365        // Note, inline_string_indexOf() generates checks:
366        // if (substr.count > string.count) return -1;
367        // if (substr.count == 0) return 0;
368        assert intCnt2 >= 8 : "this code isused only for cnt2 >= 8 chars";
369
370        // Load substring.
371        movdqu(vec, new AMD64Address(str2, 0));
372        movl(cnt2, intCnt2);
373        movq(result, str1); // string addr
374
375        if (intCnt2 > 8) {
376            jmpb(scanToSubstr);
377
378            // Reload substr for rescan, this code
379            // is executed only for large substrings (> 8 chars)
380            bind(reloadSubstr);
381            movdqu(vec, new AMD64Address(str2, 0));
382            negq(cnt2); // Jumped here with negative cnt2, convert to positive
383
384            bind(reloadStr);
385            // We came here after the beginning of the substring was
386            // matched but the rest of it was not so we need to search
387            // again. Start from the next element after the previous match.
388
389            // cnt2 is number of substring reminding elements and
390            // cnt1 is number of string reminding elements when cmp failed.
391            // Restored cnt1 = cnt1 - cnt2 + int_cnt2
392            subl(cnt1, cnt2);
393            addl(cnt1, intCnt2);
394            movl(cnt2, intCnt2); // Now restore cnt2
395
396            decrementl(cnt1, 1);     // Shift to next element
397            cmpl(cnt1, cnt2);
398            jccb(ConditionFlag.Negative, retNotFound);  // Left less then substring
399
400            addq(result, 2);
401
402        } // (int_cnt2 > 8)
403
404        // Scan string for start of substr in 16-byte vectors
405        bind(scanToSubstr);
406        pcmpestri(vec, new AMD64Address(result, 0), 0x0d);
407        jccb(ConditionFlag.Below, foundCandidate);   // CF == 1
408        subl(cnt1, 8);
409        jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string
410        cmpl(cnt1, cnt2);
411        jccb(ConditionFlag.Negative, retNotFound);  // Left less then substring
412        addq(result, 16);
413        jmpb(scanToSubstr);
414
415        // Found a potential substr
416        bind(foundCandidate);
417        // Matched whole vector if first element matched (tmp(rcx) == 0).
418        if (intCnt2 == 8) {
419            jccb(ConditionFlag.Overflow, retFound);    // OF == 1
420        } else { // int_cnt2 > 8
421            jccb(ConditionFlag.Overflow, foundSubstr);
422        }
423        // After pcmpestri tmp(rcx) contains matched element index
424        // Compute start addr of substr
425        leaq(result, new AMD64Address(result, tmp, Scale.Times2, 0));
426
427        // Make sure string is still long enough
428        subl(cnt1, tmp);
429        cmpl(cnt1, cnt2);
430        if (intCnt2 == 8) {
431            jccb(ConditionFlag.GreaterEqual, scanToSubstr);
432        } else { // int_cnt2 > 8
433            jccb(ConditionFlag.GreaterEqual, matchSubstrHead);
434        }
435        // Left less then substring.
436
437        bind(retNotFound);
438        movl(result, -1);
439        jmpb(exit);
440
441        if (intCnt2 > 8) {
442            // This code is optimized for the case when whole substring
443            // is matched if its head is matched.
444            bind(matchSubstrHead);
445            pcmpestri(vec, new AMD64Address(result, 0), 0x0d);
446            // Reload only string if does not match
447            jccb(ConditionFlag.NoOverflow, reloadStr); // OF == 0
448
449            Label contScanSubstr = new Label();
450            // Compare the rest of substring (> 8 chars).
451            bind(foundSubstr);
452            // First 8 chars are already matched.
453            negq(cnt2);
454            addq(cnt2, 8);
455
456            bind(scanSubstr);
457            subl(cnt1, 8);
458            cmpl(cnt2, -8); // Do not read beyond substring
459            jccb(ConditionFlag.LessEqual, contScanSubstr);
460            // Back-up strings to avoid reading beyond substring:
461            // cnt1 = cnt1 - cnt2 + 8
462            addl(cnt1, cnt2); // cnt2 is negative
463            addl(cnt1, 8);
464            movl(cnt2, 8);
465            negq(cnt2);
466            bind(contScanSubstr);
467            if (intCnt2 < 1024 * 1024 * 1024) {
468                movdqu(vec, new AMD64Address(str2, cnt2, Scale.Times2, intCnt2 * 2));
469                pcmpestri(vec, new AMD64Address(result, cnt2, Scale.Times2, intCnt2 * 2), 0x0d);
470            } else {
471                // calculate index in register to avoid integer overflow (int_cnt2*2)
472                movl(tmp, intCnt2);
473                addq(tmp, cnt2);
474                movdqu(vec, new AMD64Address(str2, tmp, Scale.Times2, 0));
475                pcmpestri(vec, new AMD64Address(result, tmp, Scale.Times2, 0), 0x0d);
476            }
477            // Need to reload strings pointers if not matched whole vector
478            jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0
479            addq(cnt2, 8);
480            jcc(ConditionFlag.Negative, scanSubstr);
481            // Fall through if found full substring
482
483        } // (int_cnt2 > 8)
484
485        bind(retFound);
486        // Found result if we matched full small substring.
487        // Compute substr offset
488        subq(result, str1);
489        shrl(result, 1); // index
490        bind(exit);
491
492    } // string_indexofC8
493
494    // Small strings are loaded through stack if they cross page boundary.
495    public void stringIndexOf(Register str1, Register str2,
496                    Register cnt1, Register cnt2,
497                    int intCnt2, Register result,
498                    Register vec, Register tmp, int vmPageSize) {
499        //
500        // int_cnt2 is length of small (< 8 chars) constant substring
501        // or (-1) for non constant substring in which case its length
502        // is in cnt2 register.
503        //
504        // Note, inline_string_indexOf() generates checks:
505        // if (substr.count > string.count) return -1;
506        // if (substr.count == 0) return 0;
507        //
508        assert intCnt2 == -1 || (0 < intCnt2 && intCnt2 < 8) : "should be != 0";
509
510        // This method uses pcmpestri instruction with bound registers
511        // inputs:
512        // xmm - substring
513        // rax - substring length (elements count)
514        // mem - scanned string
515        // rdx - string length (elements count)
516        // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
517        // outputs:
518        // rcx - matched index in string
519        assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri";
520
521        Label reloadSubstr = new Label();
522        Label scanToSubstr = new Label();
523        Label scanSubstr = new Label();
524        Label adjustStr = new Label();
525        Label retFound = new Label();
526        Label retNotFound = new Label();
527        Label cleanup = new Label();
528        Label foundSubstr = new Label();
529        Label foundCandidate = new Label();
530
531        int wordSize = 8;
532        // We don't know where these strings are located
533        // and we can't read beyond them. Load them through stack.
534        Label bigStrings = new Label();
535        Label checkStr = new Label();
536        Label copySubstr = new Label();
537        Label copyStr = new Label();
538
539        movq(tmp, rsp); // save old SP
540
541        if (intCnt2 > 0) {     // small (< 8 chars) constant substring
542            if (intCnt2 == 1) {  // One char
543                movzwl(result, new AMD64Address(str2, 0));
544                movdl(vec, result); // move 32 bits
545            } else if (intCnt2 == 2) { // Two chars
546                movdl(vec, new AMD64Address(str2, 0)); // move 32 bits
547            } else if (intCnt2 == 4) { // Four chars
548                movq(vec, new AMD64Address(str2, 0));  // move 64 bits
549            } else { // cnt2 = { 3, 5, 6, 7 }
550                // Array header size is 12 bytes in 32-bit VM
551                // + 6 bytes for 3 chars == 18 bytes,
552                // enough space to load vec and shift.
553                movdqu(vec, new AMD64Address(str2, (intCnt2 * 2) - 16));
554                psrldq(vec, 16 - (intCnt2 * 2));
555            }
556        } else { // not constant substring
557            cmpl(cnt2, 8);
558            jccb(ConditionFlag.AboveEqual, bigStrings); // Both strings are big enough
559
560            // We can read beyond string if str+16 does not cross page boundary
561            // since heaps are aligned and mapped by pages.
562            assert vmPageSize < 1024 * 1024 * 1024 : "default page should be small";
563            movl(result, str2); // We need only low 32 bits
564            andl(result, (vmPageSize - 1));
565            cmpl(result, (vmPageSize - 16));
566            jccb(ConditionFlag.BelowEqual, checkStr);
567
568            // Move small strings to stack to allow load 16 bytes into vec.
569            subq(rsp, 16);
570            int stackOffset = wordSize - 2;
571            push(cnt2);
572
573            bind(copySubstr);
574            movzwl(result, new AMD64Address(str2, cnt2, Scale.Times2, -2));
575            movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result);
576            decrementl(cnt2, 1);
577            jccb(ConditionFlag.NotZero, copySubstr);
578
579            pop(cnt2);
580            movq(str2, rsp);  // New substring address
581        } // non constant
582
583        bind(checkStr);
584        cmpl(cnt1, 8);
585        jccb(ConditionFlag.AboveEqual, bigStrings);
586
587        // Check cross page boundary.
588        movl(result, str1); // We need only low 32 bits
589        andl(result, (vmPageSize - 1));
590        cmpl(result, (vmPageSize - 16));
591        jccb(ConditionFlag.BelowEqual, bigStrings);
592
593        subq(rsp, 16);
594        int stackOffset = -2;
595        if (intCnt2 < 0) { // not constant
596            push(cnt2);
597            stackOffset += wordSize;
598        }
599        movl(cnt2, cnt1);
600
601        bind(copyStr);
602        movzwl(result, new AMD64Address(str1, cnt2, Scale.Times2, -2));
603        movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result);
604        decrementl(cnt2, 1);
605        jccb(ConditionFlag.NotZero, copyStr);
606
607        if (intCnt2 < 0) { // not constant
608            pop(cnt2);
609        }
610        movq(str1, rsp);  // New string address
611
612        bind(bigStrings);
613        // Load substring.
614        if (intCnt2 < 0) { // -1
615            movdqu(vec, new AMD64Address(str2, 0));
616            push(cnt2);       // substr count
617            push(str2);       // substr addr
618            push(str1);       // string addr
619        } else {
620            // Small (< 8 chars) constant substrings are loaded already.
621            movl(cnt2, intCnt2);
622        }
623        push(tmp);  // original SP
624        // Finished loading
625
626        // ========================================================
627        // Start search
628        //
629
630        movq(result, str1); // string addr
631
632        if (intCnt2 < 0) {  // Only for non constant substring
633            jmpb(scanToSubstr);
634
635            // SP saved at sp+0
636            // String saved at sp+1*wordSize
637            // Substr saved at sp+2*wordSize
638            // Substr count saved at sp+3*wordSize
639
640            // Reload substr for rescan, this code
641            // is executed only for large substrings (> 8 chars)
642            bind(reloadSubstr);
643            movq(str2, new AMD64Address(rsp, 2 * wordSize));
644            movl(cnt2, new AMD64Address(rsp, 3 * wordSize));
645            movdqu(vec, new AMD64Address(str2, 0));
646            // We came here after the beginning of the substring was
647            // matched but the rest of it was not so we need to search
648            // again. Start from the next element after the previous match.
649            subq(str1, result); // Restore counter
650            shrl(str1, 1);
651            addl(cnt1, str1);
652            decrementl(cnt1);   // Shift to next element
653            cmpl(cnt1, cnt2);
654            jccb(ConditionFlag.Negative, retNotFound);  // Left less then substring
655
656            addq(result, 2);
657        } // non constant
658
659        // Scan string for start of substr in 16-byte vectors
660        bind(scanToSubstr);
661        assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri";
662        pcmpestri(vec, new AMD64Address(result, 0), 0x0d);
663        jccb(ConditionFlag.Below, foundCandidate);   // CF == 1
664        subl(cnt1, 8);
665        jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string
666        cmpl(cnt1, cnt2);
667        jccb(ConditionFlag.Negative, retNotFound);  // Left less then substring
668        addq(result, 16);
669
670        bind(adjustStr);
671        cmpl(cnt1, 8); // Do not read beyond string
672        jccb(ConditionFlag.GreaterEqual, scanToSubstr);
673        // Back-up string to avoid reading beyond string.
674        leaq(result, new AMD64Address(result, cnt1, Scale.Times2, -16));
675        movl(cnt1, 8);
676        jmpb(scanToSubstr);
677
678        // Found a potential substr
679        bind(foundCandidate);
680        // After pcmpestri tmp(rcx) contains matched element index
681
682        // Make sure string is still long enough
683        subl(cnt1, tmp);
684        cmpl(cnt1, cnt2);
685        jccb(ConditionFlag.GreaterEqual, foundSubstr);
686        // Left less then substring.
687
688        bind(retNotFound);
689        movl(result, -1);
690        jmpb(cleanup);
691
692        bind(foundSubstr);
693        // Compute start addr of substr
694        leaq(result, new AMD64Address(result, tmp, Scale.Times2));
695
696        if (intCnt2 > 0) { // Constant substring
697            // Repeat search for small substring (< 8 chars)
698            // from new point without reloading substring.
699            // Have to check that we don't read beyond string.
700            cmpl(tmp, 8 - intCnt2);
701            jccb(ConditionFlag.Greater, adjustStr);
702            // Fall through if matched whole substring.
703        } else { // non constant
704            assert intCnt2 == -1 : "should be != 0";
705
706            addl(tmp, cnt2);
707            // Found result if we matched whole substring.
708            cmpl(tmp, 8);
709            jccb(ConditionFlag.LessEqual, retFound);
710
711            // Repeat search for small substring (<= 8 chars)
712            // from new point 'str1' without reloading substring.
713            cmpl(cnt2, 8);
714            // Have to check that we don't read beyond string.
715            jccb(ConditionFlag.LessEqual, adjustStr);
716
717            Label checkNext = new Label();
718            Label contScanSubstr = new Label();
719            Label retFoundLong = new Label();
720            // Compare the rest of substring (> 8 chars).
721            movq(str1, result);
722
723            cmpl(tmp, cnt2);
724            // First 8 chars are already matched.
725            jccb(ConditionFlag.Equal, checkNext);
726
727            bind(scanSubstr);
728            pcmpestri(vec, new AMD64Address(str1, 0), 0x0d);
729            // Need to reload strings pointers if not matched whole vector
730            jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0
731
732            bind(checkNext);
733            subl(cnt2, 8);
734            jccb(ConditionFlag.LessEqual, retFoundLong); // Found full substring
735            addq(str1, 16);
736            addq(str2, 16);
737            subl(cnt1, 8);
738            cmpl(cnt2, 8); // Do not read beyond substring
739            jccb(ConditionFlag.GreaterEqual, contScanSubstr);
740            // Back-up strings to avoid reading beyond substring.
741            leaq(str2, new AMD64Address(str2, cnt2, Scale.Times2, -16));
742            leaq(str1, new AMD64Address(str1, cnt2, Scale.Times2, -16));
743            subl(cnt1, cnt2);
744            movl(cnt2, 8);
745            addl(cnt1, 8);
746            bind(contScanSubstr);
747            movdqu(vec, new AMD64Address(str2, 0));
748            jmpb(scanSubstr);
749
750            bind(retFoundLong);
751            movq(str1, new AMD64Address(rsp, wordSize));
752        } // non constant
753
754        bind(retFound);
755        // Compute substr offset
756        subq(result, str1);
757        shrl(result, 1); // index
758
759        bind(cleanup);
760        pop(rsp); // restore SP
761
762    }
763
764}
765