;; GCC machine description for i386 synchronization instructions.
;; Copyright (C) 2005-2020 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_c_enum "unspec" [
  UNSPEC_LFENCE
  UNSPEC_SFENCE
  UNSPEC_MFENCE

  UNSPEC_FILD_ATOMIC
  UNSPEC_FIST_ATOMIC

  UNSPEC_LDX_ATOMIC
  UNSPEC_STX_ATOMIC

  ;; __atomic support
  UNSPEC_LDA
  UNSPEC_STA
])

(define_c_enum "unspecv" [
  UNSPECV_CMPXCHG
  UNSPECV_XCHG
  UNSPECV_LOCK
])

(define_expand "sse2_lfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "lfence")
   (set_attr "memory" "unknown")])

(define_expand "sse_sfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_expand "sse2_mfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "mfence_sse2"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_64BIT || TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

;; Without SSE2 there is no mfence instruction; a locked read-modify-write
;; of a dummy stack location serves as a full memory barrier instead.
(define_insn "mfence_nosse"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
   (clobber (reg:CC FLAGS_REG))]
  "!(TARGET_64BIT || TARGET_SSE2)"
  "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
  [(set_attr "memory" "unknown")])

(define_expand "mem_thread_fence"
  [(match_operand:SI 0 "const_int_operand")]		;; model
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));

  /* Unless this is a SEQ_CST fence, the i386 memory model is strong
     enough not to require barriers of any kind.  */
  if (is_mm_seq_cst (model))
    {
      rtx (*mfence_insn)(rtx);
      rtx mem;

      if (TARGET_64BIT || TARGET_SSE2)
	mfence_insn = gen_mfence_sse2;
      else
	mfence_insn = gen_mfence_nosse;

      mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
      MEM_VOLATILE_P (mem) = 1;

      emit_insn (mfence_insn (mem));
    }
  DONE;
})
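
;; For illustration, a minimal C sketch of the mapping (assuming SSE2);
;; only the seq_cst fence emits an instruction, weaker fences being
;; implied by the x86 memory model already:
;;
;;   #include <stdatomic.h>
;;   void f_rel (void) { atomic_thread_fence (memory_order_release); } /* nothing */
;;   void f_sc (void)  { atomic_thread_fence (memory_order_seq_cst); } /* mfence */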

;; ??? From volume 3 section 8.1.1, Guaranteed Atomic Operations:
;; only beginning with the Pentium family of processors do we get any
;; guarantee of atomicity for aligned 64-bit quantities.  Beginning at P6,
;; we get a guarantee for 64-bit accesses that do not cross a cacheline
;; boundary.
;;
;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium".
;;
;; Importantly, *no* processor makes atomicity guarantees for larger
;; accesses.  In particular, there's no way to perform an atomic TImode
;; move, despite the apparent applicability of MOVDQA et al.

(define_mode_iterator ATOMIC
   [QI HI SI
    (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))")
   ])

(define_expand "atomic_load<mode>"
  [(set (match_operand:ATOMIC 0 "nonimmediate_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_LDA))]
  ""
{
  /* For DImode on 32-bit, we can use the FPU to perform the load.  */
  if (<MODE>mode == DImode && !TARGET_64BIT)
    emit_insn (gen_atomic_loaddi_fpu
	       (operands[0], operands[1],
	        assign_386_stack_local (DImode, SLOT_TEMP)));
  else
    {
      rtx dst = operands[0];

      if (MEM_P (dst))
	dst = gen_reg_rtx (<MODE>mode);

      emit_move_insn (dst, operands[1]);

      /* Fix up the destination if needed.  */
      if (dst != operands[0])
	emit_move_insn (operands[0], dst);
    }
  DONE;
})
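
;; A hedged example: for the modes above an ordinary mov is already
;; atomic, so even an acquire or seq_cst load needs no barrier on x86.
;;
;;   int load (_Atomic int *p)
;;   { return atomic_load_explicit (p, memory_order_seq_cst); } /* mov */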

(define_insn_and_split "atomic_loaddi_fpu"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r")
	(unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")]
		   UNSPEC_LDA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  if (SSE_REG_P (dst))
    emit_move_insn (dst, src);
  else
    {
      if (MEM_P (dst))
	mem = dst;

      if (STACK_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (mem, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (mem, tmp));
	}

      if (mem != dst)
	emit_move_insn (dst, mem);
    }
  DONE;
})
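
;; E.g., with TARGET_80387 and an integer-register destination, the split
;; above yields roughly (an illustrative sketch, AT&T syntax):
;;
;;   fildll  mem         ; 64-bit FPU load, atomic
;;   fistpll temp        ; 64-bit FPU store to the stack temporary
;;   movl    temp, %eax  ; then read the halves normally
;;   movl    temp+4, %edx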

(define_expand "atomic_store<mode>"
  [(set (match_operand:ATOMIC 0 "memory_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_STA))]
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[2]));

  if (<MODE>mode == DImode && !TARGET_64BIT)
    {
      /* For DImode on 32-bit, we can use the FPU to perform the store.  */
      /* Note that while we could perform a cmpxchg8b loop, that turns
	 out to be significantly larger than this plus a barrier.  */
      emit_insn (gen_atomic_storedi_fpu
		 (operands[0], operands[1],
	          assign_386_stack_local (DImode, SLOT_TEMP)));
    }
  else
    {
      operands[1] = force_reg (<MODE>mode, operands[1]);

      /* For seq-cst stores, use XCHG when we either lack MFENCE
	 or want to avoid it.  */
      if (is_mm_seq_cst (model)
	  && (!(TARGET_64BIT || TARGET_SSE2)
	      || TARGET_AVOID_MFENCE))
	{
	  emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
						operands[0], operands[1],
						operands[2]));
	  DONE;
	}

      /* Otherwise use a store.  */
      emit_insn (gen_atomic_store<mode>_1 (operands[0], operands[1],
					   operands[2]));
    }
  /* ... followed by an MFENCE, if required.  */
  if (is_mm_seq_cst (model))
    emit_insn (gen_mem_thread_fence (operands[2]));
  DONE;
})
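
;; For illustration, a seq-cst store such as
;;
;;   void store (_Atomic int *p, int v)
;;   { atomic_store_explicit (p, v, memory_order_seq_cst); }
;;
;; becomes either "mov; mfence" or a single implicitly-locked "xchg",
;; depending on the TARGET_AVOID_MFENCE choice made above.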

(define_insn "atomic_store<mode>_1"
  [(set (match_operand:SWI 0 "memory_operand" "=m")
	(unspec:SWI [(match_operand:SWI 1 "<nonmemory_operand>" "<r><i>")
		     (match_operand:SI 2 "const_int_operand")]
		    UNSPEC_STA))]
  ""
  "%K2mov{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_insn_and_split "atomic_storedi_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
	(unspec:DI [(match_operand:DI 1 "nonimmediate_operand" "x,m,?r")]
		   UNSPEC_STA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  if (SSE_REG_P (src))
    emit_move_insn (dst, src);
  else
    {
      if (REG_P (src))
	{
	  emit_move_insn (mem, src);
	  src = mem;
	}

      if (STACK_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (dst, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (dst, tmp));
	}
    }
  DONE;
})
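
;; The store side mirrors the load: an integer-register source is first
;; spilled to the stack temporary and then moved as one 64-bit unit,
;; roughly:
;;
;;   movl    %eax, temp
;;   movl    %edx, temp+4
;;   fildll  temp
;;   fistpll mem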

;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
;; operations.  But the fix_trunc patterns want way more setup than we want
;; to provide.  Note that the scratch is DFmode instead of XFmode in order
;; to make it easy to allocate a scratch in either SSE or FP_REGs above.

(define_insn "loaddi_via_fpu"
  [(set (match_operand:DF 0 "register_operand" "=f")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_FILD_ATOMIC))]
  "TARGET_80387"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "DF")
   (set_attr "fp_int_src" "true")])

(define_insn "storedi_via_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "f")]
		   UNSPEC_FIST_ATOMIC))]
  "TARGET_80387"
{
  gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX);

  return "fistp%Z0\t%0";
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "DI")])

(define_insn "loaddi_via_sse"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_LDX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])

(define_insn "storedi_via_sse"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "x")]
		   UNSPEC_STX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])

(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:SWI124 1 "register_operand")	;; oldval output
   (match_operand:SWI124 2 "memory_operand")	;; memory
   (match_operand:SWI124 3 "register_operand")	;; expected input
   (match_operand:SWI124 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  emit_insn
   (gen_atomic_compare_and_swap<mode>_1
    (operands[1], operands[2], operands[3], operands[4], operands[6]));
  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})
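
;; As a hedged example of what this expands from and into:
;;
;;   _Bool cas (_Atomic int *p, int *exp, int des)
;;   { return atomic_compare_exchange_strong (p, exp, des); }
;;
;; is in essence "mov *exp, %eax; lock cmpxchg %des, (p); sete %al",
;; with %eax stored back through exp on failure.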

(define_mode_iterator CASMODE
  [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
(define_mode_attr CASHMODE [(DI "SI") (TI "DI")])

(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:CASMODE 1 "register_operand")	;; oldval output
   (match_operand:CASMODE 2 "memory_operand")	;; memory
   (match_operand:CASMODE 3 "register_operand")	;; expected input
   (match_operand:CASMODE 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  if (<MODE>mode == DImode && TARGET_64BIT)
    {
      emit_insn
       (gen_atomic_compare_and_swapdi_1
	(operands[1], operands[2], operands[3], operands[4], operands[6]));
    }
  else
    {
      machine_mode hmode = <CASHMODE>mode;

      emit_insn
       (gen_atomic_compare_and_swap<mode>_doubleword
        (operands[1], operands[2], operands[3],
	 gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]),
	 operands[6]));
    }

  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})

;; For double-word compare and swap, we are obliged to play tricks with
;; the input newval (op3:op4) because the Intel register numbering does
;; not match the gcc register numbering, so the pair must be CX:BX.

(define_mode_attr doublemodesuffix [(SI "8") (DI "16")])

(define_insn "atomic_compare_and_swap<dwi>_doubleword"
  [(set (match_operand:<DWI> 0 "register_operand" "=A")
	(unspec_volatile:<DWI>
	  [(match_operand:<DWI> 1 "memory_operand" "+m")
	   (match_operand:<DWI> 2 "register_operand" "0")
	   (match_operand:DWIH 3 "register_operand" "b")
	   (match_operand:DWIH 4 "register_operand" "c")
	   (match_operand:SI 5 "const_int_operand")]
	  UNSPECV_CMPXCHG))
   (set (match_dup 1)
	(unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG))
   (set (reg:CCZ FLAGS_REG)
        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
  "TARGET_CMPXCHG<doublemodesuffix>B"
  "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1")

(define_insn "atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SWI 0 "register_operand" "=a")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SWI 2 "register_operand" "0")
	   (match_operand:SWI 3 "register_operand" "<r>")
	   (match_operand:SI 4 "const_int_operand")]
	  UNSPECV_CMPXCHG))
   (set (match_dup 1)
	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
   (set (reg:CCZ FLAGS_REG)
        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
  "TARGET_CMPXCHG"
  "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
451
452;; For operand 2 nonmemory_operand predicate is used instead of
453;; register_operand to allow combiner to better optimize atomic
454;; additions of constants.
455(define_insn "atomic_fetch_add<mode>"
456  [(set (match_operand:SWI 0 "register_operand" "=<r>")
457	(unspec_volatile:SWI
458	  [(match_operand:SWI 1 "memory_operand" "+m")
459	   (match_operand:SI 3 "const_int_operand")]		;; model
460	  UNSPECV_XCHG))
461   (set (match_dup 1)
462	(plus:SWI (match_dup 1)
463		  (match_operand:SWI 2 "nonmemory_operand" "0")))
464   (clobber (reg:CC FLAGS_REG))]
465  "TARGET_XADD"
466  "lock{%;} %K3xadd{<imodesuffix>}\t{%0, %1|%1, %0}")

;; This peephole2 and the following insn optimize
;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
;; followed by a test of the flags instead of a lock xadd and comparison.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 2 "const_int_operand"))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])

;; Likewise, but for the -Os special case of *mov<mode>_or.
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "register_operand")
		   (match_operand:SWI 2 "constm1_operand"))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])

(define_insn "*atomic_fetch_add_cmp<mode>"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (unspec_volatile:SWI
	    [(match_operand:SWI 0 "memory_operand" "+m")
	     (match_operand:SI 3 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (match_operand:SWI 2 "const_int_operand" "i")))
   (set (match_dup 0)
	(plus:SWI (match_dup 0)
		  (match_operand:SWI 1 "const_int_operand" "i")))]
  "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
   == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K3inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K3dec{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K3sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
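
;; So, for example, a sequence like
;;
;;   if (__sync_fetch_and_add (&x, -1) == 1) ...
;;
;; is rewritten into a "lock sub" (or dec) with a direct test of ZF
;; instead of a lock xadd followed by a separate compare.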

;; Recall that xchg implicitly asserts LOCK#, so adding it again wastes space.
;; In addition, it is always a full barrier, so we can ignore the memory model.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")		;; output
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")		;; memory
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(match_operand:SWI 2 "register_operand" "0"))]		;; input
  ""
  "%K3xchg{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_insn "atomic_add<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(plus:SWI (match_dup 0)
		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K2dec{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
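
;; This pattern serves fetch-and-add when the result is unused, e.g.
;;
;;   void add (_Atomic int *p, int n) { atomic_fetch_add (p, n); }
;;
;; which needs only a "lock add" (or inc/dec/sub, as chosen above)
;; rather than a lock xadd.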

(define_insn "atomic_sub<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(minus:SWI (match_dup 0)
		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K2inc{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
})

(define_insn "atomic_<logic><mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(any_logic:SWI (match_dup 0)
			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_set<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2],
						  operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})
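
;; For illustration, the idiom recognized here (a hedged sketch):
;;
;;   _Bool set_bit (_Atomic unsigned *p, unsigned n)
;;   { return (atomic_fetch_or (p, 1u << n) >> n) & 1; }
;;
;; becomes "lock bts %n-reg, (p); setc", the old bit coming back in the
;; carry flag.  The btc and btr variants below play the same game for
;; fetch_xor and for fetch_and with an inverted mask.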

(define_insn "atomic_bit_test_and_set<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 1))]
  ""
  "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_complement<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1],
							 operands[2],
							 operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_complement<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(not:SWI248 (zero_extract:SWI248 (match_dup 0)
					 (const_int 1)
					 (match_dup 1))))]
  ""
  "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_reset<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2],
						    operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_reset<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 0))]
  ""
  "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")