! { dg-do compile }
! { dg-require-effective-target vect_float }
! { dg-additional-options "-O3 -fcray-pointer" }
! PR 32380 - loops were not vectorized due to unaligned store.
c
c     trnfbt - compile-only kernel used to check loop vectorization
c     (see the dg- directives above and at the end of the file).
c
c     All array data is exchanged through the common blocks declared
c     below.  Of the dummy arguments only lft and llt are referenced
c     in the executable part, where they are the inclusive lower and
c     upper bounds of every do loop.  The other dummy arguments
c     (e, f, qs, mte, gm, ihgenf, hgener, sthick, fibl, istupd, ies,
c     hoff) are declared but never read or written in this routine.
c
c     Do not restructure, merge or reorder the loops below: the
c     dg-final directives at the end of the file check the exact
c     number of loops reported as vectorized.
c
c     NOTE(review): names such as qhg*, hgfac and /hourgloc/ suggest
c     an hourglass-control force update from a shell element code;
c     nothing in this file confirms that - verify before relying on
c     it.
c
      subroutine trnfbt(e,f,qs,mte,gm,ihgenf,hgener,lft,llt,sthick,
     . fibl,istupd,ies,hoff)
c     nlq is the extent of the per-element arrays in the common
c     blocks below and of the local work arrays qs1..qs5.
      parameter (nlq=96)
      integer nnlq
      common/newnlq/nnlq
c ... implicit common ...
      integer imauto,iteopt,lauto,mthsol,ilimit,maxref,icnvrg,
     & igdiv,nwebuf,neql,neqt,imterm,imphas,nbfgs,
     & numupd,istif,itrlas,imerr,imdof,neqtgl,lsmtd,lsdir
      common/bki01i/imauto,iteopt,lauto,mthsol,ilimit,maxref,icnvrg,
     & igdiv,nwebuf,neql,neqt,imterm,imphas,nbfgs,
     & numupd,istif,itrlas,imerr,imdof,neqtgl,lsmtd,lsdir
      REAL dtimp,dtimp0,timeim,dtmnim,dtmxim,cvtl,ectl,rctl,
     & tolls,dnorm2,dtprnt,dtplot,dtiter,dtrefm
      common/bki01r/dtimp,dtimp0,timeim,dtmnim,dtmxim,cvtl,ectl,rctl,
     & tolls,dnorm2,dtprnt(2),dtplot(2),dtiter(2),dtrefm(2)
      REAL ascntl
      common/bki02r/ascntl(150)
      logical lsensw
      common/bki01l/lsensw(20)
c     isolvr(18) selects between the two force-update variants in
c     the executable part; imip and icwrb are not referenced here.
      integer imip,isolvr,icwrb
      common/bki02i/imip(100),isolvr(200),icwrb(50)
c ... implicit common ...
c
c
c
      integer lnodim,ndofpn,nnpke,melemt,imlft,imllt,is17loc
      common/bki03iloc/lnodim(nlq,16),ndofpn,nnpke,melemt,imlft,imllt,
     & is17loc
      real*4 ske
      common/bki03rloc/ske(nlq,1176)
      integer lmke
      common/bki04iloc/lmke(nlq,48)
c******************************************************************
c| livermore software technology corporation (lstc) |
c| ------------------------------------------------------------ |
c| copyright 1987,1988,1989 john o. hallquist, lstc |
c| all rights reserved |
c******************************************************************
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c
c     No implicit statement is present, so every name below that has
c     no explicit type declaration follows the default implicit
c     typing rules.
c
c     qhgm, qhgb and qhgw are the scalars tested and used as scale
c     factors in the first branch of the executable part.
      common/bk12loc/b12,b2,qhg,qhgm,qhgb,qhgw
      common/aux00loc/
     & sig1m(nlq),sig2m(nlq),sig4m(nlq),sig1n(nlq),sig2n(nlq),
     & sig4n(nlq),sig5n(nlq),sig6n(nlq),sig5l(nlq),sig6l(nlq),
     & str33(nlq),enginc(nlq)
c     ft11..ft23 and fm11..fm42 are read and updated in place below.
      common/aux01loc/
     &ft11(nlq),ft12(nlq),ft13(nlq),ft21(nlq),ft22(nlq),ft23(nlq),
     &fm11(nlq),fm12(nlq),fm21(nlq),fm22(nlq),
     &fm31(nlq),fm32(nlq),fm41(nlq),fm42(nlq),
     &fmr11(nlq),fmr12(nlq),fmr21(nlq),fmr22(nlq),fmr31(nlq),
     &fmr32(nlq),fmr41(nlq),fmr42(nlq),sg5(nlq),sg6(nlq)
      common/aux7loc/
     1 vx1(nlq),vx2(nlq),vx3(nlq),vx4(nlq),
     2 vx5(nlq),vx6(nlq),vx7(nlq),vx8(nlq),
     3 vy1(nlq),vy2(nlq),vy3(nlq),vy4(nlq),
     4 vy5(nlq),vy6(nlq),vy7(nlq),vy8(nlq),
     5 vz1(nlq),vz2(nlq),vz3(nlq),vz4(nlq),
     6 vz5(nlq),vz6(nlq),vz7(nlq),vz8(nlq)
      common/aux10loc/area(nlq),
     1 px1(nlq),px2(nlq),px3(nlq),px4(nlq),
     & px5(nlq),px6(nlq),px7(nlq),px8(nlq),
     2 py1(nlq),py2(nlq),py3(nlq),py4(nlq),
     & py5(nlq),py6(nlq),py7(nlq),py8(nlq),
     3 pz1(nlq),pz2(nlq),pz3(nlq),pz4(nlq),
     & pz5(nlq),pz6(nlq),pz7(nlq),pz8(nlq),
     4 dx1(nlq),dx2(nlq),dx3(nlq),dx4(nlq),
     5 dx5(nlq),dx6(nlq),dx7(nlq),dx8(nlq),
     6 dy1(nlq),dy2(nlq),dy3(nlq),dy4(nlq),
     7 dy5(nlq),dy6(nlq),dy7(nlq),dy8(nlq),
     8 dz1(nlq),dz2(nlq),dz3(nlq),dz4(nlq),
     9 dz5(nlq),dz6(nlq),dz7(nlq),dz8(nlq)
c     ft31..ft43, gm1..gm4, qhx, qhy, qwz, qtx and qty are written
c     by the loops below.
      common/aux11loc/
     &ft31(nlq),ft32(nlq),ft33(nlq),ft41(nlq),ft42(nlq),ft43(nlq),
     &htx(nlq),hty(nlq),gm1(nlq),gm2(nlq),gm3(nlq),gm4(nlq),
     &bsum(nlq),qhx(nlq),qhy(nlq),qwz(nlq),qtx(nlq),qty(nlq)
c     The m* names would otherwise be implicitly integer; they are
c     explicitly typed real*4 before being dimensioned in /aux13loc/.
      real*4 mx1,my1,mz1,mx2,my2,mz2,mx3,my3,mz3,mx4,my4,mz4
      common/aux13loc/
     &zeta(nlq),thick(nlq),fga(nlq),fgb(nlq),fgc(nlq),
     &gl11(nlq),gl12(nlq),gl13(nlq),gl21(nlq),gl22(nlq),gl23(nlq),
     &gl31(nlq),gl32(nlq),gl33(nlq),
     &x1(nlq),y1(nlq),z1(nlq),x2(nlq),y2(nlq),z2(nlq),
     &x3(nlq),y3(nlq),z3(nlq),x4(nlq),y4(nlq),z4(nlq),
     &fx1(nlq),fy1(nlq),fz1(nlq),fx2(nlq),fy2(nlq),fz2(nlq),
     &fx3(nlq),fy3(nlq),fz3(nlq),fx4(nlq),fy4(nlq),fz4(nlq),
     &mx1(nlq),my1(nlq),mz1(nlq),mx2(nlq),my2(nlq),mz2(nlq),
     &mx3(nlq),my3(nlq),mz3(nlq),mx4(nlq),my4(nlq),mz4(nlq)
      common/aux33loc/
     1 ix1(nlq),ix2(nlq),ix3(nlq),ix4(nlq),ixs(nlq,4),mxt(nlq)
      common/aux35loc/rhoa(nlq),cxx(nlq),fcl(nlq),fcq(nlq)
      common/hourgloc/ymod(nlq),gmod(nlq),ifsv(nlq)
      common/soundloc/sndspd(nlq),sndsp(nlq),diagm(nlq),sarea(nlq),
     . dxl(nlq)
      common/bel6loc/bm(nlq,3,8),bb(nlq,3,8),bs(nlq,2,12),bhg(nlq,4),
     1 ex(nlq,3,8),dp0(nlq,3,3),dp1(nlq,3,3),dp2(nlq,3,3),
     2 ds(nlq),dhg(nlq,5)
c
      common/shlioc/ioshl(60)
      common/failuloc/sieu(nlq),fail(nlq),ifaili(nlq)
c     output gates the save block at the end of the routine.
      logical output,slnew
      common/csforc/ncs1,ncs2,ncs3,ncs4,ncs5,ncs6,ncs7,ncs8,ncs9,
     1 ncs10,ncs11,ncs12,ncs13,ncs14,ncs15,
     1 numcsd,csdinc,csdout,output,slnew,future(8)
c     savfrc, svfail, ndof and ifail are written by this routine.
      common/csfsavloc/savfrc(nlq,24),svfail(nlq),ndof,ifail
      common/sorterloc/nnc,lczc
      common/sorter/znnc,zlczc,
     & ns11,ns12,ns13,ns14,ns15,ns16,
     & nh11,nh12,nh13,nh14,nh15,nh16,
     & nt11,nt12,nt13,nt14,nt15,nt16,
     & nb11,nb12,nb13,nb14,nb15,nb16,
     & nu11,nu12,nu13,nu14,nu15,nu16,
     & nd11,nd12,nd13,nd14,nd15,nd16
      common/subtssloc/dt1siz(nlq)
      common/matflr/mtfail(200)
      common/berwcmloc/xll(nlq),rigx(nlq),rigy(nlq)
c     Cray pointer: mp (held in common /mem/) is the pointer and ia
c     its pointee.  Neither is referenced in the executable part;
c     the declaration is why -fcray-pointer appears in the
c     dg-additional-options line.
      common /mem/ mp
      integer ia(1)
      pointer(mp,ia)
      real*4 mmode,ies
c     Dummy-argument shapes; none of these arrays is used below.
      dimension e(3,1),f(3,1),qs(9,1),gm(4,*),hgener(*)
c     qs1..qs5 are local work arrays (not in common, not dummies);
c     they carry values from the second loop into the update loops.
      dimension qs1(nlq),qs2(nlq),qs3(nlq),qs4(nlq),qs5(nlq)
      dimension fibl(9,1),sthick(*),ies(*),hoff(*)
c
c     Clear ifail (in common /csfsavloc/); it is not assigned again
c     anywhere in this routine.
      ifail=0
c     The first branch is taken only when the sum of the three
c     scalars from /bk12loc/ exceeds 1.e-04.
      if (qhgb+qhgw+qhgm.gt.1.e-04) then
c       tmode, wmode and mmode are assigned here but never read
c       again in this routine.
        tmode=qhgb*ymod(lft)/1920.0
        wmode=qhgw*gmod(lft)/120.00
        mmode=qhgm*ymod(lft)/80.000
c
c       hgfac is formed from element lft only and is reused
c       unchanged for every i in the loops below.
        hgfac=rhoa(lft)*sndspd(lft)
c
c       Loop 1: build gm1..gm4 and the products qhx, qhy, qwz.
c       gm3 and gm4 are derived from gm1 and gm2 computed in the
c       same iteration, so statement order matters.
        do i=lft,llt
          htxi =area(i)*(x3(i)-x2(i)-x4(i))
          htyi =area(i)*(y3(i)-y2(i)-y4(i))
          gm1(i)= 1.-px1(i)*htxi-py1(i)*htyi
          gm2(i)=-1.-px2(i)*htxi-py2(i)*htyi
          gm3(i)= 2.-gm1(i)
          gm4(i)=-2.-gm2(i)
          qhx(i)=gm2(i)*vx2(i)+gm3(i)*vx3(i)+gm4(i)*vx4(i)
          qhy(i)=gm2(i)*vy2(i)+gm3(i)*vy3(i)+gm4(i)*vy4(i)
          qwz(i)=gm2(i)*vz2(i)+gm3(i)*vz3(i)+gm4(i)*vz4(i)
        enddo
c       Loop 2: per-element coefficients and the scaled values
c       qs1..qs5.  c3 first holds a common factor used to form c2
c       and c1, and is then overwritten with its own final value.
c       With the 1.e-16 term the divisor is nonzero when dt1siz(i)
c       is exactly zero (presumably the intent - confirm).
c       This is the only loop that calls sqrt; the dg-final
c       directives distinguish targets by vect_call_sqrtf.
        do i=lft,llt
          c3= sqrt(abs(sarea(i)))*thick(i)/(dt1siz(i)+1.e-16)
          c2=(hgfac*qhgw)*c3
          c1=(hgfac*qhgb*.01)*c3*thick(i)*thick(i)
          c3=(hgfac*qhgm)*c3
          qtx(i)=gm2(i)*vx6(i)+gm3(i)*vx7(i)+gm4(i)*vx8(i)
          qty(i)=gm2(i)*vy6(i)+gm3(i)*vy7(i)+gm4(i)*vy8(i)
          xll2 =2.*xll(i)
          qhxi =qhx(i)+xll2*rigy(i)
          qhyi =qhy(i)-xll2*rigx(i)
          qs1(i)=c3*qhxi
          qs2(i)=c3*qhyi
          qs3(i)=c2*qwz(i)
          qs4(i)=c1*qtx(i)
          qs5(i)=c1*qty(i)
        enddo
c
c
c
        if (isolvr(18).eq.0) then
c
c         Variant A: only fm11..fm42 are accumulated; none of the
c         ft arrays is touched on this path.
          do i=lft,llt
            fm11(i)= fm11(i)+gm1(i)*qs4(i)
            fm12(i)= fm12(i)+gm1(i)*qs5(i)
            fm21(i)= fm21(i)+gm2(i)*qs4(i)
            fm22(i)= fm22(i)+gm2(i)*qs5(i)
            fm31(i)= fm31(i)+gm3(i)*qs4(i)
            fm32(i)= fm32(i)+gm3(i)*qs5(i)
            fm41(i)= fm41(i)+gm4(i)*qs4(i)
            fm42(i)= fm42(i)+gm4(i)*qs5(i)
          enddo
C
        else
c
c         Variant B: ft31..ft43 are formed from the values of
c         ft11..ft23 as they are BEFORE the in-place updates that
c         follow in the same iteration; then ft11..ft23 and
c         fm11..fm42 are accumulated.
          do 45 i=lft,llt
            ft31(i)=-ft11(i)+gm3(i)*qs1(i)
            ft32(i)=-ft12(i)+gm3(i)*qs2(i)
            ft33(i)=-ft13(i)+gm3(i)*qs3(i)
            ft41(i)=-ft21(i)+gm4(i)*qs1(i)
            ft42(i)=-ft22(i)+gm4(i)*qs2(i)
            ft43(i)=-ft23(i)+gm4(i)*qs3(i)
            ft11(i)= ft11(i)+gm1(i)*qs1(i)
            ft12(i)= ft12(i)+gm1(i)*qs2(i)
            ft13(i)= ft13(i)+gm1(i)*qs3(i)
            ft21(i)= ft21(i)+gm2(i)*qs1(i)
            ft22(i)= ft22(i)+gm2(i)*qs2(i)
            ft23(i)= ft23(i)+gm2(i)*qs3(i)
            fm11(i)= fm11(i)+gm1(i)*qs4(i)
            fm12(i)= fm12(i)+gm1(i)*qs5(i)
            fm21(i)= fm21(i)+gm2(i)*qs4(i)
            fm22(i)= fm22(i)+gm2(i)*qs5(i)
            fm31(i)= fm31(i)+gm3(i)*qs4(i)
            fm32(i)= fm32(i)+gm3(i)*qs5(i)
            fm41(i)= fm41(i)+gm4(i)*qs4(i)
            fm42(i)= fm42(i)+gm4(i)*qs5(i)
   45     continue
        endif
c
      else
c
c       Threshold not exceeded: ft31..ft43 become the negatives of
c       ft11..ft23; nothing else is modified on this path.
        do 40 i=lft,llt
          ft31(i)=-ft11(i)
          ft32(i)=-ft12(i)
          ft33(i)=-ft13(i)
          ft41(i)=-ft21(i)
          ft42(i)=-ft22(i)
          ft43(i)=-ft23(i)
   40   continue
      endif
c
c
c     Executed on every path: form fz1..fz4 from the ft arrays with
c     coefficients gl31, gl32, gl33, and mz1..mz4 from the fm
c     arrays with gl31 and gl32 only.
      do i=lft,llt
        mz1(i)=gl31(i)*fm11(i)+gl32(i)*fm12(i)
        mz2(i)=gl31(i)*fm21(i)+gl32(i)*fm22(i)
        fz1(i)=gl31(i)*ft11(i)+gl32(i)*ft12(i)+gl33(i)*ft13(i)
        fz2(i)=gl31(i)*ft21(i)+gl32(i)*ft22(i)+gl33(i)*ft23(i)
        mz3(i)=gl31(i)*fm31(i)+gl32(i)*fm32(i)
        mz4(i)=gl31(i)*fm41(i)+gl32(i)*fm42(i)
        fz3(i)=gl31(i)*ft31(i)+gl32(i)*ft32(i)+gl33(i)*ft33(i)
        fz4(i)=gl31(i)*ft41(i)+gl32(i)*ft42(i)+gl33(i)*ft43(i)
      enddo
c     No statement in this routine branches to label 90.
   90 continue
c
c     When output is true, copy fx1 and fy1 into columns 1 and 2 of
c     savfrc and set ndof to 4.
      if (output) then
        do i=lft,llt
          savfrc(i, 1)= fx1(i)
          savfrc(i, 2)= fy1(i)
        enddo
c
        ndof=4
c       ifail is still 0 here (set at entry, no later assignment in
c       this routine), so as written this copy is never executed.
        if (ifail.eq.1) then
          do i=lft,llt
            svfail(i)=fail(i)
          enddo
        endif
      endif
c
      return
      end

! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_element_align } } } }
! { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { target { vect_element_align && { ! vect_call_sqrtf } } } } }
! { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target { vect_element_align && vect_call_sqrtf } } } }
! { dg-final { cleanup-tree-dump "vect" } }