level2float32.go 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297
  1. // Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
  2. // Copyright ©2014 The Gonum Authors. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package gonum
  6. import (
  7. "gonum.org/v1/gonum/blas"
  8. "gonum.org/v1/gonum/internal/asm/f32"
  9. )
  10. var _ blas.Float32Level2 = Implementation{}
  11. // Sger performs the rank-one operation
  12. // A += alpha * x * yᵀ
  13. // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
  14. //
  15. // Float32 implementations are autogenerated and not directly tested.
  16. func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) {
  17. if m < 0 {
  18. panic(mLT0)
  19. }
  20. if n < 0 {
  21. panic(nLT0)
  22. }
  23. if lda < max(1, n) {
  24. panic(badLdA)
  25. }
  26. if incX == 0 {
  27. panic(zeroIncX)
  28. }
  29. if incY == 0 {
  30. panic(zeroIncY)
  31. }
  32. // Quick return if possible.
  33. if m == 0 || n == 0 {
  34. return
  35. }
  36. // For zero matrix size the following slice length checks are trivially satisfied.
  37. if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) {
  38. panic(shortX)
  39. }
  40. if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
  41. panic(shortY)
  42. }
  43. if len(a) < lda*(m-1)+n {
  44. panic(shortA)
  45. }
  46. // Quick return if possible.
  47. if alpha == 0 {
  48. return
  49. }
  50. f32.Ger(uintptr(m), uintptr(n),
  51. alpha,
  52. x, uintptr(incX),
  53. y, uintptr(incY),
  54. a, uintptr(lda))
  55. }
  56. // Sgbmv performs one of the matrix-vector operations
  57. // y = alpha * A * x + beta * y if tA == blas.NoTrans
  58. // y = alpha * Aᵀ * x + beta * y if tA == blas.Trans or blas.ConjTrans
  59. // where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals,
  60. // x and y are vectors, and alpha and beta are scalars.
  61. //
  62. // Float32 implementations are autogenerated and not directly tested.
  63. func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
  64. if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  65. panic(badTranspose)
  66. }
  67. if m < 0 {
  68. panic(mLT0)
  69. }
  70. if n < 0 {
  71. panic(nLT0)
  72. }
  73. if kL < 0 {
  74. panic(kLLT0)
  75. }
  76. if kU < 0 {
  77. panic(kULT0)
  78. }
  79. if lda < kL+kU+1 {
  80. panic(badLdA)
  81. }
  82. if incX == 0 {
  83. panic(zeroIncX)
  84. }
  85. if incY == 0 {
  86. panic(zeroIncY)
  87. }
  88. // Quick return if possible.
  89. if m == 0 || n == 0 {
  90. return
  91. }
  92. // For zero matrix size the following slice length checks are trivially satisfied.
  93. if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 {
  94. panic(shortA)
  95. }
  96. lenX := m
  97. lenY := n
  98. if tA == blas.NoTrans {
  99. lenX = n
  100. lenY = m
  101. }
  102. if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) {
  103. panic(shortX)
  104. }
  105. if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) {
  106. panic(shortY)
  107. }
  108. // Quick return if possible.
  109. if alpha == 0 && beta == 1 {
  110. return
  111. }
  112. var kx, ky int
  113. if incX < 0 {
  114. kx = -(lenX - 1) * incX
  115. }
  116. if incY < 0 {
  117. ky = -(lenY - 1) * incY
  118. }
  119. // Form y = beta * y.
  120. if beta != 1 {
  121. if incY == 1 {
  122. if beta == 0 {
  123. for i := range y[:lenY] {
  124. y[i] = 0
  125. }
  126. } else {
  127. f32.ScalUnitary(beta, y[:lenY])
  128. }
  129. } else {
  130. iy := ky
  131. if beta == 0 {
  132. for i := 0; i < lenY; i++ {
  133. y[iy] = 0
  134. iy += incY
  135. }
  136. } else {
  137. if incY > 0 {
  138. f32.ScalInc(beta, y, uintptr(lenY), uintptr(incY))
  139. } else {
  140. f32.ScalInc(beta, y, uintptr(lenY), uintptr(-incY))
  141. }
  142. }
  143. }
  144. }
  145. if alpha == 0 {
  146. return
  147. }
  148. // i and j are indices of the compacted banded matrix.
  149. // off is the offset into the dense matrix (off + j = densej)
  150. nCol := kU + 1 + kL
  151. if tA == blas.NoTrans {
  152. iy := ky
  153. if incX == 1 {
  154. for i := 0; i < min(m, n+kL); i++ {
  155. l := max(0, kL-i)
  156. u := min(nCol, n+kL-i)
  157. off := max(0, i-kL)
  158. atmp := a[i*lda+l : i*lda+u]
  159. xtmp := x[off : off+u-l]
  160. var sum float32
  161. for j, v := range atmp {
  162. sum += xtmp[j] * v
  163. }
  164. y[iy] += sum * alpha
  165. iy += incY
  166. }
  167. return
  168. }
  169. for i := 0; i < min(m, n+kL); i++ {
  170. l := max(0, kL-i)
  171. u := min(nCol, n+kL-i)
  172. off := max(0, i-kL)
  173. atmp := a[i*lda+l : i*lda+u]
  174. jx := kx
  175. var sum float32
  176. for _, v := range atmp {
  177. sum += x[off*incX+jx] * v
  178. jx += incX
  179. }
  180. y[iy] += sum * alpha
  181. iy += incY
  182. }
  183. return
  184. }
  185. if incX == 1 {
  186. for i := 0; i < min(m, n+kL); i++ {
  187. l := max(0, kL-i)
  188. u := min(nCol, n+kL-i)
  189. off := max(0, i-kL)
  190. atmp := a[i*lda+l : i*lda+u]
  191. tmp := alpha * x[i]
  192. jy := ky
  193. for _, v := range atmp {
  194. y[jy+off*incY] += tmp * v
  195. jy += incY
  196. }
  197. }
  198. return
  199. }
  200. ix := kx
  201. for i := 0; i < min(m, n+kL); i++ {
  202. l := max(0, kL-i)
  203. u := min(nCol, n+kL-i)
  204. off := max(0, i-kL)
  205. atmp := a[i*lda+l : i*lda+u]
  206. tmp := alpha * x[ix]
  207. jy := ky
  208. for _, v := range atmp {
  209. y[jy+off*incY] += tmp * v
  210. jy += incY
  211. }
  212. ix += incX
  213. }
  214. }
  215. // Strmv performs one of the matrix-vector operations
  216. // x = A * x if tA == blas.NoTrans
  217. // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans
  218. // where A is an n×n triangular matrix, and x is a vector.
  219. //
  220. // Float32 implementations are autogenerated and not directly tested.
  221. func (Implementation) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) {
  222. if ul != blas.Lower && ul != blas.Upper {
  223. panic(badUplo)
  224. }
  225. if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  226. panic(badTranspose)
  227. }
  228. if d != blas.NonUnit && d != blas.Unit {
  229. panic(badDiag)
  230. }
  231. if n < 0 {
  232. panic(nLT0)
  233. }
  234. if lda < max(1, n) {
  235. panic(badLdA)
  236. }
  237. if incX == 0 {
  238. panic(zeroIncX)
  239. }
  240. // Quick return if possible.
  241. if n == 0 {
  242. return
  243. }
  244. // For zero matrix size the following slice length checks are trivially satisfied.
  245. if len(a) < lda*(n-1)+n {
  246. panic(shortA)
  247. }
  248. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  249. panic(shortX)
  250. }
  251. nonUnit := d != blas.Unit
  252. if n == 1 {
  253. if nonUnit {
  254. x[0] *= a[0]
  255. }
  256. return
  257. }
  258. var kx int
  259. if incX <= 0 {
  260. kx = -(n - 1) * incX
  261. }
  262. if tA == blas.NoTrans {
  263. if ul == blas.Upper {
  264. if incX == 1 {
  265. for i := 0; i < n; i++ {
  266. ilda := i * lda
  267. var tmp float32
  268. if nonUnit {
  269. tmp = a[ilda+i] * x[i]
  270. } else {
  271. tmp = x[i]
  272. }
  273. x[i] = tmp + f32.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n])
  274. }
  275. return
  276. }
  277. ix := kx
  278. for i := 0; i < n; i++ {
  279. ilda := i * lda
  280. var tmp float32
  281. if nonUnit {
  282. tmp = a[ilda+i] * x[ix]
  283. } else {
  284. tmp = x[ix]
  285. }
  286. x[ix] = tmp + f32.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0)
  287. ix += incX
  288. }
  289. return
  290. }
  291. if incX == 1 {
  292. for i := n - 1; i >= 0; i-- {
  293. ilda := i * lda
  294. var tmp float32
  295. if nonUnit {
  296. tmp += a[ilda+i] * x[i]
  297. } else {
  298. tmp = x[i]
  299. }
  300. x[i] = tmp + f32.DotUnitary(a[ilda:ilda+i], x[:i])
  301. }
  302. return
  303. }
  304. ix := kx + (n-1)*incX
  305. for i := n - 1; i >= 0; i-- {
  306. ilda := i * lda
  307. var tmp float32
  308. if nonUnit {
  309. tmp = a[ilda+i] * x[ix]
  310. } else {
  311. tmp = x[ix]
  312. }
  313. x[ix] = tmp + f32.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0)
  314. ix -= incX
  315. }
  316. return
  317. }
  318. // Cases where a is transposed.
  319. if ul == blas.Upper {
  320. if incX == 1 {
  321. for i := n - 1; i >= 0; i-- {
  322. ilda := i * lda
  323. xi := x[i]
  324. f32.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n])
  325. if nonUnit {
  326. x[i] *= a[ilda+i]
  327. }
  328. }
  329. return
  330. }
  331. ix := kx + (n-1)*incX
  332. for i := n - 1; i >= 0; i-- {
  333. ilda := i * lda
  334. xi := x[ix]
  335. f32.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX))
  336. if nonUnit {
  337. x[ix] *= a[ilda+i]
  338. }
  339. ix -= incX
  340. }
  341. return
  342. }
  343. if incX == 1 {
  344. for i := 0; i < n; i++ {
  345. ilda := i * lda
  346. xi := x[i]
  347. f32.AxpyUnitary(xi, a[ilda:ilda+i], x[:i])
  348. if nonUnit {
  349. x[i] *= a[i*lda+i]
  350. }
  351. }
  352. return
  353. }
  354. ix := kx
  355. for i := 0; i < n; i++ {
  356. ilda := i * lda
  357. xi := x[ix]
  358. f32.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx))
  359. if nonUnit {
  360. x[ix] *= a[ilda+i]
  361. }
  362. ix += incX
  363. }
  364. }
  365. // Strsv solves one of the systems of equations
  366. // A * x = b if tA == blas.NoTrans
  367. // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans
  368. // where A is an n×n triangular matrix, and x and b are vectors.
  369. //
  370. // At entry to the function, x contains the values of b, and the result is
  371. // stored in-place into x.
  372. //
  373. // No test for singularity or near-singularity is included in this
  374. // routine. Such tests must be performed before calling this routine.
  375. //
  376. // Float32 implementations are autogenerated and not directly tested.
  377. func (Implementation) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) {
  378. if ul != blas.Lower && ul != blas.Upper {
  379. panic(badUplo)
  380. }
  381. if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  382. panic(badTranspose)
  383. }
  384. if d != blas.NonUnit && d != blas.Unit {
  385. panic(badDiag)
  386. }
  387. if n < 0 {
  388. panic(nLT0)
  389. }
  390. if lda < max(1, n) {
  391. panic(badLdA)
  392. }
  393. if incX == 0 {
  394. panic(zeroIncX)
  395. }
  396. // Quick return if possible.
  397. if n == 0 {
  398. return
  399. }
  400. // For zero matrix size the following slice length checks are trivially satisfied.
  401. if len(a) < lda*(n-1)+n {
  402. panic(shortA)
  403. }
  404. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  405. panic(shortX)
  406. }
  407. if n == 1 {
  408. if d == blas.NonUnit {
  409. x[0] /= a[0]
  410. }
  411. return
  412. }
  413. var kx int
  414. if incX < 0 {
  415. kx = -(n - 1) * incX
  416. }
  417. nonUnit := d == blas.NonUnit
  418. if tA == blas.NoTrans {
  419. if ul == blas.Upper {
  420. if incX == 1 {
  421. for i := n - 1; i >= 0; i-- {
  422. var sum float32
  423. atmp := a[i*lda+i+1 : i*lda+n]
  424. for j, v := range atmp {
  425. jv := i + j + 1
  426. sum += x[jv] * v
  427. }
  428. x[i] -= sum
  429. if nonUnit {
  430. x[i] /= a[i*lda+i]
  431. }
  432. }
  433. return
  434. }
  435. ix := kx + (n-1)*incX
  436. for i := n - 1; i >= 0; i-- {
  437. var sum float32
  438. jx := ix + incX
  439. atmp := a[i*lda+i+1 : i*lda+n]
  440. for _, v := range atmp {
  441. sum += x[jx] * v
  442. jx += incX
  443. }
  444. x[ix] -= sum
  445. if nonUnit {
  446. x[ix] /= a[i*lda+i]
  447. }
  448. ix -= incX
  449. }
  450. return
  451. }
  452. if incX == 1 {
  453. for i := 0; i < n; i++ {
  454. var sum float32
  455. atmp := a[i*lda : i*lda+i]
  456. for j, v := range atmp {
  457. sum += x[j] * v
  458. }
  459. x[i] -= sum
  460. if nonUnit {
  461. x[i] /= a[i*lda+i]
  462. }
  463. }
  464. return
  465. }
  466. ix := kx
  467. for i := 0; i < n; i++ {
  468. jx := kx
  469. var sum float32
  470. atmp := a[i*lda : i*lda+i]
  471. for _, v := range atmp {
  472. sum += x[jx] * v
  473. jx += incX
  474. }
  475. x[ix] -= sum
  476. if nonUnit {
  477. x[ix] /= a[i*lda+i]
  478. }
  479. ix += incX
  480. }
  481. return
  482. }
  483. // Cases where a is transposed.
  484. if ul == blas.Upper {
  485. if incX == 1 {
  486. for i := 0; i < n; i++ {
  487. if nonUnit {
  488. x[i] /= a[i*lda+i]
  489. }
  490. xi := x[i]
  491. atmp := a[i*lda+i+1 : i*lda+n]
  492. for j, v := range atmp {
  493. jv := j + i + 1
  494. x[jv] -= v * xi
  495. }
  496. }
  497. return
  498. }
  499. ix := kx
  500. for i := 0; i < n; i++ {
  501. if nonUnit {
  502. x[ix] /= a[i*lda+i]
  503. }
  504. xi := x[ix]
  505. jx := kx + (i+1)*incX
  506. atmp := a[i*lda+i+1 : i*lda+n]
  507. for _, v := range atmp {
  508. x[jx] -= v * xi
  509. jx += incX
  510. }
  511. ix += incX
  512. }
  513. return
  514. }
  515. if incX == 1 {
  516. for i := n - 1; i >= 0; i-- {
  517. if nonUnit {
  518. x[i] /= a[i*lda+i]
  519. }
  520. xi := x[i]
  521. atmp := a[i*lda : i*lda+i]
  522. for j, v := range atmp {
  523. x[j] -= v * xi
  524. }
  525. }
  526. return
  527. }
  528. ix := kx + (n-1)*incX
  529. for i := n - 1; i >= 0; i-- {
  530. if nonUnit {
  531. x[ix] /= a[i*lda+i]
  532. }
  533. xi := x[ix]
  534. jx := kx
  535. atmp := a[i*lda : i*lda+i]
  536. for _, v := range atmp {
  537. x[jx] -= v * xi
  538. jx += incX
  539. }
  540. ix -= incX
  541. }
  542. }
  543. // Ssymv performs the matrix-vector operation
  544. // y = alpha * A * x + beta * y
  545. // where A is an n×n symmetric matrix, x and y are vectors, and alpha and
  546. // beta are scalars.
  547. //
  548. // Float32 implementations are autogenerated and not directly tested.
  549. func (Implementation) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
  550. if ul != blas.Lower && ul != blas.Upper {
  551. panic(badUplo)
  552. }
  553. if n < 0 {
  554. panic(nLT0)
  555. }
  556. if lda < max(1, n) {
  557. panic(badLdA)
  558. }
  559. if incX == 0 {
  560. panic(zeroIncX)
  561. }
  562. if incY == 0 {
  563. panic(zeroIncY)
  564. }
  565. // Quick return if possible.
  566. if n == 0 {
  567. return
  568. }
  569. // For zero matrix size the following slice length checks are trivially satisfied.
  570. if len(a) < lda*(n-1)+n {
  571. panic(shortA)
  572. }
  573. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  574. panic(shortX)
  575. }
  576. if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
  577. panic(shortY)
  578. }
  579. // Quick return if possible.
  580. if alpha == 0 && beta == 1 {
  581. return
  582. }
  583. // Set up start points
  584. var kx, ky int
  585. if incX < 0 {
  586. kx = -(n - 1) * incX
  587. }
  588. if incY < 0 {
  589. ky = -(n - 1) * incY
  590. }
  591. // Form y = beta * y
  592. if beta != 1 {
  593. if incY == 1 {
  594. if beta == 0 {
  595. for i := range y[:n] {
  596. y[i] = 0
  597. }
  598. } else {
  599. f32.ScalUnitary(beta, y[:n])
  600. }
  601. } else {
  602. iy := ky
  603. if beta == 0 {
  604. for i := 0; i < n; i++ {
  605. y[iy] = 0
  606. iy += incY
  607. }
  608. } else {
  609. if incY > 0 {
  610. f32.ScalInc(beta, y, uintptr(n), uintptr(incY))
  611. } else {
  612. f32.ScalInc(beta, y, uintptr(n), uintptr(-incY))
  613. }
  614. }
  615. }
  616. }
  617. if alpha == 0 {
  618. return
  619. }
  620. if n == 1 {
  621. y[0] += alpha * a[0] * x[0]
  622. return
  623. }
  624. if ul == blas.Upper {
  625. if incX == 1 {
  626. iy := ky
  627. for i := 0; i < n; i++ {
  628. xv := x[i] * alpha
  629. sum := x[i] * a[i*lda+i]
  630. jy := ky + (i+1)*incY
  631. atmp := a[i*lda+i+1 : i*lda+n]
  632. for j, v := range atmp {
  633. jp := j + i + 1
  634. sum += x[jp] * v
  635. y[jy] += xv * v
  636. jy += incY
  637. }
  638. y[iy] += alpha * sum
  639. iy += incY
  640. }
  641. return
  642. }
  643. ix := kx
  644. iy := ky
  645. for i := 0; i < n; i++ {
  646. xv := x[ix] * alpha
  647. sum := x[ix] * a[i*lda+i]
  648. jx := kx + (i+1)*incX
  649. jy := ky + (i+1)*incY
  650. atmp := a[i*lda+i+1 : i*lda+n]
  651. for _, v := range atmp {
  652. sum += x[jx] * v
  653. y[jy] += xv * v
  654. jx += incX
  655. jy += incY
  656. }
  657. y[iy] += alpha * sum
  658. ix += incX
  659. iy += incY
  660. }
  661. return
  662. }
  663. // Cases where a is lower triangular.
  664. if incX == 1 {
  665. iy := ky
  666. for i := 0; i < n; i++ {
  667. jy := ky
  668. xv := alpha * x[i]
  669. atmp := a[i*lda : i*lda+i]
  670. var sum float32
  671. for j, v := range atmp {
  672. sum += x[j] * v
  673. y[jy] += xv * v
  674. jy += incY
  675. }
  676. sum += x[i] * a[i*lda+i]
  677. sum *= alpha
  678. y[iy] += sum
  679. iy += incY
  680. }
  681. return
  682. }
  683. ix := kx
  684. iy := ky
  685. for i := 0; i < n; i++ {
  686. jx := kx
  687. jy := ky
  688. xv := alpha * x[ix]
  689. atmp := a[i*lda : i*lda+i]
  690. var sum float32
  691. for _, v := range atmp {
  692. sum += x[jx] * v
  693. y[jy] += xv * v
  694. jx += incX
  695. jy += incY
  696. }
  697. sum += x[ix] * a[i*lda+i]
  698. sum *= alpha
  699. y[iy] += sum
  700. ix += incX
  701. iy += incY
  702. }
  703. }
  704. // Stbmv performs one of the matrix-vector operations
  705. // x = A * x if tA == blas.NoTrans
  706. // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans
  707. // where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector.
  708. //
  709. // Float32 implementations are autogenerated and not directly tested.
  710. func (Implementation) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) {
  711. if ul != blas.Lower && ul != blas.Upper {
  712. panic(badUplo)
  713. }
  714. if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  715. panic(badTranspose)
  716. }
  717. if d != blas.NonUnit && d != blas.Unit {
  718. panic(badDiag)
  719. }
  720. if n < 0 {
  721. panic(nLT0)
  722. }
  723. if k < 0 {
  724. panic(kLT0)
  725. }
  726. if lda < k+1 {
  727. panic(badLdA)
  728. }
  729. if incX == 0 {
  730. panic(zeroIncX)
  731. }
  732. // Quick return if possible.
  733. if n == 0 {
  734. return
  735. }
  736. // For zero matrix size the following slice length checks are trivially satisfied.
  737. if len(a) < lda*(n-1)+k+1 {
  738. panic(shortA)
  739. }
  740. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  741. panic(shortX)
  742. }
  743. var kx int
  744. if incX < 0 {
  745. kx = -(n - 1) * incX
  746. }
  747. nonunit := d != blas.Unit
  748. if tA == blas.NoTrans {
  749. if ul == blas.Upper {
  750. if incX == 1 {
  751. for i := 0; i < n; i++ {
  752. u := min(1+k, n-i)
  753. var sum float32
  754. atmp := a[i*lda:]
  755. xtmp := x[i:]
  756. for j := 1; j < u; j++ {
  757. sum += xtmp[j] * atmp[j]
  758. }
  759. if nonunit {
  760. sum += xtmp[0] * atmp[0]
  761. } else {
  762. sum += xtmp[0]
  763. }
  764. x[i] = sum
  765. }
  766. return
  767. }
  768. ix := kx
  769. for i := 0; i < n; i++ {
  770. u := min(1+k, n-i)
  771. var sum float32
  772. atmp := a[i*lda:]
  773. jx := incX
  774. for j := 1; j < u; j++ {
  775. sum += x[ix+jx] * atmp[j]
  776. jx += incX
  777. }
  778. if nonunit {
  779. sum += x[ix] * atmp[0]
  780. } else {
  781. sum += x[ix]
  782. }
  783. x[ix] = sum
  784. ix += incX
  785. }
  786. return
  787. }
  788. if incX == 1 {
  789. for i := n - 1; i >= 0; i-- {
  790. l := max(0, k-i)
  791. atmp := a[i*lda:]
  792. var sum float32
  793. for j := l; j < k; j++ {
  794. sum += x[i-k+j] * atmp[j]
  795. }
  796. if nonunit {
  797. sum += x[i] * atmp[k]
  798. } else {
  799. sum += x[i]
  800. }
  801. x[i] = sum
  802. }
  803. return
  804. }
  805. ix := kx + (n-1)*incX
  806. for i := n - 1; i >= 0; i-- {
  807. l := max(0, k-i)
  808. atmp := a[i*lda:]
  809. var sum float32
  810. jx := l * incX
  811. for j := l; j < k; j++ {
  812. sum += x[ix-k*incX+jx] * atmp[j]
  813. jx += incX
  814. }
  815. if nonunit {
  816. sum += x[ix] * atmp[k]
  817. } else {
  818. sum += x[ix]
  819. }
  820. x[ix] = sum
  821. ix -= incX
  822. }
  823. return
  824. }
  825. if ul == blas.Upper {
  826. if incX == 1 {
  827. for i := n - 1; i >= 0; i-- {
  828. u := k + 1
  829. if i < u {
  830. u = i + 1
  831. }
  832. var sum float32
  833. for j := 1; j < u; j++ {
  834. sum += x[i-j] * a[(i-j)*lda+j]
  835. }
  836. if nonunit {
  837. sum += x[i] * a[i*lda]
  838. } else {
  839. sum += x[i]
  840. }
  841. x[i] = sum
  842. }
  843. return
  844. }
  845. ix := kx + (n-1)*incX
  846. for i := n - 1; i >= 0; i-- {
  847. u := k + 1
  848. if i < u {
  849. u = i + 1
  850. }
  851. var sum float32
  852. jx := incX
  853. for j := 1; j < u; j++ {
  854. sum += x[ix-jx] * a[(i-j)*lda+j]
  855. jx += incX
  856. }
  857. if nonunit {
  858. sum += x[ix] * a[i*lda]
  859. } else {
  860. sum += x[ix]
  861. }
  862. x[ix] = sum
  863. ix -= incX
  864. }
  865. return
  866. }
  867. if incX == 1 {
  868. for i := 0; i < n; i++ {
  869. u := k
  870. if i+k >= n {
  871. u = n - i - 1
  872. }
  873. var sum float32
  874. for j := 0; j < u; j++ {
  875. sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1]
  876. }
  877. if nonunit {
  878. sum += x[i] * a[i*lda+k]
  879. } else {
  880. sum += x[i]
  881. }
  882. x[i] = sum
  883. }
  884. return
  885. }
  886. ix := kx
  887. for i := 0; i < n; i++ {
  888. u := k
  889. if i+k >= n {
  890. u = n - i - 1
  891. }
  892. var (
  893. sum float32
  894. jx int
  895. )
  896. for j := 0; j < u; j++ {
  897. sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1]
  898. jx += incX
  899. }
  900. if nonunit {
  901. sum += x[ix] * a[i*lda+k]
  902. } else {
  903. sum += x[ix]
  904. }
  905. x[ix] = sum
  906. ix += incX
  907. }
  908. }
  909. // Stpmv performs one of the matrix-vector operations
  910. // x = A * x if tA == blas.NoTrans
  911. // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans
  912. // where A is an n×n triangular matrix in packed format, and x is a vector.
  913. //
  914. // Float32 implementations are autogenerated and not directly tested.
  915. func (Implementation) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) {
  916. if ul != blas.Lower && ul != blas.Upper {
  917. panic(badUplo)
  918. }
  919. if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  920. panic(badTranspose)
  921. }
  922. if d != blas.NonUnit && d != blas.Unit {
  923. panic(badDiag)
  924. }
  925. if n < 0 {
  926. panic(nLT0)
  927. }
  928. if incX == 0 {
  929. panic(zeroIncX)
  930. }
  931. // Quick return if possible.
  932. if n == 0 {
  933. return
  934. }
  935. // For zero matrix size the following slice length checks are trivially satisfied.
  936. if len(ap) < n*(n+1)/2 {
  937. panic(shortAP)
  938. }
  939. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  940. panic(shortX)
  941. }
  942. var kx int
  943. if incX < 0 {
  944. kx = -(n - 1) * incX
  945. }
  946. nonUnit := d == blas.NonUnit
  947. var offset int // Offset is the index of (i,i)
  948. if tA == blas.NoTrans {
  949. if ul == blas.Upper {
  950. if incX == 1 {
  951. for i := 0; i < n; i++ {
  952. xi := x[i]
  953. if nonUnit {
  954. xi *= ap[offset]
  955. }
  956. atmp := ap[offset+1 : offset+n-i]
  957. xtmp := x[i+1:]
  958. for j, v := range atmp {
  959. xi += v * xtmp[j]
  960. }
  961. x[i] = xi
  962. offset += n - i
  963. }
  964. return
  965. }
  966. ix := kx
  967. for i := 0; i < n; i++ {
  968. xix := x[ix]
  969. if nonUnit {
  970. xix *= ap[offset]
  971. }
  972. atmp := ap[offset+1 : offset+n-i]
  973. jx := kx + (i+1)*incX
  974. for _, v := range atmp {
  975. xix += v * x[jx]
  976. jx += incX
  977. }
  978. x[ix] = xix
  979. offset += n - i
  980. ix += incX
  981. }
  982. return
  983. }
  984. if incX == 1 {
  985. offset = n*(n+1)/2 - 1
  986. for i := n - 1; i >= 0; i-- {
  987. xi := x[i]
  988. if nonUnit {
  989. xi *= ap[offset]
  990. }
  991. atmp := ap[offset-i : offset]
  992. for j, v := range atmp {
  993. xi += v * x[j]
  994. }
  995. x[i] = xi
  996. offset -= i + 1
  997. }
  998. return
  999. }
  1000. ix := kx + (n-1)*incX
  1001. offset = n*(n+1)/2 - 1
  1002. for i := n - 1; i >= 0; i-- {
  1003. xix := x[ix]
  1004. if nonUnit {
  1005. xix *= ap[offset]
  1006. }
  1007. atmp := ap[offset-i : offset]
  1008. jx := kx
  1009. for _, v := range atmp {
  1010. xix += v * x[jx]
  1011. jx += incX
  1012. }
  1013. x[ix] = xix
  1014. offset -= i + 1
  1015. ix -= incX
  1016. }
  1017. return
  1018. }
  1019. // Cases where ap is transposed.
  1020. if ul == blas.Upper {
  1021. if incX == 1 {
  1022. offset = n*(n+1)/2 - 1
  1023. for i := n - 1; i >= 0; i-- {
  1024. xi := x[i]
  1025. atmp := ap[offset+1 : offset+n-i]
  1026. xtmp := x[i+1:]
  1027. for j, v := range atmp {
  1028. xtmp[j] += v * xi
  1029. }
  1030. if nonUnit {
  1031. x[i] *= ap[offset]
  1032. }
  1033. offset -= n - i + 1
  1034. }
  1035. return
  1036. }
  1037. ix := kx + (n-1)*incX
  1038. offset = n*(n+1)/2 - 1
  1039. for i := n - 1; i >= 0; i-- {
  1040. xix := x[ix]
  1041. jx := kx + (i+1)*incX
  1042. atmp := ap[offset+1 : offset+n-i]
  1043. for _, v := range atmp {
  1044. x[jx] += v * xix
  1045. jx += incX
  1046. }
  1047. if nonUnit {
  1048. x[ix] *= ap[offset]
  1049. }
  1050. offset -= n - i + 1
  1051. ix -= incX
  1052. }
  1053. return
  1054. }
  1055. if incX == 1 {
  1056. for i := 0; i < n; i++ {
  1057. xi := x[i]
  1058. atmp := ap[offset-i : offset]
  1059. for j, v := range atmp {
  1060. x[j] += v * xi
  1061. }
  1062. if nonUnit {
  1063. x[i] *= ap[offset]
  1064. }
  1065. offset += i + 2
  1066. }
  1067. return
  1068. }
  1069. ix := kx
  1070. for i := 0; i < n; i++ {
  1071. xix := x[ix]
  1072. jx := kx
  1073. atmp := ap[offset-i : offset]
  1074. for _, v := range atmp {
  1075. x[jx] += v * xix
  1076. jx += incX
  1077. }
  1078. if nonUnit {
  1079. x[ix] *= ap[offset]
  1080. }
  1081. ix += incX
  1082. offset += i + 2
  1083. }
  1084. }
  1085. // Stbsv solves one of the systems of equations
  1086. // A * x = b if tA == blas.NoTrans
  1087. // Aᵀ * x = b if tA == blas.Trans or tA == blas.ConjTrans
  1088. // where A is an n×n triangular band matrix with k+1 diagonals,
  1089. // and x and b are vectors.
  1090. //
  1091. // At entry to the function, x contains the values of b, and the result is
  1092. // stored in-place into x.
  1093. //
  1094. // No test for singularity or near-singularity is included in this
  1095. // routine. Such tests must be performed before calling this routine.
  1096. //
  1097. // Float32 implementations are autogenerated and not directly tested.
  1098. func (Implementation) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) {
  1099. if ul != blas.Lower && ul != blas.Upper {
  1100. panic(badUplo)
  1101. }
  1102. if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  1103. panic(badTranspose)
  1104. }
  1105. if d != blas.NonUnit && d != blas.Unit {
  1106. panic(badDiag)
  1107. }
  1108. if n < 0 {
  1109. panic(nLT0)
  1110. }
  1111. if k < 0 {
  1112. panic(kLT0)
  1113. }
  1114. if lda < k+1 {
  1115. panic(badLdA)
  1116. }
  1117. if incX == 0 {
  1118. panic(zeroIncX)
  1119. }
  1120. // Quick return if possible.
  1121. if n == 0 {
  1122. return
  1123. }
  1124. // For zero matrix size the following slice length checks are trivially satisfied.
  1125. if len(a) < lda*(n-1)+k+1 {
  1126. panic(shortA)
  1127. }
  1128. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1129. panic(shortX)
  1130. }
  1131. var kx int
  1132. if incX < 0 {
  1133. kx = -(n - 1) * incX
  1134. }
  1135. nonUnit := d == blas.NonUnit
  1136. // Form x = A^-1 x.
  1137. // Several cases below use subslices for speed improvement.
  1138. // The incX != 1 cases usually do not because incX may be negative.
  1139. if tA == blas.NoTrans {
  1140. if ul == blas.Upper {
  1141. if incX == 1 {
  1142. for i := n - 1; i >= 0; i-- {
  1143. bands := k
  1144. if i+bands >= n {
  1145. bands = n - i - 1
  1146. }
  1147. atmp := a[i*lda+1:]
  1148. xtmp := x[i+1 : i+bands+1]
  1149. var sum float32
  1150. for j, v := range xtmp {
  1151. sum += v * atmp[j]
  1152. }
  1153. x[i] -= sum
  1154. if nonUnit {
  1155. x[i] /= a[i*lda]
  1156. }
  1157. }
  1158. return
  1159. }
  1160. ix := kx + (n-1)*incX
  1161. for i := n - 1; i >= 0; i-- {
  1162. max := k + 1
  1163. if i+max > n {
  1164. max = n - i
  1165. }
  1166. atmp := a[i*lda:]
  1167. var (
  1168. jx int
  1169. sum float32
  1170. )
  1171. for j := 1; j < max; j++ {
  1172. jx += incX
  1173. sum += x[ix+jx] * atmp[j]
  1174. }
  1175. x[ix] -= sum
  1176. if nonUnit {
  1177. x[ix] /= atmp[0]
  1178. }
  1179. ix -= incX
  1180. }
  1181. return
  1182. }
  1183. if incX == 1 {
  1184. for i := 0; i < n; i++ {
  1185. bands := k
  1186. if i-k < 0 {
  1187. bands = i
  1188. }
  1189. atmp := a[i*lda+k-bands:]
  1190. xtmp := x[i-bands : i]
  1191. var sum float32
  1192. for j, v := range xtmp {
  1193. sum += v * atmp[j]
  1194. }
  1195. x[i] -= sum
  1196. if nonUnit {
  1197. x[i] /= atmp[bands]
  1198. }
  1199. }
  1200. return
  1201. }
  1202. ix := kx
  1203. for i := 0; i < n; i++ {
  1204. bands := k
  1205. if i-k < 0 {
  1206. bands = i
  1207. }
  1208. atmp := a[i*lda+k-bands:]
  1209. var (
  1210. sum float32
  1211. jx int
  1212. )
  1213. for j := 0; j < bands; j++ {
  1214. sum += x[ix-bands*incX+jx] * atmp[j]
  1215. jx += incX
  1216. }
  1217. x[ix] -= sum
  1218. if nonUnit {
  1219. x[ix] /= atmp[bands]
  1220. }
  1221. ix += incX
  1222. }
  1223. return
  1224. }
  1225. // Cases where a is transposed.
  1226. if ul == blas.Upper {
  1227. if incX == 1 {
  1228. for i := 0; i < n; i++ {
  1229. bands := k
  1230. if i-k < 0 {
  1231. bands = i
  1232. }
  1233. var sum float32
  1234. for j := 0; j < bands; j++ {
  1235. sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j]
  1236. }
  1237. x[i] -= sum
  1238. if nonUnit {
  1239. x[i] /= a[i*lda]
  1240. }
  1241. }
  1242. return
  1243. }
  1244. ix := kx
  1245. for i := 0; i < n; i++ {
  1246. bands := k
  1247. if i-k < 0 {
  1248. bands = i
  1249. }
  1250. var (
  1251. sum float32
  1252. jx int
  1253. )
  1254. for j := 0; j < bands; j++ {
  1255. sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j]
  1256. jx += incX
  1257. }
  1258. x[ix] -= sum
  1259. if nonUnit {
  1260. x[ix] /= a[i*lda]
  1261. }
  1262. ix += incX
  1263. }
  1264. return
  1265. }
  1266. if incX == 1 {
  1267. for i := n - 1; i >= 0; i-- {
  1268. bands := k
  1269. if i+bands >= n {
  1270. bands = n - i - 1
  1271. }
  1272. var sum float32
  1273. xtmp := x[i+1 : i+1+bands]
  1274. for j, v := range xtmp {
  1275. sum += v * a[(i+j+1)*lda+k-j-1]
  1276. }
  1277. x[i] -= sum
  1278. if nonUnit {
  1279. x[i] /= a[i*lda+k]
  1280. }
  1281. }
  1282. return
  1283. }
  1284. ix := kx + (n-1)*incX
  1285. for i := n - 1; i >= 0; i-- {
  1286. bands := k
  1287. if i+bands >= n {
  1288. bands = n - i - 1
  1289. }
  1290. var (
  1291. sum float32
  1292. jx int
  1293. )
  1294. for j := 0; j < bands; j++ {
  1295. sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1]
  1296. jx += incX
  1297. }
  1298. x[ix] -= sum
  1299. if nonUnit {
  1300. x[ix] /= a[i*lda+k]
  1301. }
  1302. ix -= incX
  1303. }
  1304. }
  1305. // Ssbmv performs the matrix-vector operation
  1306. // y = alpha * A * x + beta * y
  1307. // where A is an n×n symmetric band matrix with k super-diagonals, x and y are
  1308. // vectors, and alpha and beta are scalars.
  1309. //
  1310. // Float32 implementations are autogenerated and not directly tested.
  1311. func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
  1312. if ul != blas.Lower && ul != blas.Upper {
  1313. panic(badUplo)
  1314. }
  1315. if n < 0 {
  1316. panic(nLT0)
  1317. }
  1318. if k < 0 {
  1319. panic(kLT0)
  1320. }
  1321. if lda < k+1 {
  1322. panic(badLdA)
  1323. }
  1324. if incX == 0 {
  1325. panic(zeroIncX)
  1326. }
  1327. if incY == 0 {
  1328. panic(zeroIncY)
  1329. }
  1330. // Quick return if possible.
  1331. if n == 0 {
  1332. return
  1333. }
  1334. // For zero matrix size the following slice length checks are trivially satisfied.
  1335. if len(a) < lda*(n-1)+k+1 {
  1336. panic(shortA)
  1337. }
  1338. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1339. panic(shortX)
  1340. }
  1341. if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
  1342. panic(shortY)
  1343. }
  1344. // Quick return if possible.
  1345. if alpha == 0 && beta == 1 {
  1346. return
  1347. }
  1348. // Set up indexes
  1349. lenX := n
  1350. lenY := n
  1351. var kx, ky int
  1352. if incX < 0 {
  1353. kx = -(lenX - 1) * incX
  1354. }
  1355. if incY < 0 {
  1356. ky = -(lenY - 1) * incY
  1357. }
  1358. // Form y = beta * y.
  1359. if beta != 1 {
  1360. if incY == 1 {
  1361. if beta == 0 {
  1362. for i := range y[:n] {
  1363. y[i] = 0
  1364. }
  1365. } else {
  1366. f32.ScalUnitary(beta, y[:n])
  1367. }
  1368. } else {
  1369. iy := ky
  1370. if beta == 0 {
  1371. for i := 0; i < n; i++ {
  1372. y[iy] = 0
  1373. iy += incY
  1374. }
  1375. } else {
  1376. if incY > 0 {
  1377. f32.ScalInc(beta, y, uintptr(n), uintptr(incY))
  1378. } else {
  1379. f32.ScalInc(beta, y, uintptr(n), uintptr(-incY))
  1380. }
  1381. }
  1382. }
  1383. }
  1384. if alpha == 0 {
  1385. return
  1386. }
  1387. if ul == blas.Upper {
  1388. if incX == 1 {
  1389. iy := ky
  1390. for i := 0; i < n; i++ {
  1391. atmp := a[i*lda:]
  1392. tmp := alpha * x[i]
  1393. sum := tmp * atmp[0]
  1394. u := min(k, n-i-1)
  1395. jy := incY
  1396. for j := 1; j <= u; j++ {
  1397. v := atmp[j]
  1398. sum += alpha * x[i+j] * v
  1399. y[iy+jy] += tmp * v
  1400. jy += incY
  1401. }
  1402. y[iy] += sum
  1403. iy += incY
  1404. }
  1405. return
  1406. }
  1407. ix := kx
  1408. iy := ky
  1409. for i := 0; i < n; i++ {
  1410. atmp := a[i*lda:]
  1411. tmp := alpha * x[ix]
  1412. sum := tmp * atmp[0]
  1413. u := min(k, n-i-1)
  1414. jx := incX
  1415. jy := incY
  1416. for j := 1; j <= u; j++ {
  1417. v := atmp[j]
  1418. sum += alpha * x[ix+jx] * v
  1419. y[iy+jy] += tmp * v
  1420. jx += incX
  1421. jy += incY
  1422. }
  1423. y[iy] += sum
  1424. ix += incX
  1425. iy += incY
  1426. }
  1427. return
  1428. }
  1429. // Casses where a has bands below the diagonal.
  1430. if incX == 1 {
  1431. iy := ky
  1432. for i := 0; i < n; i++ {
  1433. l := max(0, k-i)
  1434. tmp := alpha * x[i]
  1435. jy := l * incY
  1436. atmp := a[i*lda:]
  1437. for j := l; j < k; j++ {
  1438. v := atmp[j]
  1439. y[iy] += alpha * v * x[i-k+j]
  1440. y[iy-k*incY+jy] += tmp * v
  1441. jy += incY
  1442. }
  1443. y[iy] += tmp * atmp[k]
  1444. iy += incY
  1445. }
  1446. return
  1447. }
  1448. ix := kx
  1449. iy := ky
  1450. for i := 0; i < n; i++ {
  1451. l := max(0, k-i)
  1452. tmp := alpha * x[ix]
  1453. jx := l * incX
  1454. jy := l * incY
  1455. atmp := a[i*lda:]
  1456. for j := l; j < k; j++ {
  1457. v := atmp[j]
  1458. y[iy] += alpha * v * x[ix-k*incX+jx]
  1459. y[iy-k*incY+jy] += tmp * v
  1460. jx += incX
  1461. jy += incY
  1462. }
  1463. y[iy] += tmp * atmp[k]
  1464. ix += incX
  1465. iy += incY
  1466. }
  1467. }
  1468. // Ssyr performs the symmetric rank-one update
  1469. // A += alpha * x * xᵀ
  1470. // where A is an n×n symmetric matrix, and x is a vector.
  1471. //
  1472. // Float32 implementations are autogenerated and not directly tested.
  1473. func (Implementation) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) {
  1474. if ul != blas.Lower && ul != blas.Upper {
  1475. panic(badUplo)
  1476. }
  1477. if n < 0 {
  1478. panic(nLT0)
  1479. }
  1480. if lda < max(1, n) {
  1481. panic(badLdA)
  1482. }
  1483. if incX == 0 {
  1484. panic(zeroIncX)
  1485. }
  1486. // Quick return if possible.
  1487. if n == 0 {
  1488. return
  1489. }
  1490. // For zero matrix size the following slice length checks are trivially satisfied.
  1491. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1492. panic(shortX)
  1493. }
  1494. if len(a) < lda*(n-1)+n {
  1495. panic(shortA)
  1496. }
  1497. // Quick return if possible.
  1498. if alpha == 0 {
  1499. return
  1500. }
  1501. lenX := n
  1502. var kx int
  1503. if incX < 0 {
  1504. kx = -(lenX - 1) * incX
  1505. }
  1506. if ul == blas.Upper {
  1507. if incX == 1 {
  1508. for i := 0; i < n; i++ {
  1509. tmp := x[i] * alpha
  1510. if tmp != 0 {
  1511. atmp := a[i*lda+i : i*lda+n]
  1512. xtmp := x[i:n]
  1513. for j, v := range xtmp {
  1514. atmp[j] += v * tmp
  1515. }
  1516. }
  1517. }
  1518. return
  1519. }
  1520. ix := kx
  1521. for i := 0; i < n; i++ {
  1522. tmp := x[ix] * alpha
  1523. if tmp != 0 {
  1524. jx := ix
  1525. atmp := a[i*lda:]
  1526. for j := i; j < n; j++ {
  1527. atmp[j] += x[jx] * tmp
  1528. jx += incX
  1529. }
  1530. }
  1531. ix += incX
  1532. }
  1533. return
  1534. }
  1535. // Cases where a is lower triangular.
  1536. if incX == 1 {
  1537. for i := 0; i < n; i++ {
  1538. tmp := x[i] * alpha
  1539. if tmp != 0 {
  1540. atmp := a[i*lda:]
  1541. xtmp := x[:i+1]
  1542. for j, v := range xtmp {
  1543. atmp[j] += tmp * v
  1544. }
  1545. }
  1546. }
  1547. return
  1548. }
  1549. ix := kx
  1550. for i := 0; i < n; i++ {
  1551. tmp := x[ix] * alpha
  1552. if tmp != 0 {
  1553. atmp := a[i*lda:]
  1554. jx := kx
  1555. for j := 0; j < i+1; j++ {
  1556. atmp[j] += tmp * x[jx]
  1557. jx += incX
  1558. }
  1559. }
  1560. ix += incX
  1561. }
  1562. }
  1563. // Ssyr2 performs the symmetric rank-two update
  1564. // A += alpha * x * yᵀ + alpha * y * xᵀ
  1565. // where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar.
  1566. //
  1567. // Float32 implementations are autogenerated and not directly tested.
  1568. func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) {
  1569. if ul != blas.Lower && ul != blas.Upper {
  1570. panic(badUplo)
  1571. }
  1572. if n < 0 {
  1573. panic(nLT0)
  1574. }
  1575. if lda < max(1, n) {
  1576. panic(badLdA)
  1577. }
  1578. if incX == 0 {
  1579. panic(zeroIncX)
  1580. }
  1581. if incY == 0 {
  1582. panic(zeroIncY)
  1583. }
  1584. // Quick return if possible.
  1585. if n == 0 {
  1586. return
  1587. }
  1588. // For zero matrix size the following slice length checks are trivially satisfied.
  1589. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1590. panic(shortX)
  1591. }
  1592. if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
  1593. panic(shortY)
  1594. }
  1595. if len(a) < lda*(n-1)+n {
  1596. panic(shortA)
  1597. }
  1598. // Quick return if possible.
  1599. if alpha == 0 {
  1600. return
  1601. }
  1602. var ky, kx int
  1603. if incY < 0 {
  1604. ky = -(n - 1) * incY
  1605. }
  1606. if incX < 0 {
  1607. kx = -(n - 1) * incX
  1608. }
  1609. if ul == blas.Upper {
  1610. if incX == 1 && incY == 1 {
  1611. for i := 0; i < n; i++ {
  1612. xi := x[i]
  1613. yi := y[i]
  1614. atmp := a[i*lda:]
  1615. for j := i; j < n; j++ {
  1616. atmp[j] += alpha * (xi*y[j] + x[j]*yi)
  1617. }
  1618. }
  1619. return
  1620. }
  1621. ix := kx
  1622. iy := ky
  1623. for i := 0; i < n; i++ {
  1624. jx := kx + i*incX
  1625. jy := ky + i*incY
  1626. xi := x[ix]
  1627. yi := y[iy]
  1628. atmp := a[i*lda:]
  1629. for j := i; j < n; j++ {
  1630. atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
  1631. jx += incX
  1632. jy += incY
  1633. }
  1634. ix += incX
  1635. iy += incY
  1636. }
  1637. return
  1638. }
  1639. if incX == 1 && incY == 1 {
  1640. for i := 0; i < n; i++ {
  1641. xi := x[i]
  1642. yi := y[i]
  1643. atmp := a[i*lda:]
  1644. for j := 0; j <= i; j++ {
  1645. atmp[j] += alpha * (xi*y[j] + x[j]*yi)
  1646. }
  1647. }
  1648. return
  1649. }
  1650. ix := kx
  1651. iy := ky
  1652. for i := 0; i < n; i++ {
  1653. jx := kx
  1654. jy := ky
  1655. xi := x[ix]
  1656. yi := y[iy]
  1657. atmp := a[i*lda:]
  1658. for j := 0; j <= i; j++ {
  1659. atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
  1660. jx += incX
  1661. jy += incY
  1662. }
  1663. ix += incX
  1664. iy += incY
  1665. }
  1666. }
  1667. // Stpsv solves one of the systems of equations
  1668. // A * x = b if tA == blas.NoTrans
  1669. // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans
  1670. // where A is an n×n triangular matrix in packed format, and x and b are vectors.
  1671. //
  1672. // At entry to the function, x contains the values of b, and the result is
  1673. // stored in-place into x.
  1674. //
  1675. // No test for singularity or near-singularity is included in this
  1676. // routine. Such tests must be performed before calling this routine.
  1677. //
  1678. // Float32 implementations are autogenerated and not directly tested.
  1679. func (Implementation) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) {
  1680. if ul != blas.Lower && ul != blas.Upper {
  1681. panic(badUplo)
  1682. }
  1683. if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  1684. panic(badTranspose)
  1685. }
  1686. if d != blas.NonUnit && d != blas.Unit {
  1687. panic(badDiag)
  1688. }
  1689. if n < 0 {
  1690. panic(nLT0)
  1691. }
  1692. if incX == 0 {
  1693. panic(zeroIncX)
  1694. }
  1695. // Quick return if possible.
  1696. if n == 0 {
  1697. return
  1698. }
  1699. // For zero matrix size the following slice length checks are trivially satisfied.
  1700. if len(ap) < n*(n+1)/2 {
  1701. panic(shortAP)
  1702. }
  1703. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1704. panic(shortX)
  1705. }
  1706. var kx int
  1707. if incX < 0 {
  1708. kx = -(n - 1) * incX
  1709. }
  1710. nonUnit := d == blas.NonUnit
  1711. var offset int // Offset is the index of (i,i)
  1712. if tA == blas.NoTrans {
  1713. if ul == blas.Upper {
  1714. offset = n*(n+1)/2 - 1
  1715. if incX == 1 {
  1716. for i := n - 1; i >= 0; i-- {
  1717. atmp := ap[offset+1 : offset+n-i]
  1718. xtmp := x[i+1:]
  1719. var sum float32
  1720. for j, v := range atmp {
  1721. sum += v * xtmp[j]
  1722. }
  1723. x[i] -= sum
  1724. if nonUnit {
  1725. x[i] /= ap[offset]
  1726. }
  1727. offset -= n - i + 1
  1728. }
  1729. return
  1730. }
  1731. ix := kx + (n-1)*incX
  1732. for i := n - 1; i >= 0; i-- {
  1733. atmp := ap[offset+1 : offset+n-i]
  1734. jx := kx + (i+1)*incX
  1735. var sum float32
  1736. for _, v := range atmp {
  1737. sum += v * x[jx]
  1738. jx += incX
  1739. }
  1740. x[ix] -= sum
  1741. if nonUnit {
  1742. x[ix] /= ap[offset]
  1743. }
  1744. ix -= incX
  1745. offset -= n - i + 1
  1746. }
  1747. return
  1748. }
  1749. if incX == 1 {
  1750. for i := 0; i < n; i++ {
  1751. atmp := ap[offset-i : offset]
  1752. var sum float32
  1753. for j, v := range atmp {
  1754. sum += v * x[j]
  1755. }
  1756. x[i] -= sum
  1757. if nonUnit {
  1758. x[i] /= ap[offset]
  1759. }
  1760. offset += i + 2
  1761. }
  1762. return
  1763. }
  1764. ix := kx
  1765. for i := 0; i < n; i++ {
  1766. jx := kx
  1767. atmp := ap[offset-i : offset]
  1768. var sum float32
  1769. for _, v := range atmp {
  1770. sum += v * x[jx]
  1771. jx += incX
  1772. }
  1773. x[ix] -= sum
  1774. if nonUnit {
  1775. x[ix] /= ap[offset]
  1776. }
  1777. ix += incX
  1778. offset += i + 2
  1779. }
  1780. return
  1781. }
  1782. // Cases where ap is transposed.
  1783. if ul == blas.Upper {
  1784. if incX == 1 {
  1785. for i := 0; i < n; i++ {
  1786. if nonUnit {
  1787. x[i] /= ap[offset]
  1788. }
  1789. xi := x[i]
  1790. atmp := ap[offset+1 : offset+n-i]
  1791. xtmp := x[i+1:]
  1792. for j, v := range atmp {
  1793. xtmp[j] -= v * xi
  1794. }
  1795. offset += n - i
  1796. }
  1797. return
  1798. }
  1799. ix := kx
  1800. for i := 0; i < n; i++ {
  1801. if nonUnit {
  1802. x[ix] /= ap[offset]
  1803. }
  1804. xix := x[ix]
  1805. atmp := ap[offset+1 : offset+n-i]
  1806. jx := kx + (i+1)*incX
  1807. for _, v := range atmp {
  1808. x[jx] -= v * xix
  1809. jx += incX
  1810. }
  1811. ix += incX
  1812. offset += n - i
  1813. }
  1814. return
  1815. }
  1816. if incX == 1 {
  1817. offset = n*(n+1)/2 - 1
  1818. for i := n - 1; i >= 0; i-- {
  1819. if nonUnit {
  1820. x[i] /= ap[offset]
  1821. }
  1822. xi := x[i]
  1823. atmp := ap[offset-i : offset]
  1824. for j, v := range atmp {
  1825. x[j] -= v * xi
  1826. }
  1827. offset -= i + 1
  1828. }
  1829. return
  1830. }
  1831. ix := kx + (n-1)*incX
  1832. offset = n*(n+1)/2 - 1
  1833. for i := n - 1; i >= 0; i-- {
  1834. if nonUnit {
  1835. x[ix] /= ap[offset]
  1836. }
  1837. xix := x[ix]
  1838. atmp := ap[offset-i : offset]
  1839. jx := kx
  1840. for _, v := range atmp {
  1841. x[jx] -= v * xix
  1842. jx += incX
  1843. }
  1844. ix -= incX
  1845. offset -= i + 1
  1846. }
  1847. }
  1848. // Sspmv performs the matrix-vector operation
  1849. // y = alpha * A * x + beta * y
  1850. // where A is an n×n symmetric matrix in packed format, x and y are vectors,
  1851. // and alpha and beta are scalars.
  1852. //
  1853. // Float32 implementations are autogenerated and not directly tested.
  1854. func (Implementation) Sspmv(ul blas.Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int) {
  1855. if ul != blas.Lower && ul != blas.Upper {
  1856. panic(badUplo)
  1857. }
  1858. if n < 0 {
  1859. panic(nLT0)
  1860. }
  1861. if incX == 0 {
  1862. panic(zeroIncX)
  1863. }
  1864. if incY == 0 {
  1865. panic(zeroIncY)
  1866. }
  1867. // Quick return if possible.
  1868. if n == 0 {
  1869. return
  1870. }
  1871. // For zero matrix size the following slice length checks are trivially satisfied.
  1872. if len(ap) < n*(n+1)/2 {
  1873. panic(shortAP)
  1874. }
  1875. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1876. panic(shortX)
  1877. }
  1878. if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
  1879. panic(shortY)
  1880. }
  1881. // Quick return if possible.
  1882. if alpha == 0 && beta == 1 {
  1883. return
  1884. }
  1885. // Set up start points
  1886. var kx, ky int
  1887. if incX < 0 {
  1888. kx = -(n - 1) * incX
  1889. }
  1890. if incY < 0 {
  1891. ky = -(n - 1) * incY
  1892. }
  1893. // Form y = beta * y.
  1894. if beta != 1 {
  1895. if incY == 1 {
  1896. if beta == 0 {
  1897. for i := range y[:n] {
  1898. y[i] = 0
  1899. }
  1900. } else {
  1901. f32.ScalUnitary(beta, y[:n])
  1902. }
  1903. } else {
  1904. iy := ky
  1905. if beta == 0 {
  1906. for i := 0; i < n; i++ {
  1907. y[iy] = 0
  1908. iy += incY
  1909. }
  1910. } else {
  1911. if incY > 0 {
  1912. f32.ScalInc(beta, y, uintptr(n), uintptr(incY))
  1913. } else {
  1914. f32.ScalInc(beta, y, uintptr(n), uintptr(-incY))
  1915. }
  1916. }
  1917. }
  1918. }
  1919. if alpha == 0 {
  1920. return
  1921. }
  1922. if n == 1 {
  1923. y[0] += alpha * ap[0] * x[0]
  1924. return
  1925. }
  1926. var offset int // Offset is the index of (i,i).
  1927. if ul == blas.Upper {
  1928. if incX == 1 {
  1929. iy := ky
  1930. for i := 0; i < n; i++ {
  1931. xv := x[i] * alpha
  1932. sum := ap[offset] * x[i]
  1933. atmp := ap[offset+1 : offset+n-i]
  1934. xtmp := x[i+1:]
  1935. jy := ky + (i+1)*incY
  1936. for j, v := range atmp {
  1937. sum += v * xtmp[j]
  1938. y[jy] += v * xv
  1939. jy += incY
  1940. }
  1941. y[iy] += alpha * sum
  1942. iy += incY
  1943. offset += n - i
  1944. }
  1945. return
  1946. }
  1947. ix := kx
  1948. iy := ky
  1949. for i := 0; i < n; i++ {
  1950. xv := x[ix] * alpha
  1951. sum := ap[offset] * x[ix]
  1952. atmp := ap[offset+1 : offset+n-i]
  1953. jx := kx + (i+1)*incX
  1954. jy := ky + (i+1)*incY
  1955. for _, v := range atmp {
  1956. sum += v * x[jx]
  1957. y[jy] += v * xv
  1958. jx += incX
  1959. jy += incY
  1960. }
  1961. y[iy] += alpha * sum
  1962. ix += incX
  1963. iy += incY
  1964. offset += n - i
  1965. }
  1966. return
  1967. }
  1968. if incX == 1 {
  1969. iy := ky
  1970. for i := 0; i < n; i++ {
  1971. xv := x[i] * alpha
  1972. atmp := ap[offset-i : offset]
  1973. jy := ky
  1974. var sum float32
  1975. for j, v := range atmp {
  1976. sum += v * x[j]
  1977. y[jy] += v * xv
  1978. jy += incY
  1979. }
  1980. sum += ap[offset] * x[i]
  1981. y[iy] += alpha * sum
  1982. iy += incY
  1983. offset += i + 2
  1984. }
  1985. return
  1986. }
  1987. ix := kx
  1988. iy := ky
  1989. for i := 0; i < n; i++ {
  1990. xv := x[ix] * alpha
  1991. atmp := ap[offset-i : offset]
  1992. jx := kx
  1993. jy := ky
  1994. var sum float32
  1995. for _, v := range atmp {
  1996. sum += v * x[jx]
  1997. y[jy] += v * xv
  1998. jx += incX
  1999. jy += incY
  2000. }
  2001. sum += ap[offset] * x[ix]
  2002. y[iy] += alpha * sum
  2003. ix += incX
  2004. iy += incY
  2005. offset += i + 2
  2006. }
  2007. }
  2008. // Sspr performs the symmetric rank-one operation
  2009. // A += alpha * x * xᵀ
  2010. // where A is an n×n symmetric matrix in packed format, x is a vector, and
  2011. // alpha is a scalar.
  2012. //
  2013. // Float32 implementations are autogenerated and not directly tested.
  2014. func (Implementation) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, ap []float32) {
  2015. if ul != blas.Lower && ul != blas.Upper {
  2016. panic(badUplo)
  2017. }
  2018. if n < 0 {
  2019. panic(nLT0)
  2020. }
  2021. if incX == 0 {
  2022. panic(zeroIncX)
  2023. }
  2024. // Quick return if possible.
  2025. if n == 0 {
  2026. return
  2027. }
  2028. // For zero matrix size the following slice length checks are trivially satisfied.
  2029. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  2030. panic(shortX)
  2031. }
  2032. if len(ap) < n*(n+1)/2 {
  2033. panic(shortAP)
  2034. }
  2035. // Quick return if possible.
  2036. if alpha == 0 {
  2037. return
  2038. }
  2039. lenX := n
  2040. var kx int
  2041. if incX < 0 {
  2042. kx = -(lenX - 1) * incX
  2043. }
  2044. var offset int // Offset is the index of (i,i).
  2045. if ul == blas.Upper {
  2046. if incX == 1 {
  2047. for i := 0; i < n; i++ {
  2048. atmp := ap[offset:]
  2049. xv := alpha * x[i]
  2050. xtmp := x[i:n]
  2051. for j, v := range xtmp {
  2052. atmp[j] += xv * v
  2053. }
  2054. offset += n - i
  2055. }
  2056. return
  2057. }
  2058. ix := kx
  2059. for i := 0; i < n; i++ {
  2060. jx := kx + i*incX
  2061. atmp := ap[offset:]
  2062. xv := alpha * x[ix]
  2063. for j := 0; j < n-i; j++ {
  2064. atmp[j] += xv * x[jx]
  2065. jx += incX
  2066. }
  2067. ix += incX
  2068. offset += n - i
  2069. }
  2070. return
  2071. }
  2072. if incX == 1 {
  2073. for i := 0; i < n; i++ {
  2074. atmp := ap[offset-i:]
  2075. xv := alpha * x[i]
  2076. xtmp := x[:i+1]
  2077. for j, v := range xtmp {
  2078. atmp[j] += xv * v
  2079. }
  2080. offset += i + 2
  2081. }
  2082. return
  2083. }
  2084. ix := kx
  2085. for i := 0; i < n; i++ {
  2086. jx := kx
  2087. atmp := ap[offset-i:]
  2088. xv := alpha * x[ix]
  2089. for j := 0; j <= i; j++ {
  2090. atmp[j] += xv * x[jx]
  2091. jx += incX
  2092. }
  2093. ix += incX
  2094. offset += i + 2
  2095. }
  2096. }
  2097. // Sspr2 performs the symmetric rank-2 update
  2098. // A += alpha * x * yᵀ + alpha * y * xᵀ
  2099. // where A is an n×n symmetric matrix in packed format, x and y are vectors,
  2100. // and alpha is a scalar.
  2101. //
  2102. // Float32 implementations are autogenerated and not directly tested.
  2103. func (Implementation) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) {
  2104. if ul != blas.Lower && ul != blas.Upper {
  2105. panic(badUplo)
  2106. }
  2107. if n < 0 {
  2108. panic(nLT0)
  2109. }
  2110. if incX == 0 {
  2111. panic(zeroIncX)
  2112. }
  2113. if incY == 0 {
  2114. panic(zeroIncY)
  2115. }
  2116. // Quick return if possible.
  2117. if n == 0 {
  2118. return
  2119. }
  2120. // For zero matrix size the following slice length checks are trivially satisfied.
  2121. if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  2122. panic(shortX)
  2123. }
  2124. if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
  2125. panic(shortY)
  2126. }
  2127. if len(ap) < n*(n+1)/2 {
  2128. panic(shortAP)
  2129. }
  2130. // Quick return if possible.
  2131. if alpha == 0 {
  2132. return
  2133. }
  2134. var ky, kx int
  2135. if incY < 0 {
  2136. ky = -(n - 1) * incY
  2137. }
  2138. if incX < 0 {
  2139. kx = -(n - 1) * incX
  2140. }
  2141. var offset int // Offset is the index of (i,i).
  2142. if ul == blas.Upper {
  2143. if incX == 1 && incY == 1 {
  2144. for i := 0; i < n; i++ {
  2145. atmp := ap[offset:]
  2146. xi := x[i]
  2147. yi := y[i]
  2148. xtmp := x[i:n]
  2149. ytmp := y[i:n]
  2150. for j, v := range xtmp {
  2151. atmp[j] += alpha * (xi*ytmp[j] + v*yi)
  2152. }
  2153. offset += n - i
  2154. }
  2155. return
  2156. }
  2157. ix := kx
  2158. iy := ky
  2159. for i := 0; i < n; i++ {
  2160. jx := kx + i*incX
  2161. jy := ky + i*incY
  2162. atmp := ap[offset:]
  2163. xi := x[ix]
  2164. yi := y[iy]
  2165. for j := 0; j < n-i; j++ {
  2166. atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
  2167. jx += incX
  2168. jy += incY
  2169. }
  2170. ix += incX
  2171. iy += incY
  2172. offset += n - i
  2173. }
  2174. return
  2175. }
  2176. if incX == 1 && incY == 1 {
  2177. for i := 0; i < n; i++ {
  2178. atmp := ap[offset-i:]
  2179. xi := x[i]
  2180. yi := y[i]
  2181. xtmp := x[:i+1]
  2182. for j, v := range xtmp {
  2183. atmp[j] += alpha * (xi*y[j] + v*yi)
  2184. }
  2185. offset += i + 2
  2186. }
  2187. return
  2188. }
  2189. ix := kx
  2190. iy := ky
  2191. for i := 0; i < n; i++ {
  2192. jx := kx
  2193. jy := ky
  2194. atmp := ap[offset-i:]
  2195. for j := 0; j <= i; j++ {
  2196. atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy])
  2197. jx += incX
  2198. jy += incY
  2199. }
  2200. ix += incX
  2201. iy += incY
  2202. offset += i + 2
  2203. }
  2204. }