| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877 |
- // Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
- // Copyright ©2014 The Gonum Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package gonum
- import (
- "gonum.org/v1/gonum/blas"
- "gonum.org/v1/gonum/internal/asm/f32"
- )
- var _ blas.Float32Level3 = Implementation{}
- // Strsm solves one of the matrix equations
- // A * X = alpha * B if tA == blas.NoTrans and side == blas.Left
- // Aᵀ * X = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
- // X * A = alpha * B if tA == blas.NoTrans and side == blas.Right
- // X * Aᵀ = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
- // where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a
- // scalar.
- //
- // At entry to the function, X contains the values of B, and the result is
- // stored in-place into X.
- //
- // No check is made that A is invertible.
- //
- // Float32 implementations are autogenerated and not directly tested.
- func (Implementation) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
- if s != blas.Left && s != blas.Right {
- panic(badSide)
- }
- if ul != blas.Lower && ul != blas.Upper {
- panic(badUplo)
- }
- if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
- panic(badTranspose)
- }
- if d != blas.NonUnit && d != blas.Unit {
- panic(badDiag)
- }
- if m < 0 {
- panic(mLT0)
- }
- if n < 0 {
- panic(nLT0)
- }
- k := n
- if s == blas.Left {
- k = m
- }
- if lda < max(1, k) {
- panic(badLdA)
- }
- if ldb < max(1, n) {
- panic(badLdB)
- }
- // Quick return if possible.
- if m == 0 || n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < lda*(k-1)+k {
- panic(shortA)
- }
- if len(b) < ldb*(m-1)+n {
- panic(shortB)
- }
- if alpha == 0 {
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- for j := range btmp {
- btmp[j] = 0
- }
- }
- return
- }
- nonUnit := d == blas.NonUnit
- if s == blas.Left {
- if tA == blas.NoTrans {
- if ul == blas.Upper {
- for i := m - 1; i >= 0; i-- {
- btmp := b[i*ldb : i*ldb+n]
- if alpha != 1 {
- f32.ScalUnitary(alpha, btmp)
- }
- for ka, va := range a[i*lda+i+1 : i*lda+m] {
- if va != 0 {
- k := ka + i + 1
- f32.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp)
- }
- }
- if nonUnit {
- tmp := 1 / a[i*lda+i]
- f32.ScalUnitary(tmp, btmp)
- }
- }
- return
- }
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- if alpha != 1 {
- f32.ScalUnitary(alpha, btmp)
- }
- for k, va := range a[i*lda : i*lda+i] {
- if va != 0 {
- f32.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp)
- }
- }
- if nonUnit {
- tmp := 1 / a[i*lda+i]
- f32.ScalUnitary(tmp, btmp)
- }
- }
- return
- }
- // Cases where a is transposed
- if ul == blas.Upper {
- for k := 0; k < m; k++ {
- btmpk := b[k*ldb : k*ldb+n]
- if nonUnit {
- tmp := 1 / a[k*lda+k]
- f32.ScalUnitary(tmp, btmpk)
- }
- for ia, va := range a[k*lda+k+1 : k*lda+m] {
- if va != 0 {
- i := ia + k + 1
- f32.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n])
- }
- }
- if alpha != 1 {
- f32.ScalUnitary(alpha, btmpk)
- }
- }
- return
- }
- for k := m - 1; k >= 0; k-- {
- btmpk := b[k*ldb : k*ldb+n]
- if nonUnit {
- tmp := 1 / a[k*lda+k]
- f32.ScalUnitary(tmp, btmpk)
- }
- for i, va := range a[k*lda : k*lda+k] {
- if va != 0 {
- f32.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n])
- }
- }
- if alpha != 1 {
- f32.ScalUnitary(alpha, btmpk)
- }
- }
- return
- }
- // Cases where a is to the right of X.
- if tA == blas.NoTrans {
- if ul == blas.Upper {
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- if alpha != 1 {
- f32.ScalUnitary(alpha, btmp)
- }
- for k, vb := range btmp {
- if vb == 0 {
- continue
- }
- if nonUnit {
- btmp[k] /= a[k*lda+k]
- }
- f32.AxpyUnitary(-btmp[k], a[k*lda+k+1:k*lda+n], btmp[k+1:n])
- }
- }
- return
- }
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- if alpha != 1 {
- f32.ScalUnitary(alpha, btmp)
- }
- for k := n - 1; k >= 0; k-- {
- if btmp[k] == 0 {
- continue
- }
- if nonUnit {
- btmp[k] /= a[k*lda+k]
- }
- f32.AxpyUnitary(-btmp[k], a[k*lda:k*lda+k], btmp[:k])
- }
- }
- return
- }
- // Cases where a is transposed.
- if ul == blas.Upper {
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- for j := n - 1; j >= 0; j-- {
- tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:])
- if nonUnit {
- tmp /= a[j*lda+j]
- }
- btmp[j] = tmp
- }
- }
- return
- }
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- for j := 0; j < n; j++ {
- tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
- if nonUnit {
- tmp /= a[j*lda+j]
- }
- btmp[j] = tmp
- }
- }
- }
- // Ssymm performs one of the matrix-matrix operations
- // C = alpha * A * B + beta * C if side == blas.Left
- // C = alpha * B * A + beta * C if side == blas.Right
- // where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha
- // is a scalar.
- //
- // Float32 implementations are autogenerated and not directly tested.
- func (Implementation) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
- if s != blas.Right && s != blas.Left {
- panic(badSide)
- }
- if ul != blas.Lower && ul != blas.Upper {
- panic(badUplo)
- }
- if m < 0 {
- panic(mLT0)
- }
- if n < 0 {
- panic(nLT0)
- }
- k := n
- if s == blas.Left {
- k = m
- }
- if lda < max(1, k) {
- panic(badLdA)
- }
- if ldb < max(1, n) {
- panic(badLdB)
- }
- if ldc < max(1, n) {
- panic(badLdC)
- }
- // Quick return if possible.
- if m == 0 || n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < lda*(k-1)+k {
- panic(shortA)
- }
- if len(b) < ldb*(m-1)+n {
- panic(shortB)
- }
- if len(c) < ldc*(m-1)+n {
- panic(shortC)
- }
- // Quick return if possible.
- if alpha == 0 && beta == 1 {
- return
- }
- if alpha == 0 {
- if beta == 0 {
- for i := 0; i < m; i++ {
- ctmp := c[i*ldc : i*ldc+n]
- for j := range ctmp {
- ctmp[j] = 0
- }
- }
- return
- }
- for i := 0; i < m; i++ {
- ctmp := c[i*ldc : i*ldc+n]
- for j := 0; j < n; j++ {
- ctmp[j] *= beta
- }
- }
- return
- }
- isUpper := ul == blas.Upper
- if s == blas.Left {
- for i := 0; i < m; i++ {
- atmp := alpha * a[i*lda+i]
- btmp := b[i*ldb : i*ldb+n]
- ctmp := c[i*ldc : i*ldc+n]
- for j, v := range btmp {
- ctmp[j] *= beta
- ctmp[j] += atmp * v
- }
- for k := 0; k < i; k++ {
- var atmp float32
- if isUpper {
- atmp = a[k*lda+i]
- } else {
- atmp = a[i*lda+k]
- }
- atmp *= alpha
- f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp)
- }
- for k := i + 1; k < m; k++ {
- var atmp float32
- if isUpper {
- atmp = a[i*lda+k]
- } else {
- atmp = a[k*lda+i]
- }
- atmp *= alpha
- f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp)
- }
- }
- return
- }
- if isUpper {
- for i := 0; i < m; i++ {
- for j := n - 1; j >= 0; j-- {
- tmp := alpha * b[i*ldb+j]
- var tmp2 float32
- atmp := a[j*lda+j+1 : j*lda+n]
- btmp := b[i*ldb+j+1 : i*ldb+n]
- ctmp := c[i*ldc+j+1 : i*ldc+n]
- for k, v := range atmp {
- ctmp[k] += tmp * v
- tmp2 += btmp[k] * v
- }
- c[i*ldc+j] *= beta
- c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
- }
- }
- return
- }
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- tmp := alpha * b[i*ldb+j]
- var tmp2 float32
- atmp := a[j*lda : j*lda+j]
- btmp := b[i*ldb : i*ldb+j]
- ctmp := c[i*ldc : i*ldc+j]
- for k, v := range atmp {
- ctmp[k] += tmp * v
- tmp2 += btmp[k] * v
- }
- c[i*ldc+j] *= beta
- c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
- }
- }
- }
- // Ssyrk performs one of the symmetric rank-k operations
- // C = alpha * A * Aᵀ + beta * C if tA == blas.NoTrans
- // C = alpha * Aᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans
- // where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and
- // beta are scalars.
- //
- // Float32 implementations are autogenerated and not directly tested.
- func (Implementation) Ssyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) {
- if ul != blas.Lower && ul != blas.Upper {
- panic(badUplo)
- }
- if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
- panic(badTranspose)
- }
- if n < 0 {
- panic(nLT0)
- }
- if k < 0 {
- panic(kLT0)
- }
- row, col := k, n
- if tA == blas.NoTrans {
- row, col = n, k
- }
- if lda < max(1, col) {
- panic(badLdA)
- }
- if ldc < max(1, n) {
- panic(badLdC)
- }
- // Quick return if possible.
- if n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < lda*(row-1)+col {
- panic(shortA)
- }
- if len(c) < ldc*(n-1)+n {
- panic(shortC)
- }
- if alpha == 0 {
- if beta == 0 {
- if ul == blas.Upper {
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc+i : i*ldc+n]
- for j := range ctmp {
- ctmp[j] = 0
- }
- }
- return
- }
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc : i*ldc+i+1]
- for j := range ctmp {
- ctmp[j] = 0
- }
- }
- return
- }
- if ul == blas.Upper {
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc+i : i*ldc+n]
- for j := range ctmp {
- ctmp[j] *= beta
- }
- }
- return
- }
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc : i*ldc+i+1]
- for j := range ctmp {
- ctmp[j] *= beta
- }
- }
- return
- }
- if tA == blas.NoTrans {
- if ul == blas.Upper {
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc+i : i*ldc+n]
- atmp := a[i*lda : i*lda+k]
- if beta == 0 {
- for jc := range ctmp {
- j := jc + i
- ctmp[jc] = alpha * f32.DotUnitary(atmp, a[j*lda:j*lda+k])
- }
- } else {
- for jc, vc := range ctmp {
- j := jc + i
- ctmp[jc] = vc*beta + alpha*f32.DotUnitary(atmp, a[j*lda:j*lda+k])
- }
- }
- }
- return
- }
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc : i*ldc+i+1]
- atmp := a[i*lda : i*lda+k]
- if beta == 0 {
- for j := range ctmp {
- ctmp[j] = alpha * f32.DotUnitary(a[j*lda:j*lda+k], atmp)
- }
- } else {
- for j, vc := range ctmp {
- ctmp[j] = vc*beta + alpha*f32.DotUnitary(a[j*lda:j*lda+k], atmp)
- }
- }
- }
- return
- }
- // Cases where a is transposed.
- if ul == blas.Upper {
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc+i : i*ldc+n]
- if beta == 0 {
- for j := range ctmp {
- ctmp[j] = 0
- }
- } else if beta != 1 {
- for j := range ctmp {
- ctmp[j] *= beta
- }
- }
- for l := 0; l < k; l++ {
- tmp := alpha * a[l*lda+i]
- if tmp != 0 {
- f32.AxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp)
- }
- }
- }
- return
- }
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc : i*ldc+i+1]
- if beta != 1 {
- for j := range ctmp {
- ctmp[j] *= beta
- }
- }
- for l := 0; l < k; l++ {
- tmp := alpha * a[l*lda+i]
- if tmp != 0 {
- f32.AxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp)
- }
- }
- }
- }
- // Ssyr2k performs one of the symmetric rank 2k operations
- // C = alpha * A * Bᵀ + alpha * B * Aᵀ + beta * C if tA == blas.NoTrans
- // C = alpha * Aᵀ * B + alpha * Bᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans
- // where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and
- // alpha and beta are scalars.
- //
- // Float32 implementations are autogenerated and not directly tested.
- func (Implementation) Ssyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
- if ul != blas.Lower && ul != blas.Upper {
- panic(badUplo)
- }
- if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
- panic(badTranspose)
- }
- if n < 0 {
- panic(nLT0)
- }
- if k < 0 {
- panic(kLT0)
- }
- row, col := k, n
- if tA == blas.NoTrans {
- row, col = n, k
- }
- if lda < max(1, col) {
- panic(badLdA)
- }
- if ldb < max(1, col) {
- panic(badLdB)
- }
- if ldc < max(1, n) {
- panic(badLdC)
- }
- // Quick return if possible.
- if n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < lda*(row-1)+col {
- panic(shortA)
- }
- if len(b) < ldb*(row-1)+col {
- panic(shortB)
- }
- if len(c) < ldc*(n-1)+n {
- panic(shortC)
- }
- if alpha == 0 {
- if beta == 0 {
- if ul == blas.Upper {
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc+i : i*ldc+n]
- for j := range ctmp {
- ctmp[j] = 0
- }
- }
- return
- }
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc : i*ldc+i+1]
- for j := range ctmp {
- ctmp[j] = 0
- }
- }
- return
- }
- if ul == blas.Upper {
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc+i : i*ldc+n]
- for j := range ctmp {
- ctmp[j] *= beta
- }
- }
- return
- }
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc : i*ldc+i+1]
- for j := range ctmp {
- ctmp[j] *= beta
- }
- }
- return
- }
- if tA == blas.NoTrans {
- if ul == blas.Upper {
- for i := 0; i < n; i++ {
- atmp := a[i*lda : i*lda+k]
- btmp := b[i*ldb : i*ldb+k]
- ctmp := c[i*ldc+i : i*ldc+n]
- for jc := range ctmp {
- j := i + jc
- var tmp1, tmp2 float32
- binner := b[j*ldb : j*ldb+k]
- for l, v := range a[j*lda : j*lda+k] {
- tmp1 += v * btmp[l]
- tmp2 += atmp[l] * binner[l]
- }
- ctmp[jc] *= beta
- ctmp[jc] += alpha * (tmp1 + tmp2)
- }
- }
- return
- }
- for i := 0; i < n; i++ {
- atmp := a[i*lda : i*lda+k]
- btmp := b[i*ldb : i*ldb+k]
- ctmp := c[i*ldc : i*ldc+i+1]
- for j := 0; j <= i; j++ {
- var tmp1, tmp2 float32
- binner := b[j*ldb : j*ldb+k]
- for l, v := range a[j*lda : j*lda+k] {
- tmp1 += v * btmp[l]
- tmp2 += atmp[l] * binner[l]
- }
- ctmp[j] *= beta
- ctmp[j] += alpha * (tmp1 + tmp2)
- }
- }
- return
- }
- if ul == blas.Upper {
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc+i : i*ldc+n]
- if beta != 1 {
- for j := range ctmp {
- ctmp[j] *= beta
- }
- }
- for l := 0; l < k; l++ {
- tmp1 := alpha * b[l*ldb+i]
- tmp2 := alpha * a[l*lda+i]
- btmp := b[l*ldb+i : l*ldb+n]
- if tmp1 != 0 || tmp2 != 0 {
- for j, v := range a[l*lda+i : l*lda+n] {
- ctmp[j] += v*tmp1 + btmp[j]*tmp2
- }
- }
- }
- }
- return
- }
- for i := 0; i < n; i++ {
- ctmp := c[i*ldc : i*ldc+i+1]
- if beta != 1 {
- for j := range ctmp {
- ctmp[j] *= beta
- }
- }
- for l := 0; l < k; l++ {
- tmp1 := alpha * b[l*ldb+i]
- tmp2 := alpha * a[l*lda+i]
- btmp := b[l*ldb : l*ldb+i+1]
- if tmp1 != 0 || tmp2 != 0 {
- for j, v := range a[l*lda : l*lda+i+1] {
- ctmp[j] += v*tmp1 + btmp[j]*tmp2
- }
- }
- }
- }
- }
- // Strmm performs one of the matrix-matrix operations
- // B = alpha * A * B if tA == blas.NoTrans and side == blas.Left
- // B = alpha * Aᵀ * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
- // B = alpha * B * A if tA == blas.NoTrans and side == blas.Right
- // B = alpha * B * Aᵀ if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
- // where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar.
- //
- // Float32 implementations are autogenerated and not directly tested.
- func (Implementation) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
- if s != blas.Left && s != blas.Right {
- panic(badSide)
- }
- if ul != blas.Lower && ul != blas.Upper {
- panic(badUplo)
- }
- if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
- panic(badTranspose)
- }
- if d != blas.NonUnit && d != blas.Unit {
- panic(badDiag)
- }
- if m < 0 {
- panic(mLT0)
- }
- if n < 0 {
- panic(nLT0)
- }
- k := n
- if s == blas.Left {
- k = m
- }
- if lda < max(1, k) {
- panic(badLdA)
- }
- if ldb < max(1, n) {
- panic(badLdB)
- }
- // Quick return if possible.
- if m == 0 || n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < lda*(k-1)+k {
- panic(shortA)
- }
- if len(b) < ldb*(m-1)+n {
- panic(shortB)
- }
- if alpha == 0 {
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- for j := range btmp {
- btmp[j] = 0
- }
- }
- return
- }
- nonUnit := d == blas.NonUnit
- if s == blas.Left {
- if tA == blas.NoTrans {
- if ul == blas.Upper {
- for i := 0; i < m; i++ {
- tmp := alpha
- if nonUnit {
- tmp *= a[i*lda+i]
- }
- btmp := b[i*ldb : i*ldb+n]
- f32.ScalUnitary(tmp, btmp)
- for ka, va := range a[i*lda+i+1 : i*lda+m] {
- k := ka + i + 1
- if va != 0 {
- f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp)
- }
- }
- }
- return
- }
- for i := m - 1; i >= 0; i-- {
- tmp := alpha
- if nonUnit {
- tmp *= a[i*lda+i]
- }
- btmp := b[i*ldb : i*ldb+n]
- f32.ScalUnitary(tmp, btmp)
- for k, va := range a[i*lda : i*lda+i] {
- if va != 0 {
- f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp)
- }
- }
- }
- return
- }
- // Cases where a is transposed.
- if ul == blas.Upper {
- for k := m - 1; k >= 0; k-- {
- btmpk := b[k*ldb : k*ldb+n]
- for ia, va := range a[k*lda+k+1 : k*lda+m] {
- i := ia + k + 1
- btmp := b[i*ldb : i*ldb+n]
- if va != 0 {
- f32.AxpyUnitary(alpha*va, btmpk, btmp)
- }
- }
- tmp := alpha
- if nonUnit {
- tmp *= a[k*lda+k]
- }
- if tmp != 1 {
- f32.ScalUnitary(tmp, btmpk)
- }
- }
- return
- }
- for k := 0; k < m; k++ {
- btmpk := b[k*ldb : k*ldb+n]
- for i, va := range a[k*lda : k*lda+k] {
- btmp := b[i*ldb : i*ldb+n]
- if va != 0 {
- f32.AxpyUnitary(alpha*va, btmpk, btmp)
- }
- }
- tmp := alpha
- if nonUnit {
- tmp *= a[k*lda+k]
- }
- if tmp != 1 {
- f32.ScalUnitary(tmp, btmpk)
- }
- }
- return
- }
- // Cases where a is on the right
- if tA == blas.NoTrans {
- if ul == blas.Upper {
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- for k := n - 1; k >= 0; k-- {
- tmp := alpha * btmp[k]
- if tmp == 0 {
- continue
- }
- btmp[k] = tmp
- if nonUnit {
- btmp[k] *= a[k*lda+k]
- }
- f32.AxpyUnitary(tmp, a[k*lda+k+1:k*lda+n], btmp[k+1:n])
- }
- }
- return
- }
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- for k := 0; k < n; k++ {
- tmp := alpha * btmp[k]
- if tmp == 0 {
- continue
- }
- btmp[k] = tmp
- if nonUnit {
- btmp[k] *= a[k*lda+k]
- }
- f32.AxpyUnitary(tmp, a[k*lda:k*lda+k], btmp[:k])
- }
- }
- return
- }
- // Cases where a is transposed.
- if ul == blas.Upper {
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- for j, vb := range btmp {
- tmp := vb
- if nonUnit {
- tmp *= a[j*lda+j]
- }
- tmp += f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
- btmp[j] = alpha * tmp
- }
- }
- return
- }
- for i := 0; i < m; i++ {
- btmp := b[i*ldb : i*ldb+n]
- for j := n - 1; j >= 0; j-- {
- tmp := btmp[j]
- if nonUnit {
- tmp *= a[j*lda+j]
- }
- tmp += f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
- btmp[j] = alpha * tmp
- }
- }
- }
|