// curve25519_generic.go
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
  4. package curve25519
  5. import "encoding/binary"
// This code is a port of the public domain, "ref10" implementation of
// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
  8. // fieldElement represents an element of the field GF(2^255 - 19). An element
  9. // t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
  10. // t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
  11. // context.
  12. type fieldElement [10]int32
  13. func feZero(fe *fieldElement) {
  14. for i := range fe {
  15. fe[i] = 0
  16. }
  17. }
  18. func feOne(fe *fieldElement) {
  19. feZero(fe)
  20. fe[0] = 1
  21. }
  22. func feAdd(dst, a, b *fieldElement) {
  23. for i := range dst {
  24. dst[i] = a[i] + b[i]
  25. }
  26. }
  27. func feSub(dst, a, b *fieldElement) {
  28. for i := range dst {
  29. dst[i] = a[i] - b[i]
  30. }
  31. }
  32. func feCopy(dst, src *fieldElement) {
  33. for i := range dst {
  34. dst[i] = src[i]
  35. }
  36. }
  37. // feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
  38. //
  39. // Preconditions: b in {0,1}.
  40. func feCSwap(f, g *fieldElement, b int32) {
  41. b = -b
  42. for i := range f {
  43. t := b & (f[i] ^ g[i])
  44. f[i] ^= t
  45. g[i] ^= t
  46. }
  47. }
  48. // load3 reads a 24-bit, little-endian value from in.
  49. func load3(in []byte) int64 {
  50. var r int64
  51. r = int64(in[0])
  52. r |= int64(in[1]) << 8
  53. r |= int64(in[2]) << 16
  54. return r
  55. }
  56. // load4 reads a 32-bit, little-endian value from in.
  57. func load4(in []byte) int64 {
  58. return int64(binary.LittleEndian.Uint32(in))
  59. }
  60. func feFromBytes(dst *fieldElement, src *[32]byte) {
  61. h0 := load4(src[:])
  62. h1 := load3(src[4:]) << 6
  63. h2 := load3(src[7:]) << 5
  64. h3 := load3(src[10:]) << 3
  65. h4 := load3(src[13:]) << 2
  66. h5 := load4(src[16:])
  67. h6 := load3(src[20:]) << 7
  68. h7 := load3(src[23:]) << 5
  69. h8 := load3(src[26:]) << 4
  70. h9 := (load3(src[29:]) & 0x7fffff) << 2
  71. var carry [10]int64
  72. carry[9] = (h9 + 1<<24) >> 25
  73. h0 += carry[9] * 19
  74. h9 -= carry[9] << 25
  75. carry[1] = (h1 + 1<<24) >> 25
  76. h2 += carry[1]
  77. h1 -= carry[1] << 25
  78. carry[3] = (h3 + 1<<24) >> 25
  79. h4 += carry[3]
  80. h3 -= carry[3] << 25
  81. carry[5] = (h5 + 1<<24) >> 25
  82. h6 += carry[5]
  83. h5 -= carry[5] << 25
  84. carry[7] = (h7 + 1<<24) >> 25
  85. h8 += carry[7]
  86. h7 -= carry[7] << 25
  87. carry[0] = (h0 + 1<<25) >> 26
  88. h1 += carry[0]
  89. h0 -= carry[0] << 26
  90. carry[2] = (h2 + 1<<25) >> 26
  91. h3 += carry[2]
  92. h2 -= carry[2] << 26
  93. carry[4] = (h4 + 1<<25) >> 26
  94. h5 += carry[4]
  95. h4 -= carry[4] << 26
  96. carry[6] = (h6 + 1<<25) >> 26
  97. h7 += carry[6]
  98. h6 -= carry[6] << 26
  99. carry[8] = (h8 + 1<<25) >> 26
  100. h9 += carry[8]
  101. h8 -= carry[8] << 26
  102. dst[0] = int32(h0)
  103. dst[1] = int32(h1)
  104. dst[2] = int32(h2)
  105. dst[3] = int32(h3)
  106. dst[4] = int32(h4)
  107. dst[5] = int32(h5)
  108. dst[6] = int32(h6)
  109. dst[7] = int32(h7)
  110. dst[8] = int32(h8)
  111. dst[9] = int32(h9)
  112. }
  113. // feToBytes marshals h to s.
  114. // Preconditions:
  115. // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  116. //
  117. // Write p=2^255-19; q=floor(h/p).
  118. // Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
  119. //
  120. // Proof:
  121. // Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
  122. // Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
  123. //
  124. // Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
  125. // Then 0<y<1.
  126. //
  127. // Write r=h-pq.
  128. // Have 0<=r<=p-1=2^255-20.
  129. // Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
  130. //
  131. // Write x=r+19(2^-255)r+y.
  132. // Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
  133. //
  134. // Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
  135. // so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
  136. func feToBytes(s *[32]byte, h *fieldElement) {
  137. var carry [10]int32
  138. q := (19*h[9] + (1 << 24)) >> 25
  139. q = (h[0] + q) >> 26
  140. q = (h[1] + q) >> 25
  141. q = (h[2] + q) >> 26
  142. q = (h[3] + q) >> 25
  143. q = (h[4] + q) >> 26
  144. q = (h[5] + q) >> 25
  145. q = (h[6] + q) >> 26
  146. q = (h[7] + q) >> 25
  147. q = (h[8] + q) >> 26
  148. q = (h[9] + q) >> 25
  149. // Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
  150. h[0] += 19 * q
  151. // Goal: Output h-2^255 q, which is between 0 and 2^255-20.
  152. carry[0] = h[0] >> 26
  153. h[1] += carry[0]
  154. h[0] -= carry[0] << 26
  155. carry[1] = h[1] >> 25
  156. h[2] += carry[1]
  157. h[1] -= carry[1] << 25
  158. carry[2] = h[2] >> 26
  159. h[3] += carry[2]
  160. h[2] -= carry[2] << 26
  161. carry[3] = h[3] >> 25
  162. h[4] += carry[3]
  163. h[3] -= carry[3] << 25
  164. carry[4] = h[4] >> 26
  165. h[5] += carry[4]
  166. h[4] -= carry[4] << 26
  167. carry[5] = h[5] >> 25
  168. h[6] += carry[5]
  169. h[5] -= carry[5] << 25
  170. carry[6] = h[6] >> 26
  171. h[7] += carry[6]
  172. h[6] -= carry[6] << 26
  173. carry[7] = h[7] >> 25
  174. h[8] += carry[7]
  175. h[7] -= carry[7] << 25
  176. carry[8] = h[8] >> 26
  177. h[9] += carry[8]
  178. h[8] -= carry[8] << 26
  179. carry[9] = h[9] >> 25
  180. h[9] -= carry[9] << 25
  181. // h10 = carry9
  182. // Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
  183. // Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
  184. // evidently 2^255 h10-2^255 q = 0.
  185. // Goal: Output h[0]+...+2^230 h[9].
  186. s[0] = byte(h[0] >> 0)
  187. s[1] = byte(h[0] >> 8)
  188. s[2] = byte(h[0] >> 16)
  189. s[3] = byte((h[0] >> 24) | (h[1] << 2))
  190. s[4] = byte(h[1] >> 6)
  191. s[5] = byte(h[1] >> 14)
  192. s[6] = byte((h[1] >> 22) | (h[2] << 3))
  193. s[7] = byte(h[2] >> 5)
  194. s[8] = byte(h[2] >> 13)
  195. s[9] = byte((h[2] >> 21) | (h[3] << 5))
  196. s[10] = byte(h[3] >> 3)
  197. s[11] = byte(h[3] >> 11)
  198. s[12] = byte((h[3] >> 19) | (h[4] << 6))
  199. s[13] = byte(h[4] >> 2)
  200. s[14] = byte(h[4] >> 10)
  201. s[15] = byte(h[4] >> 18)
  202. s[16] = byte(h[5] >> 0)
  203. s[17] = byte(h[5] >> 8)
  204. s[18] = byte(h[5] >> 16)
  205. s[19] = byte((h[5] >> 24) | (h[6] << 1))
  206. s[20] = byte(h[6] >> 7)
  207. s[21] = byte(h[6] >> 15)
  208. s[22] = byte((h[6] >> 23) | (h[7] << 3))
  209. s[23] = byte(h[7] >> 5)
  210. s[24] = byte(h[7] >> 13)
  211. s[25] = byte((h[7] >> 21) | (h[8] << 4))
  212. s[26] = byte(h[8] >> 4)
  213. s[27] = byte(h[8] >> 12)
  214. s[28] = byte((h[8] >> 20) | (h[9] << 6))
  215. s[29] = byte(h[9] >> 2)
  216. s[30] = byte(h[9] >> 10)
  217. s[31] = byte(h[9] >> 18)
  218. }
  219. // feMul calculates h = f * g
  220. // Can overlap h with f or g.
  221. //
  222. // Preconditions:
  223. // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  224. // |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  225. //
  226. // Postconditions:
  227. // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  228. //
  229. // Notes on implementation strategy:
  230. //
  231. // Using schoolbook multiplication.
  232. // Karatsuba would save a little in some cost models.
  233. //
  234. // Most multiplications by 2 and 19 are 32-bit precomputations;
  235. // cheaper than 64-bit postcomputations.
  236. //
  237. // There is one remaining multiplication by 19 in the carry chain;
  238. // one *19 precomputation can be merged into this,
  239. // but the resulting data flow is considerably less clean.
  240. //
  241. // There are 12 carries below.
  242. // 10 of them are 2-way parallelizable and vectorizable.
  243. // Can get away with 11 carries, but then data flow is much deeper.
  244. //
  245. // With tighter constraints on inputs can squeeze carries into int32.
  246. func feMul(h, f, g *fieldElement) {
  247. f0 := f[0]
  248. f1 := f[1]
  249. f2 := f[2]
  250. f3 := f[3]
  251. f4 := f[4]
  252. f5 := f[5]
  253. f6 := f[6]
  254. f7 := f[7]
  255. f8 := f[8]
  256. f9 := f[9]
  257. g0 := g[0]
  258. g1 := g[1]
  259. g2 := g[2]
  260. g3 := g[3]
  261. g4 := g[4]
  262. g5 := g[5]
  263. g6 := g[6]
  264. g7 := g[7]
  265. g8 := g[8]
  266. g9 := g[9]
  267. g1_19 := 19 * g1 // 1.4*2^29
  268. g2_19 := 19 * g2 // 1.4*2^30; still ok
  269. g3_19 := 19 * g3
  270. g4_19 := 19 * g4
  271. g5_19 := 19 * g5
  272. g6_19 := 19 * g6
  273. g7_19 := 19 * g7
  274. g8_19 := 19 * g8
  275. g9_19 := 19 * g9
  276. f1_2 := 2 * f1
  277. f3_2 := 2 * f3
  278. f5_2 := 2 * f5
  279. f7_2 := 2 * f7
  280. f9_2 := 2 * f9
  281. f0g0 := int64(f0) * int64(g0)
  282. f0g1 := int64(f0) * int64(g1)
  283. f0g2 := int64(f0) * int64(g2)
  284. f0g3 := int64(f0) * int64(g3)
  285. f0g4 := int64(f0) * int64(g4)
  286. f0g5 := int64(f0) * int64(g5)
  287. f0g6 := int64(f0) * int64(g6)
  288. f0g7 := int64(f0) * int64(g7)
  289. f0g8 := int64(f0) * int64(g8)
  290. f0g9 := int64(f0) * int64(g9)
  291. f1g0 := int64(f1) * int64(g0)
  292. f1g1_2 := int64(f1_2) * int64(g1)
  293. f1g2 := int64(f1) * int64(g2)
  294. f1g3_2 := int64(f1_2) * int64(g3)
  295. f1g4 := int64(f1) * int64(g4)
  296. f1g5_2 := int64(f1_2) * int64(g5)
  297. f1g6 := int64(f1) * int64(g6)
  298. f1g7_2 := int64(f1_2) * int64(g7)
  299. f1g8 := int64(f1) * int64(g8)
  300. f1g9_38 := int64(f1_2) * int64(g9_19)
  301. f2g0 := int64(f2) * int64(g0)
  302. f2g1 := int64(f2) * int64(g1)
  303. f2g2 := int64(f2) * int64(g2)
  304. f2g3 := int64(f2) * int64(g3)
  305. f2g4 := int64(f2) * int64(g4)
  306. f2g5 := int64(f2) * int64(g5)
  307. f2g6 := int64(f2) * int64(g6)
  308. f2g7 := int64(f2) * int64(g7)
  309. f2g8_19 := int64(f2) * int64(g8_19)
  310. f2g9_19 := int64(f2) * int64(g9_19)
  311. f3g0 := int64(f3) * int64(g0)
  312. f3g1_2 := int64(f3_2) * int64(g1)
  313. f3g2 := int64(f3) * int64(g2)
  314. f3g3_2 := int64(f3_2) * int64(g3)
  315. f3g4 := int64(f3) * int64(g4)
  316. f3g5_2 := int64(f3_2) * int64(g5)
  317. f3g6 := int64(f3) * int64(g6)
  318. f3g7_38 := int64(f3_2) * int64(g7_19)
  319. f3g8_19 := int64(f3) * int64(g8_19)
  320. f3g9_38 := int64(f3_2) * int64(g9_19)
  321. f4g0 := int64(f4) * int64(g0)
  322. f4g1 := int64(f4) * int64(g1)
  323. f4g2 := int64(f4) * int64(g2)
  324. f4g3 := int64(f4) * int64(g3)
  325. f4g4 := int64(f4) * int64(g4)
  326. f4g5 := int64(f4) * int64(g5)
  327. f4g6_19 := int64(f4) * int64(g6_19)
  328. f4g7_19 := int64(f4) * int64(g7_19)
  329. f4g8_19 := int64(f4) * int64(g8_19)
  330. f4g9_19 := int64(f4) * int64(g9_19)
  331. f5g0 := int64(f5) * int64(g0)
  332. f5g1_2 := int64(f5_2) * int64(g1)
  333. f5g2 := int64(f5) * int64(g2)
  334. f5g3_2 := int64(f5_2) * int64(g3)
  335. f5g4 := int64(f5) * int64(g4)
  336. f5g5_38 := int64(f5_2) * int64(g5_19)
  337. f5g6_19 := int64(f5) * int64(g6_19)
  338. f5g7_38 := int64(f5_2) * int64(g7_19)
  339. f5g8_19 := int64(f5) * int64(g8_19)
  340. f5g9_38 := int64(f5_2) * int64(g9_19)
  341. f6g0 := int64(f6) * int64(g0)
  342. f6g1 := int64(f6) * int64(g1)
  343. f6g2 := int64(f6) * int64(g2)
  344. f6g3 := int64(f6) * int64(g3)
  345. f6g4_19 := int64(f6) * int64(g4_19)
  346. f6g5_19 := int64(f6) * int64(g5_19)
  347. f6g6_19 := int64(f6) * int64(g6_19)
  348. f6g7_19 := int64(f6) * int64(g7_19)
  349. f6g8_19 := int64(f6) * int64(g8_19)
  350. f6g9_19 := int64(f6) * int64(g9_19)
  351. f7g0 := int64(f7) * int64(g0)
  352. f7g1_2 := int64(f7_2) * int64(g1)
  353. f7g2 := int64(f7) * int64(g2)
  354. f7g3_38 := int64(f7_2) * int64(g3_19)
  355. f7g4_19 := int64(f7) * int64(g4_19)
  356. f7g5_38 := int64(f7_2) * int64(g5_19)
  357. f7g6_19 := int64(f7) * int64(g6_19)
  358. f7g7_38 := int64(f7_2) * int64(g7_19)
  359. f7g8_19 := int64(f7) * int64(g8_19)
  360. f7g9_38 := int64(f7_2) * int64(g9_19)
  361. f8g0 := int64(f8) * int64(g0)
  362. f8g1 := int64(f8) * int64(g1)
  363. f8g2_19 := int64(f8) * int64(g2_19)
  364. f8g3_19 := int64(f8) * int64(g3_19)
  365. f8g4_19 := int64(f8) * int64(g4_19)
  366. f8g5_19 := int64(f8) * int64(g5_19)
  367. f8g6_19 := int64(f8) * int64(g6_19)
  368. f8g7_19 := int64(f8) * int64(g7_19)
  369. f8g8_19 := int64(f8) * int64(g8_19)
  370. f8g9_19 := int64(f8) * int64(g9_19)
  371. f9g0 := int64(f9) * int64(g0)
  372. f9g1_38 := int64(f9_2) * int64(g1_19)
  373. f9g2_19 := int64(f9) * int64(g2_19)
  374. f9g3_38 := int64(f9_2) * int64(g3_19)
  375. f9g4_19 := int64(f9) * int64(g4_19)
  376. f9g5_38 := int64(f9_2) * int64(g5_19)
  377. f9g6_19 := int64(f9) * int64(g6_19)
  378. f9g7_38 := int64(f9_2) * int64(g7_19)
  379. f9g8_19 := int64(f9) * int64(g8_19)
  380. f9g9_38 := int64(f9_2) * int64(g9_19)
  381. h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
  382. h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
  383. h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
  384. h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
  385. h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
  386. h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
  387. h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
  388. h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
  389. h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
  390. h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
  391. var carry [10]int64
  392. // |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
  393. // i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
  394. // |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
  395. // i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
  396. carry[0] = (h0 + (1 << 25)) >> 26
  397. h1 += carry[0]
  398. h0 -= carry[0] << 26
  399. carry[4] = (h4 + (1 << 25)) >> 26
  400. h5 += carry[4]
  401. h4 -= carry[4] << 26
  402. // |h0| <= 2^25
  403. // |h4| <= 2^25
  404. // |h1| <= 1.51*2^58
  405. // |h5| <= 1.51*2^58
  406. carry[1] = (h1 + (1 << 24)) >> 25
  407. h2 += carry[1]
  408. h1 -= carry[1] << 25
  409. carry[5] = (h5 + (1 << 24)) >> 25
  410. h6 += carry[5]
  411. h5 -= carry[5] << 25
  412. // |h1| <= 2^24; from now on fits into int32
  413. // |h5| <= 2^24; from now on fits into int32
  414. // |h2| <= 1.21*2^59
  415. // |h6| <= 1.21*2^59
  416. carry[2] = (h2 + (1 << 25)) >> 26
  417. h3 += carry[2]
  418. h2 -= carry[2] << 26
  419. carry[6] = (h6 + (1 << 25)) >> 26
  420. h7 += carry[6]
  421. h6 -= carry[6] << 26
  422. // |h2| <= 2^25; from now on fits into int32 unchanged
  423. // |h6| <= 2^25; from now on fits into int32 unchanged
  424. // |h3| <= 1.51*2^58
  425. // |h7| <= 1.51*2^58
  426. carry[3] = (h3 + (1 << 24)) >> 25
  427. h4 += carry[3]
  428. h3 -= carry[3] << 25
  429. carry[7] = (h7 + (1 << 24)) >> 25
  430. h8 += carry[7]
  431. h7 -= carry[7] << 25
  432. // |h3| <= 2^24; from now on fits into int32 unchanged
  433. // |h7| <= 2^24; from now on fits into int32 unchanged
  434. // |h4| <= 1.52*2^33
  435. // |h8| <= 1.52*2^33
  436. carry[4] = (h4 + (1 << 25)) >> 26
  437. h5 += carry[4]
  438. h4 -= carry[4] << 26
  439. carry[8] = (h8 + (1 << 25)) >> 26
  440. h9 += carry[8]
  441. h8 -= carry[8] << 26
  442. // |h4| <= 2^25; from now on fits into int32 unchanged
  443. // |h8| <= 2^25; from now on fits into int32 unchanged
  444. // |h5| <= 1.01*2^24
  445. // |h9| <= 1.51*2^58
  446. carry[9] = (h9 + (1 << 24)) >> 25
  447. h0 += carry[9] * 19
  448. h9 -= carry[9] << 25
  449. // |h9| <= 2^24; from now on fits into int32 unchanged
  450. // |h0| <= 1.8*2^37
  451. carry[0] = (h0 + (1 << 25)) >> 26
  452. h1 += carry[0]
  453. h0 -= carry[0] << 26
  454. // |h0| <= 2^25; from now on fits into int32 unchanged
  455. // |h1| <= 1.01*2^24
  456. h[0] = int32(h0)
  457. h[1] = int32(h1)
  458. h[2] = int32(h2)
  459. h[3] = int32(h3)
  460. h[4] = int32(h4)
  461. h[5] = int32(h5)
  462. h[6] = int32(h6)
  463. h[7] = int32(h7)
  464. h[8] = int32(h8)
  465. h[9] = int32(h9)
  466. }
  467. // feSquare calculates h = f*f. Can overlap h with f.
  468. //
  469. // Preconditions:
  470. // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  471. //
  472. // Postconditions:
  473. // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  474. func feSquare(h, f *fieldElement) {
  475. f0 := f[0]
  476. f1 := f[1]
  477. f2 := f[2]
  478. f3 := f[3]
  479. f4 := f[4]
  480. f5 := f[5]
  481. f6 := f[6]
  482. f7 := f[7]
  483. f8 := f[8]
  484. f9 := f[9]
  485. f0_2 := 2 * f0
  486. f1_2 := 2 * f1
  487. f2_2 := 2 * f2
  488. f3_2 := 2 * f3
  489. f4_2 := 2 * f4
  490. f5_2 := 2 * f5
  491. f6_2 := 2 * f6
  492. f7_2 := 2 * f7
  493. f5_38 := 38 * f5 // 1.31*2^30
  494. f6_19 := 19 * f6 // 1.31*2^30
  495. f7_38 := 38 * f7 // 1.31*2^30
  496. f8_19 := 19 * f8 // 1.31*2^30
  497. f9_38 := 38 * f9 // 1.31*2^30
  498. f0f0 := int64(f0) * int64(f0)
  499. f0f1_2 := int64(f0_2) * int64(f1)
  500. f0f2_2 := int64(f0_2) * int64(f2)
  501. f0f3_2 := int64(f0_2) * int64(f3)
  502. f0f4_2 := int64(f0_2) * int64(f4)
  503. f0f5_2 := int64(f0_2) * int64(f5)
  504. f0f6_2 := int64(f0_2) * int64(f6)
  505. f0f7_2 := int64(f0_2) * int64(f7)
  506. f0f8_2 := int64(f0_2) * int64(f8)
  507. f0f9_2 := int64(f0_2) * int64(f9)
  508. f1f1_2 := int64(f1_2) * int64(f1)
  509. f1f2_2 := int64(f1_2) * int64(f2)
  510. f1f3_4 := int64(f1_2) * int64(f3_2)
  511. f1f4_2 := int64(f1_2) * int64(f4)
  512. f1f5_4 := int64(f1_2) * int64(f5_2)
  513. f1f6_2 := int64(f1_2) * int64(f6)
  514. f1f7_4 := int64(f1_2) * int64(f7_2)
  515. f1f8_2 := int64(f1_2) * int64(f8)
  516. f1f9_76 := int64(f1_2) * int64(f9_38)
  517. f2f2 := int64(f2) * int64(f2)
  518. f2f3_2 := int64(f2_2) * int64(f3)
  519. f2f4_2 := int64(f2_2) * int64(f4)
  520. f2f5_2 := int64(f2_2) * int64(f5)
  521. f2f6_2 := int64(f2_2) * int64(f6)
  522. f2f7_2 := int64(f2_2) * int64(f7)
  523. f2f8_38 := int64(f2_2) * int64(f8_19)
  524. f2f9_38 := int64(f2) * int64(f9_38)
  525. f3f3_2 := int64(f3_2) * int64(f3)
  526. f3f4_2 := int64(f3_2) * int64(f4)
  527. f3f5_4 := int64(f3_2) * int64(f5_2)
  528. f3f6_2 := int64(f3_2) * int64(f6)
  529. f3f7_76 := int64(f3_2) * int64(f7_38)
  530. f3f8_38 := int64(f3_2) * int64(f8_19)
  531. f3f9_76 := int64(f3_2) * int64(f9_38)
  532. f4f4 := int64(f4) * int64(f4)
  533. f4f5_2 := int64(f4_2) * int64(f5)
  534. f4f6_38 := int64(f4_2) * int64(f6_19)
  535. f4f7_38 := int64(f4) * int64(f7_38)
  536. f4f8_38 := int64(f4_2) * int64(f8_19)
  537. f4f9_38 := int64(f4) * int64(f9_38)
  538. f5f5_38 := int64(f5) * int64(f5_38)
  539. f5f6_38 := int64(f5_2) * int64(f6_19)
  540. f5f7_76 := int64(f5_2) * int64(f7_38)
  541. f5f8_38 := int64(f5_2) * int64(f8_19)
  542. f5f9_76 := int64(f5_2) * int64(f9_38)
  543. f6f6_19 := int64(f6) * int64(f6_19)
  544. f6f7_38 := int64(f6) * int64(f7_38)
  545. f6f8_38 := int64(f6_2) * int64(f8_19)
  546. f6f9_38 := int64(f6) * int64(f9_38)
  547. f7f7_38 := int64(f7) * int64(f7_38)
  548. f7f8_38 := int64(f7_2) * int64(f8_19)
  549. f7f9_76 := int64(f7_2) * int64(f9_38)
  550. f8f8_19 := int64(f8) * int64(f8_19)
  551. f8f9_38 := int64(f8) * int64(f9_38)
  552. f9f9_38 := int64(f9) * int64(f9_38)
  553. h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
  554. h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
  555. h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
  556. h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
  557. h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
  558. h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
  559. h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
  560. h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
  561. h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
  562. h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
  563. var carry [10]int64
  564. carry[0] = (h0 + (1 << 25)) >> 26
  565. h1 += carry[0]
  566. h0 -= carry[0] << 26
  567. carry[4] = (h4 + (1 << 25)) >> 26
  568. h5 += carry[4]
  569. h4 -= carry[4] << 26
  570. carry[1] = (h1 + (1 << 24)) >> 25
  571. h2 += carry[1]
  572. h1 -= carry[1] << 25
  573. carry[5] = (h5 + (1 << 24)) >> 25
  574. h6 += carry[5]
  575. h5 -= carry[5] << 25
  576. carry[2] = (h2 + (1 << 25)) >> 26
  577. h3 += carry[2]
  578. h2 -= carry[2] << 26
  579. carry[6] = (h6 + (1 << 25)) >> 26
  580. h7 += carry[6]
  581. h6 -= carry[6] << 26
  582. carry[3] = (h3 + (1 << 24)) >> 25
  583. h4 += carry[3]
  584. h3 -= carry[3] << 25
  585. carry[7] = (h7 + (1 << 24)) >> 25
  586. h8 += carry[7]
  587. h7 -= carry[7] << 25
  588. carry[4] = (h4 + (1 << 25)) >> 26
  589. h5 += carry[4]
  590. h4 -= carry[4] << 26
  591. carry[8] = (h8 + (1 << 25)) >> 26
  592. h9 += carry[8]
  593. h8 -= carry[8] << 26
  594. carry[9] = (h9 + (1 << 24)) >> 25
  595. h0 += carry[9] * 19
  596. h9 -= carry[9] << 25
  597. carry[0] = (h0 + (1 << 25)) >> 26
  598. h1 += carry[0]
  599. h0 -= carry[0] << 26
  600. h[0] = int32(h0)
  601. h[1] = int32(h1)
  602. h[2] = int32(h2)
  603. h[3] = int32(h3)
  604. h[4] = int32(h4)
  605. h[5] = int32(h5)
  606. h[6] = int32(h6)
  607. h[7] = int32(h7)
  608. h[8] = int32(h8)
  609. h[9] = int32(h9)
  610. }
  611. // feMul121666 calculates h = f * 121666. Can overlap h with f.
  612. //
  613. // Preconditions:
  614. // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  615. //
  616. // Postconditions:
  617. // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  618. func feMul121666(h, f *fieldElement) {
  619. h0 := int64(f[0]) * 121666
  620. h1 := int64(f[1]) * 121666
  621. h2 := int64(f[2]) * 121666
  622. h3 := int64(f[3]) * 121666
  623. h4 := int64(f[4]) * 121666
  624. h5 := int64(f[5]) * 121666
  625. h6 := int64(f[6]) * 121666
  626. h7 := int64(f[7]) * 121666
  627. h8 := int64(f[8]) * 121666
  628. h9 := int64(f[9]) * 121666
  629. var carry [10]int64
  630. carry[9] = (h9 + (1 << 24)) >> 25
  631. h0 += carry[9] * 19
  632. h9 -= carry[9] << 25
  633. carry[1] = (h1 + (1 << 24)) >> 25
  634. h2 += carry[1]
  635. h1 -= carry[1] << 25
  636. carry[3] = (h3 + (1 << 24)) >> 25
  637. h4 += carry[3]
  638. h3 -= carry[3] << 25
  639. carry[5] = (h5 + (1 << 24)) >> 25
  640. h6 += carry[5]
  641. h5 -= carry[5] << 25
  642. carry[7] = (h7 + (1 << 24)) >> 25
  643. h8 += carry[7]
  644. h7 -= carry[7] << 25
  645. carry[0] = (h0 + (1 << 25)) >> 26
  646. h1 += carry[0]
  647. h0 -= carry[0] << 26
  648. carry[2] = (h2 + (1 << 25)) >> 26
  649. h3 += carry[2]
  650. h2 -= carry[2] << 26
  651. carry[4] = (h4 + (1 << 25)) >> 26
  652. h5 += carry[4]
  653. h4 -= carry[4] << 26
  654. carry[6] = (h6 + (1 << 25)) >> 26
  655. h7 += carry[6]
  656. h6 -= carry[6] << 26
  657. carry[8] = (h8 + (1 << 25)) >> 26
  658. h9 += carry[8]
  659. h8 -= carry[8] << 26
  660. h[0] = int32(h0)
  661. h[1] = int32(h1)
  662. h[2] = int32(h2)
  663. h[3] = int32(h3)
  664. h[4] = int32(h4)
  665. h[5] = int32(h5)
  666. h[6] = int32(h6)
  667. h[7] = int32(h7)
  668. h[8] = int32(h8)
  669. h[9] = int32(h9)
  670. }
  671. // feInvert sets out = z^-1.
  672. func feInvert(out, z *fieldElement) {
  673. var t0, t1, t2, t3 fieldElement
  674. var i int
  675. feSquare(&t0, z)
  676. for i = 1; i < 1; i++ {
  677. feSquare(&t0, &t0)
  678. }
  679. feSquare(&t1, &t0)
  680. for i = 1; i < 2; i++ {
  681. feSquare(&t1, &t1)
  682. }
  683. feMul(&t1, z, &t1)
  684. feMul(&t0, &t0, &t1)
  685. feSquare(&t2, &t0)
  686. for i = 1; i < 1; i++ {
  687. feSquare(&t2, &t2)
  688. }
  689. feMul(&t1, &t1, &t2)
  690. feSquare(&t2, &t1)
  691. for i = 1; i < 5; i++ {
  692. feSquare(&t2, &t2)
  693. }
  694. feMul(&t1, &t2, &t1)
  695. feSquare(&t2, &t1)
  696. for i = 1; i < 10; i++ {
  697. feSquare(&t2, &t2)
  698. }
  699. feMul(&t2, &t2, &t1)
  700. feSquare(&t3, &t2)
  701. for i = 1; i < 20; i++ {
  702. feSquare(&t3, &t3)
  703. }
  704. feMul(&t2, &t3, &t2)
  705. feSquare(&t2, &t2)
  706. for i = 1; i < 10; i++ {
  707. feSquare(&t2, &t2)
  708. }
  709. feMul(&t1, &t2, &t1)
  710. feSquare(&t2, &t1)
  711. for i = 1; i < 50; i++ {
  712. feSquare(&t2, &t2)
  713. }
  714. feMul(&t2, &t2, &t1)
  715. feSquare(&t3, &t2)
  716. for i = 1; i < 100; i++ {
  717. feSquare(&t3, &t3)
  718. }
  719. feMul(&t2, &t3, &t2)
  720. feSquare(&t2, &t2)
  721. for i = 1; i < 50; i++ {
  722. feSquare(&t2, &t2)
  723. }
  724. feMul(&t1, &t2, &t1)
  725. feSquare(&t1, &t1)
  726. for i = 1; i < 5; i++ {
  727. feSquare(&t1, &t1)
  728. }
  729. feMul(out, &t1, &t0)
  730. }
  731. func scalarMultGeneric(out, in, base *[32]byte) {
  732. var e [32]byte
  733. copy(e[:], in[:])
  734. e[0] &= 248
  735. e[31] &= 127
  736. e[31] |= 64
  737. var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
  738. feFromBytes(&x1, base)
  739. feOne(&x2)
  740. feCopy(&x3, &x1)
  741. feOne(&z3)
  742. swap := int32(0)
  743. for pos := 254; pos >= 0; pos-- {
  744. b := e[pos/8] >> uint(pos&7)
  745. b &= 1
  746. swap ^= int32(b)
  747. feCSwap(&x2, &x3, swap)
  748. feCSwap(&z2, &z3, swap)
  749. swap = int32(b)
  750. feSub(&tmp0, &x3, &z3)
  751. feSub(&tmp1, &x2, &z2)
  752. feAdd(&x2, &x2, &z2)
  753. feAdd(&z2, &x3, &z3)
  754. feMul(&z3, &tmp0, &x2)
  755. feMul(&z2, &z2, &tmp1)
  756. feSquare(&tmp0, &tmp1)
  757. feSquare(&tmp1, &x2)
  758. feAdd(&x3, &z3, &z2)
  759. feSub(&z2, &z3, &z2)
  760. feMul(&x2, &tmp1, &tmp0)
  761. feSub(&tmp1, &tmp1, &tmp0)
  762. feSquare(&z2, &z2)
  763. feMul121666(&z3, &tmp1)
  764. feSquare(&x3, &x3)
  765. feAdd(&tmp0, &tmp0, &z3)
  766. feMul(&z3, &x1, &z2)
  767. feMul(&z2, &tmp1, &tmp0)
  768. }
  769. feCSwap(&x2, &x3, swap)
  770. feCSwap(&z2, &z3, swap)
  771. feInvert(&z2, &z2)
  772. feMul(&x2, &x2, &z2)
  773. feToBytes(out, &x2)
  774. }