pax_global_header00006660000000000000000000000064142245750340014520gustar00rootroot0000000000000052 comment=08ff755fd8f7e060d9756857368435c53540d83b saferith-0.33.0/000077500000000000000000000000001422457503400134105ustar00rootroot00000000000000saferith-0.33.0/CITATION.cff000066400000000000000000000004031422457503400152770ustar00rootroot00000000000000cff-version: 1.2.0 message: "If you use this software, please cite it as below." authors: - family-names: Meier given-names: "Lúcás Críostóir" title: "saferith" version: 0.28.0 date-released: 2021-04-16 url: "https://github.com/cronokirby/saferith" saferith-0.33.0/LICENSE000066400000000000000000000020411422457503400144120ustar00rootroot00000000000000Copyright (c) 2021 Lúcás Meier Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. saferith-0.33.0/LICENSE_go000066400000000000000000000027071422457503400151100ustar00rootroot00000000000000Copyright (c) 2009 The Go Authors. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. saferith-0.33.0/README.md000066400000000000000000000036421422457503400146740ustar00rootroot00000000000000# saferith The purpose of this package is to provide a version of arbitrary sized arithmetic, in a safer (i.e. constant-time) way, for cryptography. *This is experimental software, use at your own peril*. # Assembly This code reuses some assembly routines from Go's standard library, inside of the `arith*.go`. These have been adjusted to remove some non-constant-time codepaths, most of which aren't used anyways. 
# Integrating with Go Initially, this code was structured to be relatively straightforwardly patched into Go's standard library. The idea would be to use the `arith*.go` files already in Go's `math/big` package, and just add a `num.go` file. Unfortunately, this approach doesn't seem to be possible, because of `addVWlarge` and `subVWlarge`, which are two non-constant time routines. These are jumped to inside of the assembly code in Go's `math/big` routines, so using them would require intrusive modification, which rules out this code living alongside `math/big`, and sharing its routines. ## Merging things upstream The easiest path towards merging this work upstream, in all likelihood, is having this package live in `crypto`, and duplicating some of the assembly code as necessary. The rationale here is that `math/big`'s needs will inevitably lead to situations like this, where a routine is tempted to bail towards a non-constant time variant for large or special inputs. Ultimately, having this code live in `crypto` is much more likely to allow us to ensure its integrity. It would also allow us to add assembly specifically tailored for our operations, such as conditional addition, and things like that. # Benchmarks Run with assembly routines: ``` go test -bench=. ``` Run with pure Go code: ``` go test -bench=. -tags math_big_pure_go ``` # Licensing The files `arith*.go` come from Go's standard library, and are licensed under a BSD license in `LICENSE_go`. The rest of the code is under an MIT license. saferith-0.33.0/arith.go000066400000000000000000000103561422457503400150530ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. // This file provides Go implementations of elementary multi-precision // arithmetic operations on word vectors. These have the suffix _g. 
// These are needed for platforms without assembly implementations of these routines. // This file also contains elementary operations that can be implemented // sufficiently efficiently in Go. package saferith import ( "math/bits" ) // A Word represents a single digit of a multi-precision unsigned integer. type Word uint const ( _S = _W / 8 // word size in bytes _W = bits.UintSize // word size in bits _B = 1 << _W // digit base _M = _B - 1 // digit mask ) // Many of the loops in this file are of the form // for i := 0; i < len(z) && i < len(x) && i < len(y); i++ // i < len(z) is the real condition. // However, checking i < len(x) && i < len(y) as well is faster than // having the compiler do a bounds check in the body of the loop; // remarkably it is even faster than hoisting the bounds check // out of the loop, by doing something like // _, _ = x[len(z)-1], y[len(z)-1] // There are other ways to hoist the bounds check out of the loop, // but the compiler's BCE isn't powerful enough for them (yet?). // See the discussion in CL 164966. // ---------------------------------------------------------------------------- // Elementary operations on words // // These operations are used by the vector operations below. // z1<<_W + z0 = x*y func mulWW_g(x, y Word) (z1, z0 Word) { hi, lo := bits.Mul(uint(x), uint(y)) return Word(hi), Word(lo) } // z1<<_W + z0 = x*y + c func mulAddWWW_g(x, y, c Word) (z1, z0 Word) { hi, lo := bits.Mul(uint(x), uint(y)) var cc uint lo, cc = bits.Add(lo, uint(c), 0) return Word(hi + cc), Word(lo) } // The resulting carry c is either 0 or 1. func addVV_g(z, x, y []Word) (c Word) { // The comment near the top of this file discusses this for loop condition. for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c)) z[i] = Word(zi) c = Word(cc) } return } // The resulting carry c is either 0 or 1. 
func subVV_g(z, x, y []Word) (c Word) { // The comment near the top of this file discusses this for loop condition. for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c)) z[i] = Word(zi) c = Word(cc) } return } // The resulting carry c is either 0 or 1. func addVW_g(z, x []Word, y Word) (c Word) { c = y // The comment near the top of this file discusses this for loop condition. for i := 0; i < len(z) && i < len(x); i++ { zi, cc := bits.Add(uint(x[i]), uint(c), 0) z[i] = Word(zi) c = Word(cc) } return } func subVW_g(z, x []Word, y Word) (c Word) { c = y // The comment near the top of this file discusses this for loop condition. for i := 0; i < len(z) && i < len(x); i++ { zi, cc := bits.Sub(uint(x[i]), uint(c), 0) z[i] = Word(zi) c = Word(cc) } return } func shlVU_g(z, x []Word, s uint) (c Word) { if s == 0 { copy(z, x) return } if len(z) == 0 { return } s &= _W - 1 // hint to the compiler that shifts by s don't need guard code ŝ := _W - s ŝ &= _W - 1 // ditto c = x[len(z)-1] >> ŝ for i := len(z) - 1; i > 0; i-- { z[i] = x[i]<>ŝ } z[0] = x[0] << s return } func shrVU_g(z, x []Word, s uint) (c Word) { if s == 0 { copy(z, x) return } if len(z) == 0 { return } s &= _W - 1 // hint to the compiler that shifts by s don't need guard code ŝ := _W - s ŝ &= _W - 1 // ditto c = x[0] << ŝ for i := 0; i < len(z)-1; i++ { z[i] = x[i]>>s | x[i+1]<<ŝ } z[len(z)-1] = x[len(z)-1] >> s return } func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { c = r // The comment near the top of this file discusses this for loop condition. for i := 0; i < len(z) && i < len(x); i++ { c, z[i] = mulAddWWW_g(x[i], y, c) } return } func addMulVVW_g(z, x []Word, y Word) (c Word) { // The comment near the top of this file discusses this for loop condition. 
for i := 0; i < len(z) && i < len(x); i++ { z1, z0 := mulAddWWW_g(x[i], y, z[i]) lo, cc := bits.Add(uint(z0), uint(c), 0) c, z[i] = Word(cc), Word(lo) c += z1 } return } saferith-0.33.0/arith_386.s000066400000000000000000000103241422457503400153030ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go // +build !math_big_pure_go #include "textflag.h" // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. // func mulWW(x, y Word) (z1, z0 Word) TEXT ·mulWW(SB),NOSPLIT,$0 MOVL x+0(FP), AX MULL y+4(FP) MOVL DX, z1+8(FP) MOVL AX, z0+12(FP) RET // func addVV(z, x, y []Word) (c Word) TEXT ·addVV(SB),NOSPLIT,$0 MOVL z+0(FP), DI MOVL x+12(FP), SI MOVL y+24(FP), CX MOVL z_len+4(FP), BP MOVL $0, BX // i = 0 MOVL $0, DX // c = 0 JMP E1 L1: MOVL (SI)(BX*4), AX ADDL DX, DX // restore CF ADCL (CX)(BX*4), AX SBBL DX, DX // save CF MOVL AX, (DI)(BX*4) ADDL $1, BX // i++ E1: CMPL BX, BP // i < n JL L1 NEGL DX MOVL DX, c+36(FP) RET // func subVV(z, x, y []Word) (c Word) // (same as addVV except for SBBL instead of ADCL and label names) TEXT ·subVV(SB),NOSPLIT,$0 MOVL z+0(FP), DI MOVL x+12(FP), SI MOVL y+24(FP), CX MOVL z_len+4(FP), BP MOVL $0, BX // i = 0 MOVL $0, DX // c = 0 JMP E2 L2: MOVL (SI)(BX*4), AX ADDL DX, DX // restore CF SBBL (CX)(BX*4), AX SBBL DX, DX // save CF MOVL AX, (DI)(BX*4) ADDL $1, BX // i++ E2: CMPL BX, BP // i < n JL L2 NEGL DX MOVL DX, c+36(FP) RET // func addVW(z, x []Word, y Word) (c Word) TEXT ·addVW(SB),NOSPLIT,$0 MOVL z+0(FP), DI MOVL x+12(FP), SI MOVL y+24(FP), AX // c = y MOVL z_len+4(FP), BP MOVL $0, BX // i = 0 JMP E3 L3: ADDL (SI)(BX*4), AX MOVL AX, (DI)(BX*4) SBBL AX, AX // save CF NEGL AX ADDL $1, BX // i++ E3: CMPL BX, BP // i < n JL L3 MOVL AX, c+28(FP) RET // func subVW(z, x []Word, y Word) (c Word) TEXT 
·subVW(SB),NOSPLIT,$0 MOVL z+0(FP), DI MOVL x+12(FP), SI MOVL y+24(FP), AX // c = y MOVL z_len+4(FP), BP MOVL $0, BX // i = 0 JMP E4 L4: MOVL (SI)(BX*4), DX SUBL AX, DX MOVL DX, (DI)(BX*4) SBBL AX, AX // save CF NEGL AX ADDL $1, BX // i++ E4: CMPL BX, BP // i < n JL L4 MOVL AX, c+28(FP) RET // func shlVU(z, x []Word, s uint) (c Word) TEXT ·shlVU(SB),NOSPLIT,$0 MOVL z_len+4(FP), BX // i = z SUBL $1, BX // i-- JL X8b // i < 0 (n <= 0) // n > 0 MOVL z+0(FP), DI MOVL x+12(FP), SI MOVL s+24(FP), CX MOVL (SI)(BX*4), AX // w1 = x[n-1] MOVL $0, DX SHLL CX, AX, DX // w1>>ŝ MOVL DX, c+28(FP) CMPL BX, $0 JLE X8a // i <= 0 // i > 0 L8: MOVL AX, DX // w = w1 MOVL -4(SI)(BX*4), AX // w1 = x[i-1] SHLL CX, AX, DX // w<>ŝ MOVL DX, (DI)(BX*4) // z[i] = w<>ŝ SUBL $1, BX // i-- JG L8 // i > 0 // i <= 0 X8a: SHLL CX, AX // w1< 0 MOVL z+0(FP), DI MOVL x+12(FP), SI MOVL s+24(FP), CX MOVL (SI), AX // w1 = x[0] MOVL $0, DX SHRL CX, AX, DX // w1<<ŝ MOVL DX, c+28(FP) MOVL $0, BX // i = 0 JMP E9 // i < n-1 L9: MOVL AX, DX // w = w1 MOVL 4(SI)(BX*4), AX // w1 = x[i+1] SHRL CX, AX, DX // w>>s | w1<<ŝ MOVL DX, (DI)(BX*4) // z[i] = w>>s | w1<<ŝ ADDL $1, BX // i++ E9: CMPL BX, BP JL L9 // i < n-1 // i >= n-1 X9a: SHRL CX, AX // w1>>s MOVL AX, (DI)(BP*4) // z[n-1] = w1>>s RET X9b: MOVL $0, c+28(FP) RET // func mulAddVWW(z, x []Word, y, r Word) (c Word) TEXT ·mulAddVWW(SB),NOSPLIT,$0 MOVL z+0(FP), DI MOVL x+12(FP), SI MOVL y+24(FP), BP MOVL r+28(FP), CX // c = r MOVL z_len+4(FP), BX LEAL (DI)(BX*4), DI LEAL (SI)(BX*4), SI NEGL BX // i = -n JMP E5 L5: MOVL (SI)(BX*4), AX MULL BP ADDL CX, AX ADCL $0, DX MOVL AX, (DI)(BX*4) MOVL DX, CX ADDL $1, BX // i++ E5: CMPL BX, $0 // i < 0 JL L5 MOVL CX, c+32(FP) RET // func addMulVVW(z, x []Word, y Word) (c Word) TEXT ·addMulVVW(SB),NOSPLIT,$0 MOVL z+0(FP), DI MOVL x+12(FP), SI MOVL y+24(FP), BP MOVL z_len+4(FP), BX LEAL (DI)(BX*4), DI LEAL (SI)(BX*4), SI NEGL BX // i = -n MOVL $0, CX // c = 0 JMP E6 L6: MOVL (SI)(BX*4), AX MULL BP ADDL CX, AX ADCL $0, DX ADDL 
AX, (DI)(BX*4) ADCL $0, DX MOVL DX, CX ADDL $1, BX // i++ E6: CMPL BX, $0 // i < 0 JL L6 MOVL CX, c+28(FP) RET saferith-0.33.0/arith_amd64.go000066400000000000000000000004701422457503400160420ustar00rootroot00000000000000// Copyright 2017 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. // +build !math_big_pure_go package saferith // This should be feature detected, but we can't use the internal/cpu package var support_adx = false saferith-0.33.0/arith_amd64.s000066400000000000000000000222161422457503400157010ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go // +build !math_big_pure_go #include "textflag.h" // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. // func mulWW(x, y Word) (z1, z0 Word) TEXT ·mulWW(SB),NOSPLIT,$0 MOVQ x+0(FP), AX MULQ y+8(FP) MOVQ DX, z1+16(FP) MOVQ AX, z0+24(FP) RET // The carry bit is saved with SBBQ Rx, Rx: if the carry was set, Rx is -1, otherwise it is 0. // It is restored with ADDQ Rx, Rx: if Rx was -1 the carry is set, otherwise it is cleared. // This is faster than using rotate instructions. 
// func addVV(z, x, y []Word) (c Word) TEXT ·addVV(SB),NOSPLIT,$0 MOVQ z_len+8(FP), DI MOVQ x+24(FP), R8 MOVQ y+48(FP), R9 MOVQ z+0(FP), R10 MOVQ $0, CX // c = 0 MOVQ $0, SI // i = 0 // s/JL/JMP/ below to disable the unrolled loop SUBQ $4, DI // n -= 4 JL V1 // if n < 0 goto V1 U1: // n >= 0 // regular loop body unrolled 4x ADDQ CX, CX // restore CF MOVQ 0(R8)(SI*8), R11 MOVQ 8(R8)(SI*8), R12 MOVQ 16(R8)(SI*8), R13 MOVQ 24(R8)(SI*8), R14 ADCQ 0(R9)(SI*8), R11 ADCQ 8(R9)(SI*8), R12 ADCQ 16(R9)(SI*8), R13 ADCQ 24(R9)(SI*8), R14 MOVQ R11, 0(R10)(SI*8) MOVQ R12, 8(R10)(SI*8) MOVQ R13, 16(R10)(SI*8) MOVQ R14, 24(R10)(SI*8) SBBQ CX, CX // save CF ADDQ $4, SI // i += 4 SUBQ $4, DI // n -= 4 JGE U1 // if n >= 0 goto U1 V1: ADDQ $4, DI // n += 4 JLE E1 // if n <= 0 goto E1 L1: // n > 0 ADDQ CX, CX // restore CF MOVQ 0(R8)(SI*8), R11 ADCQ 0(R9)(SI*8), R11 MOVQ R11, 0(R10)(SI*8) SBBQ CX, CX // save CF ADDQ $1, SI // i++ SUBQ $1, DI // n-- JG L1 // if n > 0 goto L1 E1: NEGQ CX MOVQ CX, c+72(FP) // return c RET // func subVV(z, x, y []Word) (c Word) // (same as addVV except for SBBQ instead of ADCQ and label names) TEXT ·subVV(SB),NOSPLIT,$0 MOVQ z_len+8(FP), DI MOVQ x+24(FP), R8 MOVQ y+48(FP), R9 MOVQ z+0(FP), R10 MOVQ $0, CX // c = 0 MOVQ $0, SI // i = 0 // s/JL/JMP/ below to disable the unrolled loop SUBQ $4, DI // n -= 4 JL V2 // if n < 0 goto V2 U2: // n >= 0 // regular loop body unrolled 4x ADDQ CX, CX // restore CF MOVQ 0(R8)(SI*8), R11 MOVQ 8(R8)(SI*8), R12 MOVQ 16(R8)(SI*8), R13 MOVQ 24(R8)(SI*8), R14 SBBQ 0(R9)(SI*8), R11 SBBQ 8(R9)(SI*8), R12 SBBQ 16(R9)(SI*8), R13 SBBQ 24(R9)(SI*8), R14 MOVQ R11, 0(R10)(SI*8) MOVQ R12, 8(R10)(SI*8) MOVQ R13, 16(R10)(SI*8) MOVQ R14, 24(R10)(SI*8) SBBQ CX, CX // save CF ADDQ $4, SI // i += 4 SUBQ $4, DI // n -= 4 JGE U2 // if n >= 0 goto U2 V2: ADDQ $4, DI // n += 4 JLE E2 // if n <= 0 goto E2 L2: // n > 0 ADDQ CX, CX // restore CF MOVQ 0(R8)(SI*8), R11 SBBQ 0(R9)(SI*8), R11 MOVQ R11, 0(R10)(SI*8) SBBQ CX, CX // save CF ADDQ $1, SI // 
i++ SUBQ $1, DI // n-- JG L2 // if n > 0 goto L2 E2: NEGQ CX MOVQ CX, c+72(FP) // return c RET // func addVW(z, x []Word, y Word) (c Word) TEXT ·addVW(SB),NOSPLIT,$0 MOVQ z_len+8(FP), DI MOVQ x+24(FP), R8 MOVQ y+48(FP), CX // c = y MOVQ z+0(FP), R10 MOVQ $0, SI // i = 0 // s/JL/JMP/ below to disable the unrolled loop SUBQ $4, DI // n -= 4 JL V3 // if n < 4 goto V3 U3: // n >= 0 // regular loop body unrolled 4x MOVQ 0(R8)(SI*8), R11 MOVQ 8(R8)(SI*8), R12 MOVQ 16(R8)(SI*8), R13 MOVQ 24(R8)(SI*8), R14 ADDQ CX, R11 ADCQ $0, R12 ADCQ $0, R13 ADCQ $0, R14 SBBQ CX, CX // save CF NEGQ CX MOVQ R11, 0(R10)(SI*8) MOVQ R12, 8(R10)(SI*8) MOVQ R13, 16(R10)(SI*8) MOVQ R14, 24(R10)(SI*8) ADDQ $4, SI // i += 4 SUBQ $4, DI // n -= 4 JGE U3 // if n >= 0 goto U3 V3: ADDQ $4, DI // n += 4 JLE E3 // if n <= 0 goto E3 L3: // n > 0 ADDQ 0(R8)(SI*8), CX MOVQ CX, 0(R10)(SI*8) SBBQ CX, CX // save CF NEGQ CX ADDQ $1, SI // i++ SUBQ $1, DI // n-- JG L3 // if n > 0 goto L3 E3: MOVQ CX, c+56(FP) // return c RET // func subVW(z, x []Word, y Word) (c Word) // (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names) TEXT ·subVW(SB),NOSPLIT,$0 MOVQ z_len+8(FP), DI MOVQ x+24(FP), R8 MOVQ y+48(FP), CX // c = y MOVQ z+0(FP), R10 MOVQ $0, SI // i = 0 // s/JL/JMP/ below to disable the unrolled loop SUBQ $4, DI // n -= 4 JL V4 // if n < 4 goto V4 U4: // n >= 0 // regular loop body unrolled 4x MOVQ 0(R8)(SI*8), R11 MOVQ 8(R8)(SI*8), R12 MOVQ 16(R8)(SI*8), R13 MOVQ 24(R8)(SI*8), R14 SUBQ CX, R11 SBBQ $0, R12 SBBQ $0, R13 SBBQ $0, R14 SBBQ CX, CX // save CF NEGQ CX MOVQ R11, 0(R10)(SI*8) MOVQ R12, 8(R10)(SI*8) MOVQ R13, 16(R10)(SI*8) MOVQ R14, 24(R10)(SI*8) ADDQ $4, SI // i += 4 SUBQ $4, DI // n -= 4 JGE U4 // if n >= 0 goto U4 V4: ADDQ $4, DI // n += 4 JLE E4 // if n <= 0 goto E4 L4: // n > 0 MOVQ 0(R8)(SI*8), R11 SUBQ CX, R11 MOVQ R11, 0(R10)(SI*8) SBBQ CX, CX // save CF NEGQ CX ADDQ $1, SI // i++ SUBQ $1, DI // n-- JG L4 // if n > 0 goto L4 E4: MOVQ CX, c+56(FP) // return c RET // func 
shlVU(z, x []Word, s uint) (c Word) TEXT ·shlVU(SB),NOSPLIT,$0 MOVQ z_len+8(FP), BX // i = z SUBQ $1, BX // i-- JL X8b // i < 0 (n <= 0) // n > 0 MOVQ z+0(FP), R10 MOVQ x+24(FP), R8 MOVQ s+48(FP), CX MOVQ (R8)(BX*8), AX // w1 = x[n-1] MOVQ $0, DX SHLQ CX, AX, DX // w1>>ŝ MOVQ DX, c+56(FP) CMPQ BX, $0 JLE X8a // i <= 0 // i > 0 L8: MOVQ AX, DX // w = w1 MOVQ -8(R8)(BX*8), AX // w1 = x[i-1] SHLQ CX, AX, DX // w<>ŝ MOVQ DX, (R10)(BX*8) // z[i] = w<>ŝ SUBQ $1, BX // i-- JG L8 // i > 0 // i <= 0 X8a: SHLQ CX, AX // w1< 0 MOVQ z+0(FP), R10 MOVQ x+24(FP), R8 MOVQ s+48(FP), CX MOVQ (R8), AX // w1 = x[0] MOVQ $0, DX SHRQ CX, AX, DX // w1<<ŝ MOVQ DX, c+56(FP) MOVQ $0, BX // i = 0 JMP E9 // i < n-1 L9: MOVQ AX, DX // w = w1 MOVQ 8(R8)(BX*8), AX // w1 = x[i+1] SHRQ CX, AX, DX // w>>s | w1<<ŝ MOVQ DX, (R10)(BX*8) // z[i] = w>>s | w1<<ŝ ADDQ $1, BX // i++ E9: CMPQ BX, R11 JL L9 // i < n-1 // i >= n-1 X9a: SHRQ CX, AX // w1>>s MOVQ AX, (R10)(R11*8) // z[n-1] = w1>>s RET X9b: MOVQ $0, c+56(FP) RET // func mulAddVWW(z, x []Word, y, r Word) (c Word) TEXT ·mulAddVWW(SB),NOSPLIT,$0 MOVQ z+0(FP), R10 MOVQ x+24(FP), R8 MOVQ y+48(FP), R9 MOVQ r+56(FP), CX // c = r MOVQ z_len+8(FP), R11 MOVQ $0, BX // i = 0 CMPQ R11, $4 JL E5 U5: // i+4 <= n // regular loop body unrolled 4x MOVQ (0*8)(R8)(BX*8), AX MULQ R9 ADDQ CX, AX ADCQ $0, DX MOVQ AX, (0*8)(R10)(BX*8) MOVQ DX, CX MOVQ (1*8)(R8)(BX*8), AX MULQ R9 ADDQ CX, AX ADCQ $0, DX MOVQ AX, (1*8)(R10)(BX*8) MOVQ DX, CX MOVQ (2*8)(R8)(BX*8), AX MULQ R9 ADDQ CX, AX ADCQ $0, DX MOVQ AX, (2*8)(R10)(BX*8) MOVQ DX, CX MOVQ (3*8)(R8)(BX*8), AX MULQ R9 ADDQ CX, AX ADCQ $0, DX MOVQ AX, (3*8)(R10)(BX*8) MOVQ DX, CX ADDQ $4, BX // i += 4 LEAQ 4(BX), DX CMPQ DX, R11 JLE U5 JMP E5 L5: MOVQ (R8)(BX*8), AX MULQ R9 ADDQ CX, AX ADCQ $0, DX MOVQ AX, (R10)(BX*8) MOVQ DX, CX ADDQ $1, BX // i++ E5: CMPQ BX, R11 // i < n JL L5 MOVQ CX, c+64(FP) RET // func addMulVVW(z, x []Word, y Word) (c Word) TEXT ·addMulVVW(SB),NOSPLIT,$0 CMPB ·support_adx(SB), $1 JEQ adx MOVQ 
z+0(FP), R10 MOVQ x+24(FP), R8 MOVQ y+48(FP), R9 MOVQ z_len+8(FP), R11 MOVQ $0, BX // i = 0 MOVQ $0, CX // c = 0 MOVQ R11, R12 ANDQ $-2, R12 CMPQ R11, $2 JAE A6 JMP E6 A6: MOVQ (R8)(BX*8), AX MULQ R9 ADDQ (R10)(BX*8), AX ADCQ $0, DX ADDQ CX, AX ADCQ $0, DX MOVQ DX, CX MOVQ AX, (R10)(BX*8) MOVQ (8)(R8)(BX*8), AX MULQ R9 ADDQ (8)(R10)(BX*8), AX ADCQ $0, DX ADDQ CX, AX ADCQ $0, DX MOVQ DX, CX MOVQ AX, (8)(R10)(BX*8) ADDQ $2, BX CMPQ BX, R12 JL A6 JMP E6 L6: MOVQ (R8)(BX*8), AX MULQ R9 ADDQ CX, AX ADCQ $0, DX ADDQ AX, (R10)(BX*8) ADCQ $0, DX MOVQ DX, CX ADDQ $1, BX // i++ E6: CMPQ BX, R11 // i < n JL L6 MOVQ CX, c+56(FP) RET adx: MOVQ z_len+8(FP), R11 MOVQ z+0(FP), R10 MOVQ x+24(FP), R8 MOVQ y+48(FP), DX MOVQ $0, BX // i = 0 MOVQ $0, CX // carry CMPQ R11, $8 JAE adx_loop_header CMPQ BX, R11 JL adx_short MOVQ CX, c+56(FP) RET adx_loop_header: MOVQ R11, R13 ANDQ $-8, R13 adx_loop: XORQ R9, R9 // unset flags MULXQ (R8), SI, DI ADCXQ CX,SI ADOXQ (R10), SI MOVQ SI,(R10) MULXQ 8(R8), AX, CX ADCXQ DI, AX ADOXQ 8(R10), AX MOVQ AX, 8(R10) MULXQ 16(R8), SI, DI ADCXQ CX, SI ADOXQ 16(R10), SI MOVQ SI, 16(R10) MULXQ 24(R8), AX, CX ADCXQ DI, AX ADOXQ 24(R10), AX MOVQ AX, 24(R10) MULXQ 32(R8), SI, DI ADCXQ CX, SI ADOXQ 32(R10), SI MOVQ SI, 32(R10) MULXQ 40(R8), AX, CX ADCXQ DI, AX ADOXQ 40(R10), AX MOVQ AX, 40(R10) MULXQ 48(R8), SI, DI ADCXQ CX, SI ADOXQ 48(R10), SI MOVQ SI, 48(R10) MULXQ 56(R8), AX, CX ADCXQ DI, AX ADOXQ 56(R10), AX MOVQ AX, 56(R10) ADCXQ R9, CX ADOXQ R9, CX ADDQ $64, R8 ADDQ $64, R10 ADDQ $8, BX CMPQ BX, R13 JL adx_loop MOVQ z+0(FP), R10 MOVQ x+24(FP), R8 CMPQ BX, R11 JL adx_short MOVQ CX, c+56(FP) RET adx_short: MULXQ (R8)(BX*8), SI, DI ADDQ CX, SI ADCQ $0, DI ADDQ SI, (R10)(BX*8) ADCQ $0, DI MOVQ DI, CX ADDQ $1, BX // i++ CMPQ BX, R11 JL adx_short MOVQ CX, c+56(FP) RET saferith-0.33.0/arith_arm.s000066400000000000000000000103171422457503400155440ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go // +build !math_big_pure_go #include "textflag.h" // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. // func addVV(z, x, y []Word) (c Word) TEXT ·addVV(SB),NOSPLIT,$0 ADD.S $0, R0 // clear carry flag MOVW z+0(FP), R1 MOVW z_len+4(FP), R4 MOVW x+12(FP), R2 MOVW y+24(FP), R3 ADD R4<<2, R1, R4 B E1 L1: MOVW.P 4(R2), R5 MOVW.P 4(R3), R6 ADC.S R6, R5 MOVW.P R5, 4(R1) E1: TEQ R1, R4 BNE L1 MOVW $0, R0 MOVW.CS $1, R0 MOVW R0, c+36(FP) RET // func subVV(z, x, y []Word) (c Word) // (same as addVV except for SBC instead of ADC and label names) TEXT ·subVV(SB),NOSPLIT,$0 SUB.S $0, R0 // clear borrow flag MOVW z+0(FP), R1 MOVW z_len+4(FP), R4 MOVW x+12(FP), R2 MOVW y+24(FP), R3 ADD R4<<2, R1, R4 B E2 L2: MOVW.P 4(R2), R5 MOVW.P 4(R3), R6 SBC.S R6, R5 MOVW.P R5, 4(R1) E2: TEQ R1, R4 BNE L2 MOVW $0, R0 MOVW.CC $1, R0 MOVW R0, c+36(FP) RET // func addVW(z, x []Word, y Word) (c Word) TEXT ·addVW(SB),NOSPLIT,$0 MOVW z+0(FP), R1 MOVW z_len+4(FP), R4 MOVW x+12(FP), R2 MOVW y+24(FP), R3 ADD R4<<2, R1, R4 TEQ R1, R4 BNE L3a MOVW R3, c+28(FP) RET L3a: MOVW.P 4(R2), R5 ADD.S R3, R5 MOVW.P R5, 4(R1) B E3 L3: MOVW.P 4(R2), R5 ADC.S $0, R5 MOVW.P R5, 4(R1) E3: TEQ R1, R4 BNE L3 MOVW $0, R0 MOVW.CS $1, R0 MOVW R0, c+28(FP) RET // func subVW(z, x []Word, y Word) (c Word) TEXT ·subVW(SB),NOSPLIT,$0 MOVW z+0(FP), R1 MOVW z_len+4(FP), R4 MOVW x+12(FP), R2 MOVW y+24(FP), R3 ADD R4<<2, R1, R4 TEQ R1, R4 BNE L4a MOVW R3, c+28(FP) RET L4a: MOVW.P 4(R2), R5 SUB.S R3, R5 MOVW.P R5, 4(R1) B E4 L4: MOVW.P 4(R2), R5 SBC.S $0, R5 MOVW.P R5, 4(R1) E4: TEQ R1, R4 BNE L4 MOVW $0, R0 MOVW.CC $1, R0 MOVW R0, c+28(FP) RET // func shlVU(z, x []Word, s uint) (c Word) TEXT ·shlVU(SB),NOSPLIT,$0 MOVW z_len+4(FP), R5 TEQ $0, R5 BEQ X7 MOVW z+0(FP), R1 MOVW x+12(FP), R2 ADD R5<<2, R2, R2 ADD R5<<2, R1, R5 MOVW 
s+24(FP), R3 TEQ $0, R3 // shift 0 is special BEQ Y7 ADD $4, R1 // stop one word early MOVW $32, R4 SUB R3, R4 MOVW $0, R7 MOVW.W -4(R2), R6 MOVW R6<>R4, R6 MOVW R6, c+28(FP) B E7 L7: MOVW.W -4(R2), R6 ORR R6>>R4, R7 MOVW.W R7, -4(R5) MOVW R6<>R3, R7 MOVW R6<>R3, R7 E6: TEQ R1, R5 BNE L6 MOVW R7, 0(R1) RET Y6: // copy loop, because shift 0 == shift 32 MOVW.P 4(R2), R6 MOVW.P R6, 4(R1) TEQ R1, R5 BNE Y6 X6: MOVW $0, R1 MOVW R1, c+28(FP) RET // func mulAddVWW(z, x []Word, y, r Word) (c Word) TEXT ·mulAddVWW(SB),NOSPLIT,$0 MOVW $0, R0 MOVW z+0(FP), R1 MOVW z_len+4(FP), R5 MOVW x+12(FP), R2 MOVW y+24(FP), R3 MOVW r+28(FP), R4 ADD R5<<2, R1, R5 B E8 // word loop L8: MOVW.P 4(R2), R6 MULLU R6, R3, (R7, R6) ADD.S R4, R6 ADC R0, R7 MOVW.P R6, 4(R1) MOVW R7, R4 E8: TEQ R1, R5 BNE L8 MOVW R4, c+32(FP) RET // func addMulVVW(z, x []Word, y Word) (c Word) TEXT ·addMulVVW(SB),NOSPLIT,$0 MOVW $0, R0 MOVW z+0(FP), R1 MOVW z_len+4(FP), R5 MOVW x+12(FP), R2 MOVW y+24(FP), R3 ADD R5<<2, R1, R5 MOVW $0, R4 B E9 // word loop L9: MOVW.P 4(R2), R6 MULLU R6, R3, (R7, R6) ADD.S R4, R6 ADC R0, R7 MOVW 0(R1), R4 ADD.S R4, R6 ADC R0, R7 MOVW.P R6, 4(R1) MOVW R7, R4 E9: TEQ R1, R5 BNE L9 MOVW R4, c+28(FP) RET // func mulWW(x, y Word) (z1, z0 Word) TEXT ·mulWW(SB),NOSPLIT,$0 MOVW x+0(FP), R1 MOVW y+4(FP), R2 MULLU R1, R2, (R4, R3) MOVW R4, z1+8(FP) MOVW R3, z0+12(FP) RET saferith-0.33.0/arith_arm64.s000066400000000000000000000254211422457503400157200ustar00rootroot00000000000000// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go // +build !math_big_pure_go #include "textflag.h" // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. // TODO: Consider re-implementing using Advanced SIMD // once the assembler supports those instructions. 
// func mulWW(x, y Word) (z1, z0 Word) TEXT ·mulWW(SB),NOSPLIT,$0 MOVD x+0(FP), R0 MOVD y+8(FP), R1 MUL R0, R1, R2 UMULH R0, R1, R3 MOVD R3, z1+16(FP) MOVD R2, z0+24(FP) RET // func addVV(z, x, y []Word) (c Word) TEXT ·addVV(SB),NOSPLIT,$0 MOVD z_len+8(FP), R0 MOVD x+24(FP), R8 MOVD y+48(FP), R9 MOVD z+0(FP), R10 ADDS $0, R0 // clear carry flag TBZ $0, R0, two MOVD.P 8(R8), R11 MOVD.P 8(R9), R15 ADCS R15, R11 MOVD.P R11, 8(R10) SUB $1, R0 two: TBZ $1, R0, loop LDP.P 16(R8), (R11, R12) LDP.P 16(R9), (R15, R16) ADCS R15, R11 ADCS R16, R12 STP.P (R11, R12), 16(R10) SUB $2, R0 loop: CBZ R0, done // careful not to touch the carry flag LDP.P 32(R8), (R11, R12) LDP -16(R8), (R13, R14) LDP.P 32(R9), (R15, R16) LDP -16(R9), (R17, R19) ADCS R15, R11 ADCS R16, R12 ADCS R17, R13 ADCS R19, R14 STP.P (R11, R12), 32(R10) STP (R13, R14), -16(R10) SUB $4, R0 B loop done: CSET HS, R0 // extract carry flag MOVD R0, c+72(FP) RET // func subVV(z, x, y []Word) (c Word) TEXT ·subVV(SB),NOSPLIT,$0 MOVD z_len+8(FP), R0 MOVD x+24(FP), R8 MOVD y+48(FP), R9 MOVD z+0(FP), R10 CMP R0, R0 // set carry flag TBZ $0, R0, two MOVD.P 8(R8), R11 MOVD.P 8(R9), R15 SBCS R15, R11 MOVD.P R11, 8(R10) SUB $1, R0 two: TBZ $1, R0, loop LDP.P 16(R8), (R11, R12) LDP.P 16(R9), (R15, R16) SBCS R15, R11 SBCS R16, R12 STP.P (R11, R12), 16(R10) SUB $2, R0 loop: CBZ R0, done // careful not to touch the carry flag LDP.P 32(R8), (R11, R12) LDP -16(R8), (R13, R14) LDP.P 32(R9), (R15, R16) LDP -16(R9), (R17, R19) SBCS R15, R11 SBCS R16, R12 SBCS R17, R13 SBCS R19, R14 STP.P (R11, R12), 32(R10) STP (R13, R14), -16(R10) SUB $4, R0 B loop done: CSET LO, R0 // extract carry flag MOVD R0, c+72(FP) RET #define vwOneOp(instr, op1) \ MOVD.P 8(R1), R4; \ instr op1, R4; \ MOVD.P R4, 8(R3); // handle the first 1~4 elements before starting iteration in addVW/subVW #define vwPreIter(instr1, instr2, counter, target) \ vwOneOp(instr1, R2); \ SUB $1, counter; \ CBZ counter, target; \ vwOneOp(instr2, $0); \ SUB $1, counter; \ CBZ 
counter, target; \ vwOneOp(instr2, $0); \ SUB $1, counter; \ CBZ counter, target; \ vwOneOp(instr2, $0); // do one iteration of add or sub in addVW/subVW #define vwOneIter(instr, counter, exit) \ CBZ counter, exit; \ // careful not to touch the carry flag LDP.P 32(R1), (R4, R5); \ LDP -16(R1), (R6, R7); \ instr $0, R4, R8; \ instr $0, R5, R9; \ instr $0, R6, R10; \ instr $0, R7, R11; \ STP.P (R8, R9), 32(R3); \ STP (R10, R11), -16(R3); \ SUB $4, counter; // do one iteration of copy in addVW/subVW #define vwOneIterCopy(counter, exit) \ CBZ counter, exit; \ LDP.P 32(R1), (R4, R5); \ LDP -16(R1), (R6, R7); \ STP.P (R4, R5), 32(R3); \ STP (R6, R7), -16(R3); \ SUB $4, counter; // func addVW(z, x []Word, y Word) (c Word) TEXT ·addVW(SB),NOSPLIT,$0 MOVD z+0(FP), R3 MOVD z_len+8(FP), R0 MOVD x+24(FP), R1 MOVD y+48(FP), R2 CBZ R0, len0 // the length of z is 0 MOVD.P 8(R1), R4 ADDS R2, R4 // z[0] = x[0] + y, set carry MOVD.P R4, 8(R3) SUB $1, R0 CBZ R0, len1 // the length of z is 1 TBZ $0, R0, two MOVD.P 8(R1), R4 // do it once ADCS $0, R4 MOVD.P R4, 8(R3) SUB $1, R0 two: // do it twice TBZ $1, R0, loop LDP.P 16(R1), (R4, R5) ADCS $0, R4, R8 // c, z[i] = x[i] + c ADCS $0, R5, R9 STP.P (R8, R9), 16(R3) SUB $2, R0 loop: // do four times per round vwOneIter(ADCS, R0, len1) B loop len1: CSET HS, R2 // extract carry flag len0: MOVD R2, c+56(FP) done: RET add4: BCC copy vwOneIter(ADCS, R0, len1) B add4 copy: MOVD ZR, c+56(FP) CMP R1, R3 BEQ done copy_4: // no carry flag, copy the rest vwOneIterCopy(R0, done) B copy_4 // func subVW(z, x []Word, y Word) (c Word) TEXT ·subVW(SB),NOSPLIT,$0 MOVD z+0(FP), R3 MOVD z_len+8(FP), R0 MOVD x+24(FP), R1 MOVD y+48(FP), R2 CBZ R0, len0 // the length of z is 0 MOVD.P 8(R1), R4 SUBS R2, R4 // z[0] = x[0] - y, set carry MOVD.P R4, 8(R3) SUB $1, R0 CBZ R0, len1 // the length of z is 1 TBZ $0, R0, two // do it once MOVD.P 8(R1), R4 SBCS $0, R4 MOVD.P R4, 8(R3) SUB $1, R0 two: // do it twice TBZ $1, R0, loop LDP.P 16(R1), (R4, R5) SBCS $0, R4, R8 // 
c, z[i] = x[i] + c SBCS $0, R5, R9 STP.P (R8, R9), 16(R3) SUB $2, R0 loop: // do four times per round vwOneIter(SBCS, R0, len1) B loop len1: CSET LO, R2 // extract carry flag len0: MOVD R2, c+56(FP) done: RET sub4: BCS copy vwOneIter(SBCS, R0, len1) B sub4 copy: MOVD ZR, c+56(FP) CMP R1, R3 BEQ done copy_4: // no carry flag, copy the rest vwOneIterCopy(R0, done) B copy_4 // func shlVU(z, x []Word, s uint) (c Word) // This implementation handles the shift operation from the high word to the low word, // which may be an error for the case where the low word of x overlaps with the high // word of z. When calling this function directly, you need to pay attention to this // situation. TEXT ·shlVU(SB),NOSPLIT,$0 LDP z+0(FP), (R0, R1) // R0 = z.ptr, R1 = len(z) MOVD x+24(FP), R2 MOVD s+48(FP), R3 ADD R1<<3, R0 // R0 = &z[n] ADD R1<<3, R2 // R2 = &x[n] CBZ R1, len0 CBZ R3, copy // if the number of shift is 0, just copy x to z MOVD $64, R4 SUB R3, R4 // handling the most significant element x[n-1] MOVD.W -8(R2), R6 LSR R4, R6, R5 // return value LSL R3, R6, R8 // x[i] << s SUB $1, R1 one: TBZ $0, R1, two MOVD.W -8(R2), R6 LSR R4, R6, R7 ORR R8, R7 LSL R3, R6, R8 SUB $1, R1 MOVD.W R7, -8(R0) two: TBZ $1, R1, loop LDP.W -16(R2), (R6, R7) LSR R4, R7, R10 ORR R8, R10 LSL R3, R7 LSR R4, R6, R9 ORR R7, R9 LSL R3, R6, R8 SUB $2, R1 STP.W (R9, R10), -16(R0) loop: CBZ R1, done LDP.W -32(R2), (R10, R11) LDP 16(R2), (R12, R13) LSR R4, R13, R23 ORR R8, R23 // z[i] = (x[i] << s) | (x[i-1] >> (64 - s)) LSL R3, R13 LSR R4, R12, R22 ORR R13, R22 LSL R3, R12 LSR R4, R11, R21 ORR R12, R21 LSL R3, R11 LSR R4, R10, R20 ORR R11, R20 LSL R3, R10, R8 STP.W (R20, R21), -32(R0) STP (R22, R23), 16(R0) SUB $4, R1 B loop done: MOVD.W R8, -8(R0) // the first element x[0] MOVD R5, c+56(FP) // the part moved out from x[n-1] RET copy: CMP R0, R2 BEQ len0 TBZ $0, R1, ctwo MOVD.W -8(R2), R4 MOVD.W R4, -8(R0) SUB $1, R1 ctwo: TBZ $1, R1, cloop LDP.W -16(R2), (R4, R5) STP.W (R4, R5), -16(R0) SUB $2, R1 cloop: 
CBZ R1, len0 LDP.W -32(R2), (R4, R5) LDP 16(R2), (R6, R7) STP.W (R4, R5), -32(R0) STP (R6, R7), 16(R0) SUB $4, R1 B cloop len0: MOVD $0, c+56(FP) RET // func shrVU(z, x []Word, s uint) (c Word) // This implementation handles the shift operation from the low word to the high word, // which may be an error for the case where the high word of x overlaps with the low // word of z. When calling this function directly, you need to pay attention to this // situation. TEXT ·shrVU(SB),NOSPLIT,$0 MOVD z+0(FP), R0 MOVD z_len+8(FP), R1 MOVD x+24(FP), R2 MOVD s+48(FP), R3 MOVD $0, R8 MOVD $64, R4 SUB R3, R4 CBZ R1, len0 CBZ R3, copy // if the number of shift is 0, just copy x to z MOVD.P 8(R2), R20 LSR R3, R20, R8 LSL R4, R20 MOVD R20, c+56(FP) // deal with the first element SUB $1, R1 TBZ $0, R1, two MOVD.P 8(R2), R6 LSL R4, R6, R20 ORR R8, R20 LSR R3, R6, R8 MOVD.P R20, 8(R0) SUB $1, R1 two: TBZ $1, R1, loop LDP.P 16(R2), (R6, R7) LSL R4, R6, R20 LSR R3, R6 ORR R8, R20 LSL R4, R7, R21 LSR R3, R7, R8 ORR R6, R21 STP.P (R20, R21), 16(R0) SUB $2, R1 loop: CBZ R1, done LDP.P 32(R2), (R10, R11) LDP -16(R2), (R12, R13) LSL R4, R10, R20 LSR R3, R10 ORR R8, R20 // z[i] = (x[i] >> s) | (x[i+1] << (64 - s)) LSL R4, R11, R21 LSR R3, R11 ORR R10, R21 LSL R4, R12, R22 LSR R3, R12 ORR R11, R22 LSL R4, R13, R23 LSR R3, R13, R8 ORR R12, R23 STP.P (R20, R21), 32(R0) STP (R22, R23), -16(R0) SUB $4, R1 B loop done: MOVD R8, (R0) // deal with the last element RET copy: CMP R0, R2 BEQ len0 TBZ $0, R1, ctwo MOVD.P 8(R2), R3 MOVD.P R3, 8(R0) SUB $1, R1 ctwo: TBZ $1, R1, cloop LDP.P 16(R2), (R4, R5) STP.P (R4, R5), 16(R0) SUB $2, R1 cloop: CBZ R1, len0 LDP.P 32(R2), (R4, R5) LDP -16(R2), (R6, R7) STP.P (R4, R5), 32(R0) STP (R6, R7), -16(R0) SUB $4, R1 B cloop len0: MOVD $0, c+56(FP) RET // func mulAddVWW(z, x []Word, y, r Word) (c Word) TEXT ·mulAddVWW(SB),NOSPLIT,$0 MOVD z+0(FP), R1 MOVD z_len+8(FP), R0 MOVD x+24(FP), R2 MOVD y+48(FP), R3 MOVD r+56(FP), R4 // c, z = x * y + r TBZ $0, R0, two MOVD.P 
8(R2), R5 MUL R3, R5, R7 UMULH R3, R5, R8 ADDS R4, R7 ADC $0, R8, R4 // c, z[i] = x[i] * y + r MOVD.P R7, 8(R1) SUB $1, R0 two: TBZ $1, R0, loop LDP.P 16(R2), (R5, R6) MUL R3, R5, R10 UMULH R3, R5, R11 ADDS R4, R10 MUL R3, R6, R12 UMULH R3, R6, R13 ADCS R12, R11 ADC $0, R13, R4 STP.P (R10, R11), 16(R1) SUB $2, R0 loop: CBZ R0, done LDP.P 32(R2), (R5, R6) LDP -16(R2), (R7, R8) MUL R3, R5, R10 UMULH R3, R5, R11 ADDS R4, R10 MUL R3, R6, R12 UMULH R3, R6, R13 ADCS R11, R12 MUL R3, R7, R14 UMULH R3, R7, R15 ADCS R13, R14 MUL R3, R8, R16 UMULH R3, R8, R17 ADCS R15, R16 ADC $0, R17, R4 STP.P (R10, R12), 32(R1) STP (R14, R16), -16(R1) SUB $4, R0 B loop done: MOVD R4, c+64(FP) RET // func addMulVVW(z, x []Word, y Word) (c Word) TEXT ·addMulVVW(SB),NOSPLIT,$0 MOVD z+0(FP), R1 MOVD z_len+8(FP), R0 MOVD x+24(FP), R2 MOVD y+48(FP), R3 MOVD $0, R4 TBZ $0, R0, two MOVD.P 8(R2), R5 MOVD (R1), R6 MUL R5, R3, R7 UMULH R5, R3, R8 ADDS R7, R6 ADC $0, R8, R4 MOVD.P R6, 8(R1) SUB $1, R0 two: TBZ $1, R0, loop LDP.P 16(R2), (R5, R10) LDP (R1), (R6, R11) MUL R10, R3, R13 UMULH R10, R3, R12 MUL R5, R3, R7 UMULH R5, R3, R8 ADDS R4, R6 ADCS R13, R11 ADC $0, R12 ADDS R7, R6 ADCS R8, R11 ADC $0, R12, R4 STP.P (R6, R11), 16(R1) SUB $2, R0 // The main loop of this code operates on a block of 4 words every iteration // performing [R4:R12:R11:R10:R9] = R4 + R3 * [R8:R7:R6:R5] + [R12:R11:R10:R9] // where R4 is carried from the previous iteration, R8:R7:R6:R5 hold the next // 4 words of x, R3 is y and R12:R11:R10:R9 are part of the result z. 
loop: CBZ R0, done LDP.P 16(R2), (R5, R6) LDP.P 16(R2), (R7, R8) LDP (R1), (R9, R10) ADDS R4, R9 MUL R6, R3, R14 ADCS R14, R10 MUL R7, R3, R15 LDP 16(R1), (R11, R12) ADCS R15, R11 MUL R8, R3, R16 ADCS R16, R12 UMULH R8, R3, R20 ADC $0, R20 MUL R5, R3, R13 ADDS R13, R9 UMULH R5, R3, R17 ADCS R17, R10 UMULH R6, R3, R21 STP.P (R9, R10), 16(R1) ADCS R21, R11 UMULH R7, R3, R19 ADCS R19, R12 STP.P (R11, R12), 16(R1) ADC $0, R20, R4 SUB $4, R0 B loop done: MOVD R4, c+56(FP) RET saferith-0.33.0/arith_decl.go000066400000000000000000000011761422457503400160420ustar00rootroot00000000000000// Copyright 2010 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go // +build !math_big_pure_go package saferith // implemented in arith_$GOARCH.s func mulWW(x, y Word) (z1, z0 Word) func addVV(z, x, y []Word) (c Word) func subVV(z, x, y []Word) (c Word) func addVW(z, x []Word, y Word) (c Word) func subVW(z, x []Word, y Word) (c Word) func shlVU(z, x []Word, s uint) (c Word) func shrVU(z, x []Word, s uint) (c Word) func mulAddVWW(z, x []Word, y, r Word) (c Word) func addMulVVW(z, x []Word, y Word) (c Word) saferith-0.33.0/arith_decl_pure.go000066400000000000000000000015571422457503400171000ustar00rootroot00000000000000// Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. 
//go:build math_big_pure_go // +build math_big_pure_go package saferith func mulWW(x, y Word) (z1, z0 Word) { return mulWW_g(x, y) } func addVV(z, x, y []Word) (c Word) { return addVV_g(z, x, y) } func subVV(z, x, y []Word) (c Word) { return subVV_g(z, x, y) } func addVW(z, x []Word, y Word) (c Word) { return addVW_g(z, x, y) } func subVW(z, x []Word, y Word) (c Word) { return subVW_g(z, x, y) } func shlVU(z, x []Word, s uint) (c Word) { return shlVU_g(z, x, s) } func shrVU(z, x []Word, s uint) (c Word) { return shrVU_g(z, x, s) } func mulAddVWW(z, x []Word, y, r Word) (c Word) { return mulAddVWW_g(z, x, y, r) } func addMulVVW(z, x []Word, y Word) (c Word) { return addMulVVW_g(z, x, y) } saferith-0.33.0/arith_decl_s390x.go000066400000000000000000000011101422457503400167740ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go // +build !math_big_pure_go package saferith func addVV_check(z, x, y []Word) (c Word) func addVV_vec(z, x, y []Word) (c Word) func addVV_novec(z, x, y []Word) (c Word) func subVV_check(z, x, y []Word) (c Word) func subVV_vec(z, x, y []Word) (c Word) func subVV_novec(z, x, y []Word) (c Word) // This should be feature detected, but we can't use the internal/cpu package var hasVX = false saferith-0.33.0/arith_mips64x.s000066400000000000000000000015431422457503400163000ustar00rootroot00000000000000// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go && (mips64 || mips64le) // +build !math_big_pure_go // +build mips64 mips64le #include "textflag.h" // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. 
TEXT ·mulWW(SB),NOSPLIT,$0 JMP ·mulWW_g(SB) TEXT ·addVV(SB),NOSPLIT,$0 JMP ·addVV_g(SB) TEXT ·subVV(SB),NOSPLIT,$0 JMP ·subVV_g(SB) TEXT ·addVW(SB),NOSPLIT,$0 JMP ·addVW_g(SB) TEXT ·subVW(SB),NOSPLIT,$0 JMP ·subVW_g(SB) TEXT ·shlVU(SB),NOSPLIT,$0 JMP ·shlVU_g(SB) TEXT ·shrVU(SB),NOSPLIT,$0 JMP ·shrVU_g(SB) TEXT ·mulAddVWW(SB),NOSPLIT,$0 JMP ·mulAddVWW_g(SB) TEXT ·addMulVVW(SB),NOSPLIT,$0 JMP ·addMulVVW_g(SB) saferith-0.33.0/arith_mipsx.s000066400000000000000000000015331422457503400161250ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go && (mips || mipsle) // +build !math_big_pure_go // +build mips mipsle #include "textflag.h" // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. TEXT ·mulWW(SB),NOSPLIT,$0 JMP ·mulWW_g(SB) TEXT ·addVV(SB),NOSPLIT,$0 JMP ·addVV_g(SB) TEXT ·subVV(SB),NOSPLIT,$0 JMP ·subVV_g(SB) TEXT ·addVW(SB),NOSPLIT,$0 JMP ·addVW_g(SB) TEXT ·subVW(SB),NOSPLIT,$0 JMP ·subVW_g(SB) TEXT ·shlVU(SB),NOSPLIT,$0 JMP ·shlVU_g(SB) TEXT ·shrVU(SB),NOSPLIT,$0 JMP ·shrVU_g(SB) TEXT ·mulAddVWW(SB),NOSPLIT,$0 JMP ·mulAddVWW_g(SB) TEXT ·addMulVVW(SB),NOSPLIT,$0 JMP ·addMulVVW_g(SB) saferith-0.33.0/arith_ppc64x.s000066400000000000000000000300231422457503400161050ustar00rootroot00000000000000// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go && (ppc64 || ppc64le) // +build !math_big_pure_go // +build ppc64 ppc64le #include "textflag.h" // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. 
// func mulWW(x, y Word) (z1, z0 Word) TEXT ·mulWW(SB), NOSPLIT, $0 MOVD x+0(FP), R4 MOVD y+8(FP), R5 MULHDU R4, R5, R6 MULLD R4, R5, R7 MOVD R6, z1+16(FP) MOVD R7, z0+24(FP) RET // func addVV(z, y, y []Word) (c Word) // z[i] = x[i] + y[i] for all i, carrying TEXT ·addVV(SB), NOSPLIT, $0 MOVD z_len+8(FP), R7 // R7 = z_len MOVD x+24(FP), R8 // R8 = x[] MOVD y+48(FP), R9 // R9 = y[] MOVD z+0(FP), R10 // R10 = z[] // If z_len = 0, we are done CMP R0, R7 MOVD R0, R4 BEQ done // Process the first iteration out of the loop so we can // use MOVDU and avoid 3 index registers updates. MOVD 0(R8), R11 // R11 = x[i] MOVD 0(R9), R12 // R12 = y[i] ADD $-1, R7 // R7 = z_len - 1 ADDC R12, R11, R15 // R15 = x[i] + y[i], set CA CMP R0, R7 MOVD R15, 0(R10) // z[i] BEQ final // If z_len was 1, we are done SRD $2, R7, R5 // R5 = z_len/4 CMP R0, R5 MOVD R5, CTR // Set up loop counter BEQ tail // If R5 = 0, we can't use the loop // Process 4 elements per iteration. Unrolling this loop // means a performance trade-off: we will lose performance // for small values of z_len (0.90x in the worst case), but // gain significant performance as z_len increases (up to // 1.45x). 
loop: MOVD 8(R8), R11 // R11 = x[i] MOVD 16(R8), R12 // R12 = x[i+1] MOVD 24(R8), R14 // R14 = x[i+2] MOVDU 32(R8), R15 // R15 = x[i+3] MOVD 8(R9), R16 // R16 = y[i] MOVD 16(R9), R17 // R17 = y[i+1] MOVD 24(R9), R18 // R18 = y[i+2] MOVDU 32(R9), R19 // R19 = y[i+3] ADDE R11, R16, R20 // R20 = x[i] + y[i] + CA ADDE R12, R17, R21 // R21 = x[i+1] + y[i+1] + CA ADDE R14, R18, R22 // R22 = x[i+2] + y[i+2] + CA ADDE R15, R19, R23 // R23 = x[i+3] + y[i+3] + CA MOVD R20, 8(R10) // z[i] MOVD R21, 16(R10) // z[i+1] MOVD R22, 24(R10) // z[i+2] MOVDU R23, 32(R10) // z[i+3] ADD $-4, R7 // R7 = z_len - 4 BC 16, 0, loop // bdnz // We may have more elements to read CMP R0, R7 BEQ final // Process the remaining elements, one at a time tail: MOVDU 8(R8), R11 // R11 = x[i] MOVDU 8(R9), R16 // R16 = y[i] ADD $-1, R7 // R7 = z_len - 1 ADDE R11, R16, R20 // R20 = x[i] + y[i] + CA CMP R0, R7 MOVDU R20, 8(R10) // z[i] BEQ final // If R7 = 0, we are done MOVDU 8(R8), R11 MOVDU 8(R9), R16 ADD $-1, R7 ADDE R11, R16, R20 CMP R0, R7 MOVDU R20, 8(R10) BEQ final MOVD 8(R8), R11 MOVD 8(R9), R16 ADDE R11, R16, R20 MOVD R20, 8(R10) final: ADDZE R4 // Capture CA done: MOVD R4, c+72(FP) RET // func subVV(z, x, y []Word) (c Word) // z[i] = x[i] - y[i] for all i, carrying TEXT ·subVV(SB), NOSPLIT, $0 MOVD z_len+8(FP), R7 // R7 = z_len MOVD x+24(FP), R8 // R8 = x[] MOVD y+48(FP), R9 // R9 = y[] MOVD z+0(FP), R10 // R10 = z[] // If z_len = 0, we are done CMP R0, R7 MOVD R0, R4 BEQ done // Process the first iteration out of the loop so we can // use MOVDU and avoid 3 index registers updates. MOVD 0(R8), R11 // R11 = x[i] MOVD 0(R9), R12 // R12 = y[i] ADD $-1, R7 // R7 = z_len - 1 SUBC R12, R11, R15 // R15 = x[i] - y[i], set CA CMP R0, R7 MOVD R15, 0(R10) // z[i] BEQ final // If z_len was 1, we are done SRD $2, R7, R5 // R5 = z_len/4 CMP R0, R5 MOVD R5, CTR // Set up loop counter BEQ tail // If R5 = 0, we can't use the loop // Process 4 elements per iteration. 
Unrolling this loop // means a performance trade-off: we will lose performance // for small values of z_len (0.92x in the worst case), but // gain significant performance as z_len increases (up to // 1.45x). loop: MOVD 8(R8), R11 // R11 = x[i] MOVD 16(R8), R12 // R12 = x[i+1] MOVD 24(R8), R14 // R14 = x[i+2] MOVDU 32(R8), R15 // R15 = x[i+3] MOVD 8(R9), R16 // R16 = y[i] MOVD 16(R9), R17 // R17 = y[i+1] MOVD 24(R9), R18 // R18 = y[i+2] MOVDU 32(R9), R19 // R19 = y[i+3] SUBE R16, R11, R20 // R20 = x[i] - y[i] + CA SUBE R17, R12, R21 // R21 = x[i+1] - y[i+1] + CA SUBE R18, R14, R22 // R22 = x[i+2] - y[i+2] + CA SUBE R19, R15, R23 // R23 = x[i+3] - y[i+3] + CA MOVD R20, 8(R10) // z[i] MOVD R21, 16(R10) // z[i+1] MOVD R22, 24(R10) // z[i+2] MOVDU R23, 32(R10) // z[i+3] ADD $-4, R7 // R7 = z_len - 4 BC 16, 0, loop // bdnz // We may have more elements to read CMP R0, R7 BEQ final // Process the remaining elements, one at a time tail: MOVDU 8(R8), R11 // R11 = x[i] MOVDU 8(R9), R16 // R16 = y[i] ADD $-1, R7 // R7 = z_len - 1 SUBE R16, R11, R20 // R20 = x[i] - y[i] + CA CMP R0, R7 MOVDU R20, 8(R10) // z[i] BEQ final // If R7 = 0, we are done MOVDU 8(R8), R11 MOVDU 8(R9), R16 ADD $-1, R7 SUBE R16, R11, R20 CMP R0, R7 MOVDU R20, 8(R10) BEQ final MOVD 8(R8), R11 MOVD 8(R9), R16 SUBE R16, R11, R20 MOVD R20, 8(R10) final: ADDZE R4 XOR $1, R4 done: MOVD R4, c+72(FP) RET // func addVW(z, x []Word, y Word) (c Word) TEXT ·addVW(SB), NOSPLIT, $0 MOVD z+0(FP), R10 // R10 = z[] MOVD x+24(FP), R8 // R8 = x[] MOVD y+48(FP), R4 // R4 = y = c MOVD z_len+8(FP), R11 // R11 = z_len CMP R0, R11 // If z_len is zero, return BEQ done // We will process the first iteration out of the loop so we capture // the value of c. In the subsequent iterations, we will rely on the // value of CA set here. 
MOVD 0(R8), R20 // R20 = x[i] ADD $-1, R11 // R11 = z_len - 1 ADDC R20, R4, R6 // R6 = x[i] + c CMP R0, R11 // If z_len was 1, we are done MOVD R6, 0(R10) // z[i] BEQ final // We will read 4 elements per iteration SRD $2, R11, R9 // R9 = z_len/4 DCBT (R8) CMP R0, R9 MOVD R9, CTR // Set up the loop counter BEQ tail // If R9 = 0, we can't use the loop loop: MOVD 8(R8), R20 // R20 = x[i] MOVD 16(R8), R21 // R21 = x[i+1] MOVD 24(R8), R22 // R22 = x[i+2] MOVDU 32(R8), R23 // R23 = x[i+3] ADDZE R20, R24 // R24 = x[i] + CA ADDZE R21, R25 // R25 = x[i+1] + CA ADDZE R22, R26 // R26 = x[i+2] + CA ADDZE R23, R27 // R27 = x[i+3] + CA MOVD R24, 8(R10) // z[i] MOVD R25, 16(R10) // z[i+1] MOVD R26, 24(R10) // z[i+2] MOVDU R27, 32(R10) // z[i+3] ADD $-4, R11 // R11 = z_len - 4 BC 16, 0, loop // bdnz // We may have some elements to read CMP R0, R11 BEQ final tail: MOVDU 8(R8), R20 ADDZE R20, R24 ADD $-1, R11 MOVDU R24, 8(R10) CMP R0, R11 BEQ final MOVDU 8(R8), R20 ADDZE R20, R24 ADD $-1, R11 MOVDU R24, 8(R10) CMP R0, R11 BEQ final MOVD 8(R8), R20 ADDZE R20, R24 MOVD R24, 8(R10) final: ADDZE R0, R4 // c = CA done: MOVD R4, c+56(FP) RET // func subVW(z, x []Word, y Word) (c Word) TEXT ·subVW(SB), NOSPLIT, $0 MOVD z+0(FP), R10 // R10 = z[] MOVD x+24(FP), R8 // R8 = x[] MOVD y+48(FP), R4 // R4 = y = c MOVD z_len+8(FP), R11 // R11 = z_len CMP R0, R11 // If z_len is zero, return BEQ done // We will process the first iteration out of the loop so we capture // the value of c. In the subsequent iterations, we will rely on the // value of CA set here. 
MOVD 0(R8), R20 // R20 = x[i] ADD $-1, R11 // R11 = z_len - 1 SUBC R4, R20, R6 // R6 = x[i] - c CMP R0, R11 // If z_len was 1, we are done MOVD R6, 0(R10) // z[i] BEQ final // We will read 4 elements per iteration SRD $2, R11, R9 // R9 = z_len/4 DCBT (R8) CMP R0, R9 MOVD R9, CTR // Set up the loop counter BEQ tail // If R9 = 0, we can't use the loop // The loop here is almost the same as the one used in s390x, but // we don't need to capture CA every iteration because we've already // done that above. loop: MOVD 8(R8), R20 MOVD 16(R8), R21 MOVD 24(R8), R22 MOVDU 32(R8), R23 SUBE R0, R20 SUBE R0, R21 SUBE R0, R22 SUBE R0, R23 MOVD R20, 8(R10) MOVD R21, 16(R10) MOVD R22, 24(R10) MOVDU R23, 32(R10) ADD $-4, R11 BC 16, 0, loop // bdnz // We may have some elements to read CMP R0, R11 BEQ final tail: MOVDU 8(R8), R20 SUBE R0, R20 ADD $-1, R11 MOVDU R20, 8(R10) CMP R0, R11 BEQ final MOVDU 8(R8), R20 SUBE R0, R20 ADD $-1, R11 MOVDU R20, 8(R10) CMP R0, R11 BEQ final MOVD 8(R8), R20 SUBE R0, R20 MOVD R20, 8(R10) final: // Capture CA SUBE R4, R4 NEG R4, R4 done: MOVD R4, c+56(FP) RET TEXT ·shlVU(SB), NOSPLIT, $0 BR ·shlVU_g(SB) TEXT ·shrVU(SB), NOSPLIT, $0 BR ·shrVU_g(SB) // func mulAddVWW(z, x []Word, y, r Word) (c Word) TEXT ·mulAddVWW(SB), NOSPLIT, $0 MOVD z+0(FP), R10 // R10 = z[] MOVD x+24(FP), R8 // R8 = x[] MOVD y+48(FP), R9 // R9 = y MOVD r+56(FP), R4 // R4 = r = c MOVD z_len+8(FP), R11 // R11 = z_len CMP R0, R11 BEQ done MOVD 0(R8), R20 ADD $-1, R11 MULLD R9, R20, R6 // R6 = z0 = Low-order(x[i]*y) MULHDU R9, R20, R7 // R7 = z1 = High-order(x[i]*y) ADDC R4, R6 // R6 = z0 + r ADDZE R7 // R7 = z1 + CA CMP R0, R11 MOVD R7, R4 // R4 = c MOVD R6, 0(R10) // z[i] BEQ done // We will read 4 elements per iteration SRD $2, R11, R14 // R14 = z_len/4 DCBT (R8) CMP R0, R14 MOVD R14, CTR // Set up the loop counter BEQ tail // If R9 = 0, we can't use the loop loop: MOVD 8(R8), R20 // R20 = x[i] MOVD 16(R8), R21 // R21 = x[i+1] MOVD 24(R8), R22 // R22 = x[i+2] MOVDU 32(R8), R23 // 
R23 = x[i+3] MULLD R9, R20, R24 // R24 = z0[i] MULHDU R9, R20, R20 // R20 = z1[i] ADDC R4, R24 // R24 = z0[i] + c ADDZE R20 // R7 = z1[i] + CA MULLD R9, R21, R25 MULHDU R9, R21, R21 ADDC R20, R25 ADDZE R21 MULLD R9, R22, R26 MULHDU R9, R22, R22 MULLD R9, R23, R27 MULHDU R9, R23, R23 ADDC R21, R26 ADDZE R22 MOVD R24, 8(R10) // z[i] MOVD R25, 16(R10) // z[i+1] ADDC R22, R27 ADDZE R23,R4 // update carry MOVD R26, 24(R10) // z[i+2] MOVDU R27, 32(R10) // z[i+3] ADD $-4, R11 // R11 = z_len - 4 BC 16, 0, loop // bdnz // We may have some elements to read CMP R0, R11 BEQ done // Process the remaining elements, one at a time tail: MOVDU 8(R8), R20 // R20 = x[i] MULLD R9, R20, R24 // R24 = z0[i] MULHDU R9, R20, R25 // R25 = z1[i] ADD $-1, R11 // R11 = z_len - 1 ADDC R4, R24 ADDZE R25 MOVDU R24, 8(R10) // z[i] CMP R0, R11 MOVD R25, R4 // R4 = c BEQ done // If R11 = 0, we are done MOVDU 8(R8), R20 MULLD R9, R20, R24 MULHDU R9, R20, R25 ADD $-1, R11 ADDC R4, R24 ADDZE R25 MOVDU R24, 8(R10) CMP R0, R11 MOVD R25, R4 BEQ done MOVD 8(R8), R20 MULLD R9, R20, R24 MULHDU R9, R20, R25 ADD $-1, R11 ADDC R4, R24 ADDZE R25 MOVD R24, 8(R10) MOVD R25, R4 done: MOVD R4, c+64(FP) RET // func addMulVVW(z, x []Word, y Word) (c Word) TEXT ·addMulVVW(SB), NOSPLIT, $0 MOVD z+0(FP), R10 // R10 = z[] MOVD x+24(FP), R8 // R8 = x[] MOVD y+48(FP), R9 // R9 = y MOVD z_len+8(FP), R22 // R22 = z_len MOVD R0, R3 // R3 will be the index register CMP R0, R22 MOVD R0, R4 // R4 = c = 0 MOVD R22, CTR // Initialize loop counter BEQ done loop: MOVD (R8)(R3), R20 // Load x[i] MOVD (R10)(R3), R21 // Load z[i] MULLD R9, R20, R6 // R6 = Low-order(x[i]*y) MULHDU R9, R20, R7 // R7 = High-order(x[i]*y) ADDC R21, R6 // R6 = z0 ADDZE R7 // R7 = z1 ADDC R4, R6 // R6 = z0 + c + 0 ADDZE R7, R4 // c += z1 MOVD R6, (R10)(R3) // Store z[i] ADD $8, R3 BC 16, 0, loop // bdnz done: MOVD R4, c+56(FP) RET saferith-0.33.0/arith_riscv64.s000066400000000000000000000017111422457503400162630ustar00rootroot00000000000000// Copyright 2020 
The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go && riscv64 // +build !math_big_pure_go,riscv64 #include "textflag.h" // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. // func mulWW(x, y Word) (z1, z0 Word) TEXT ·mulWW(SB),NOSPLIT,$0 MOV x+0(FP), X5 MOV y+8(FP), X6 MULHU X5, X6, X7 MUL X5, X6, X8 MOV X7, z1+16(FP) MOV X8, z0+24(FP) RET TEXT ·addVV(SB),NOSPLIT,$0 JMP ·addVV_g(SB) TEXT ·subVV(SB),NOSPLIT,$0 JMP ·subVV_g(SB) TEXT ·addVW(SB),NOSPLIT,$0 JMP ·addVW_g(SB) TEXT ·subVW(SB),NOSPLIT,$0 JMP ·subVW_g(SB) TEXT ·shlVU(SB),NOSPLIT,$0 JMP ·shlVU_g(SB) TEXT ·shrVU(SB),NOSPLIT,$0 JMP ·shrVU_g(SB) TEXT ·mulAddVWW(SB),NOSPLIT,$0 JMP ·mulAddVWW_g(SB) TEXT ·addMulVVW(SB),NOSPLIT,$0 JMP ·addMulVVW_g(SB) saferith-0.33.0/arith_s390x.s000066400000000000000000000507271422457503400156640ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go // +build !math_big_pure_go #include "textflag.h" // This file provides fast assembly versions for the elementary // arithmetic operations on vectors implemented in arith.go. 
TEXT ·mulWW(SB), NOSPLIT, $0 MOVD x+0(FP), R3 MOVD y+8(FP), R4 MULHDU R3, R4 MOVD R10, z1+16(FP) MOVD R11, z0+24(FP) RET // DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2, r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11 // func addVV(z, x, y []Word) (c Word) TEXT ·addVV(SB), NOSPLIT, $0 MOVD addvectorfacility+0x00(SB), R1 BR (R1) TEXT ·addVV_check(SB), NOSPLIT, $0 MOVB ·hasVX(SB), R1 CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported MOVD $addvectorfacility+0x00(SB), R1 MOVD $·addVV_novec(SB), R2 MOVD R2, 0(R1) // MOVD $·addVV_novec(SB), 0(R1) BR ·addVV_novec(SB) vectorimpl: MOVD $addvectorfacility+0x00(SB), R1 MOVD $·addVV_vec(SB), R2 MOVD R2, 0(R1) // MOVD $·addVV_vec(SB), 0(R1) BR ·addVV_vec(SB) GLOBL addvectorfacility+0x00(SB), NOPTR, $8 DATA addvectorfacility+0x00(SB)/8, $·addVV_check(SB) TEXT ·addVV_vec(SB), NOSPLIT, $0 MOVD z_len+8(FP), R3 MOVD x+24(FP), R8 MOVD y+48(FP), R9 MOVD z+0(FP), R2 MOVD $0, R4 // c = 0 MOVD $0, R0 // make sure it's zero MOVD $0, R10 // i = 0 // s/JL/JMP/ below to disable the unrolled loop SUB $4, R3 BLT v1 SUB $12, R3 // n -= 16 BLT A1 // if n < 0 goto A1 MOVD R8, R5 MOVD R9, R6 MOVD R2, R7 // n >= 0 // regular loop body unrolled 16x VZERO V0 // c = 0 UU1: VLM 0(R5), V1, V4 // 64-bytes into V1..V8 ADD $64, R5 VPDI $0x4, V1, V1, V1 // flip the doublewords to big-endian order VPDI $0x4, V2, V2, V2 // flip the doublewords to big-endian order VLM 0(R6), V9, V12 // 64-bytes into V9..V16 ADD $64, R6 VPDI $0x4, V9, V9, V9 // flip the doublewords to big-endian order VPDI $0x4, V10, V10, V10 // flip the doublewords to big-endian order VACCCQ V1, V9, V0, V25 VACQ V1, V9, V0, V17 VACCCQ V2, V10, V25, V26 VACQ V2, V10, V25, V18 VLM 0(R5), V5, V6 // 32-bytes into V1..V8 VLM 0(R6), V13, V14 // 32-bytes into V9..V16 ADD $32, R5 ADD $32, R6 VPDI $0x4, V3, V3, V3 // flip the doublewords to big-endian order VPDI $0x4, V4, V4, V4 // flip the doublewords to big-endian order VPDI $0x4, V11, V11, V11 // flip the 
doublewords to big-endian order VPDI $0x4, V12, V12, V12 // flip the doublewords to big-endian order VACCCQ V3, V11, V26, V27 VACQ V3, V11, V26, V19 VACCCQ V4, V12, V27, V28 VACQ V4, V12, V27, V20 VLM 0(R5), V7, V8 // 32-bytes into V1..V8 VLM 0(R6), V15, V16 // 32-bytes into V9..V16 ADD $32, R5 ADD $32, R6 VPDI $0x4, V5, V5, V5 // flip the doublewords to big-endian order VPDI $0x4, V6, V6, V6 // flip the doublewords to big-endian order VPDI $0x4, V13, V13, V13 // flip the doublewords to big-endian order VPDI $0x4, V14, V14, V14 // flip the doublewords to big-endian order VACCCQ V5, V13, V28, V29 VACQ V5, V13, V28, V21 VACCCQ V6, V14, V29, V30 VACQ V6, V14, V29, V22 VPDI $0x4, V7, V7, V7 // flip the doublewords to big-endian order VPDI $0x4, V8, V8, V8 // flip the doublewords to big-endian order VPDI $0x4, V15, V15, V15 // flip the doublewords to big-endian order VPDI $0x4, V16, V16, V16 // flip the doublewords to big-endian order VACCCQ V7, V15, V30, V31 VACQ V7, V15, V30, V23 VACCCQ V8, V16, V31, V0 // V0 has carry-over VACQ V8, V16, V31, V24 VPDI $0x4, V17, V17, V17 // flip the doublewords to big-endian order VPDI $0x4, V18, V18, V18 // flip the doublewords to big-endian order VPDI $0x4, V19, V19, V19 // flip the doublewords to big-endian order VPDI $0x4, V20, V20, V20 // flip the doublewords to big-endian order VPDI $0x4, V21, V21, V21 // flip the doublewords to big-endian order VPDI $0x4, V22, V22, V22 // flip the doublewords to big-endian order VPDI $0x4, V23, V23, V23 // flip the doublewords to big-endian order VPDI $0x4, V24, V24, V24 // flip the doublewords to big-endian order VSTM V17, V24, 0(R7) // 128-bytes into z ADD $128, R7 ADD $128, R10 // i += 16 SUB $16, R3 // n -= 16 BGE UU1 // if n >= 0 goto U1 VLGVG $1, V0, R4 // put cf into R4 NEG R4, R4 // save cf A1: ADD $12, R3 // n += 16 // s/JL/JMP/ below to disable the unrolled loop BLT v1 // if n < 0 goto v1 U1: // n >= 0 // regular loop body unrolled 4x MOVD 0(R8)(R10*1), R5 MOVD 8(R8)(R10*1), R6 MOVD 
16(R8)(R10*1), R7 MOVD 24(R8)(R10*1), R1 ADDC R4, R4 // restore CF MOVD 0(R9)(R10*1), R11 ADDE R11, R5 MOVD 8(R9)(R10*1), R11 ADDE R11, R6 MOVD 16(R9)(R10*1), R11 ADDE R11, R7 MOVD 24(R9)(R10*1), R11 ADDE R11, R1 MOVD R0, R4 ADDE R4, R4 // save CF NEG R4, R4 MOVD R5, 0(R2)(R10*1) MOVD R6, 8(R2)(R10*1) MOVD R7, 16(R2)(R10*1) MOVD R1, 24(R2)(R10*1) ADD $32, R10 // i += 4 SUB $4, R3 // n -= 4 BGE U1 // if n >= 0 goto U1 v1: ADD $4, R3 // n += 4 BLE E1 // if n <= 0 goto E1 L1: // n > 0 ADDC R4, R4 // restore CF MOVD 0(R8)(R10*1), R5 MOVD 0(R9)(R10*1), R11 ADDE R11, R5 MOVD R5, 0(R2)(R10*1) MOVD R0, R4 ADDE R4, R4 // save CF NEG R4, R4 ADD $8, R10 // i++ SUB $1, R3 // n-- BGT L1 // if n > 0 goto L1 E1: NEG R4, R4 MOVD R4, c+72(FP) // return c RET TEXT ·addVV_novec(SB), NOSPLIT, $0 novec: MOVD z_len+8(FP), R3 MOVD x+24(FP), R8 MOVD y+48(FP), R9 MOVD z+0(FP), R2 MOVD $0, R4 // c = 0 MOVD $0, R0 // make sure it's zero MOVD $0, R10 // i = 0 // s/JL/JMP/ below to disable the unrolled loop SUB $4, R3 // n -= 4 BLT v1n // if n < 0 goto v1n U1n: // n >= 0 // regular loop body unrolled 4x MOVD 0(R8)(R10*1), R5 MOVD 8(R8)(R10*1), R6 MOVD 16(R8)(R10*1), R7 MOVD 24(R8)(R10*1), R1 ADDC R4, R4 // restore CF MOVD 0(R9)(R10*1), R11 ADDE R11, R5 MOVD 8(R9)(R10*1), R11 ADDE R11, R6 MOVD 16(R9)(R10*1), R11 ADDE R11, R7 MOVD 24(R9)(R10*1), R11 ADDE R11, R1 MOVD R0, R4 ADDE R4, R4 // save CF NEG R4, R4 MOVD R5, 0(R2)(R10*1) MOVD R6, 8(R2)(R10*1) MOVD R7, 16(R2)(R10*1) MOVD R1, 24(R2)(R10*1) ADD $32, R10 // i += 4 SUB $4, R3 // n -= 4 BGE U1n // if n >= 0 goto U1n v1n: ADD $4, R3 // n += 4 BLE E1n // if n <= 0 goto E1n L1n: // n > 0 ADDC R4, R4 // restore CF MOVD 0(R8)(R10*1), R5 MOVD 0(R9)(R10*1), R11 ADDE R11, R5 MOVD R5, 0(R2)(R10*1) MOVD R0, R4 ADDE R4, R4 // save CF NEG R4, R4 ADD $8, R10 // i++ SUB $1, R3 // n-- BGT L1n // if n > 0 goto L1n E1n: NEG R4, R4 MOVD R4, c+72(FP) // return c RET TEXT ·subVV(SB), NOSPLIT, $0 MOVD subvectorfacility+0x00(SB), R1 BR (R1) TEXT ·subVV_check(SB), 
NOSPLIT, $0 MOVB ·hasVX(SB), R1 CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported MOVD $subvectorfacility+0x00(SB), R1 MOVD $·subVV_novec(SB), R2 MOVD R2, 0(R1) // MOVD $·subVV_novec(SB), 0(R1) BR ·subVV_novec(SB) vectorimpl: MOVD $subvectorfacility+0x00(SB), R1 MOVD $·subVV_vec(SB), R2 MOVD R2, 0(R1) // MOVD $·subVV_vec(SB), 0(R1) BR ·subVV_vec(SB) GLOBL subvectorfacility+0x00(SB), NOPTR, $8 DATA subvectorfacility+0x00(SB)/8, $·subVV_check(SB) // DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2, r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11 // func subVV(z, x, y []Word) (c Word) // (same as addVV except for SUBC/SUBE instead of ADDC/ADDE and label names) TEXT ·subVV_vec(SB), NOSPLIT, $0 MOVD z_len+8(FP), R3 MOVD x+24(FP), R8 MOVD y+48(FP), R9 MOVD z+0(FP), R2 MOVD $0, R4 // c = 0 MOVD $0, R0 // make sure it's zero MOVD $0, R10 // i = 0 // s/JL/JMP/ below to disable the unrolled loop SUB $4, R3 // n -= 4 BLT v1 // if n < 0 goto v1 SUB $12, R3 // n -= 16 BLT A1 // if n < 0 goto A1 MOVD R8, R5 MOVD R9, R6 MOVD R2, R7 // n >= 0 // regular loop body unrolled 16x VZERO V0 // cf = 0 MOVD $1, R4 // for 390 subtraction cf starts as 1 (no borrow) VLVGG $1, R4, V0 // put carry into V0 UU1: VLM 0(R5), V1, V4 // 64-bytes into V1..V8 ADD $64, R5 VPDI $0x4, V1, V1, V1 // flip the doublewords to big-endian order VPDI $0x4, V2, V2, V2 // flip the doublewords to big-endian order VLM 0(R6), V9, V12 // 64-bytes into V9..V16 ADD $64, R6 VPDI $0x4, V9, V9, V9 // flip the doublewords to big-endian order VPDI $0x4, V10, V10, V10 // flip the doublewords to big-endian order VSBCBIQ V1, V9, V0, V25 VSBIQ V1, V9, V0, V17 VSBCBIQ V2, V10, V25, V26 VSBIQ V2, V10, V25, V18 VLM 0(R5), V5, V6 // 32-bytes into V1..V8 VLM 0(R6), V13, V14 // 32-bytes into V9..V16 ADD $32, R5 ADD $32, R6 VPDI $0x4, V3, V3, V3 // flip the doublewords to big-endian order VPDI $0x4, V4, V4, V4 // flip the doublewords to big-endian order VPDI $0x4, V11, V11, V11 // flip the 
doublewords to big-endian order VPDI $0x4, V12, V12, V12 // flip the doublewords to big-endian order VSBCBIQ V3, V11, V26, V27 VSBIQ V3, V11, V26, V19 VSBCBIQ V4, V12, V27, V28 VSBIQ V4, V12, V27, V20 VLM 0(R5), V7, V8 // 32-bytes into V1..V8 VLM 0(R6), V15, V16 // 32-bytes into V9..V16 ADD $32, R5 ADD $32, R6 VPDI $0x4, V5, V5, V5 // flip the doublewords to big-endian order VPDI $0x4, V6, V6, V6 // flip the doublewords to big-endian order VPDI $0x4, V13, V13, V13 // flip the doublewords to big-endian order VPDI $0x4, V14, V14, V14 // flip the doublewords to big-endian order VSBCBIQ V5, V13, V28, V29 VSBIQ V5, V13, V28, V21 VSBCBIQ V6, V14, V29, V30 VSBIQ V6, V14, V29, V22 VPDI $0x4, V7, V7, V7 // flip the doublewords to big-endian order VPDI $0x4, V8, V8, V8 // flip the doublewords to big-endian order VPDI $0x4, V15, V15, V15 // flip the doublewords to big-endian order VPDI $0x4, V16, V16, V16 // flip the doublewords to big-endian order VSBCBIQ V7, V15, V30, V31 VSBIQ V7, V15, V30, V23 VSBCBIQ V8, V16, V31, V0 // V0 has carry-over VSBIQ V8, V16, V31, V24 VPDI $0x4, V17, V17, V17 // flip the doublewords to big-endian order VPDI $0x4, V18, V18, V18 // flip the doublewords to big-endian order VPDI $0x4, V19, V19, V19 // flip the doublewords to big-endian order VPDI $0x4, V20, V20, V20 // flip the doublewords to big-endian order VPDI $0x4, V21, V21, V21 // flip the doublewords to big-endian order VPDI $0x4, V22, V22, V22 // flip the doublewords to big-endian order VPDI $0x4, V23, V23, V23 // flip the doublewords to big-endian order VPDI $0x4, V24, V24, V24 // flip the doublewords to big-endian order VSTM V17, V24, 0(R7) // 128-bytes into z ADD $128, R7 ADD $128, R10 // i += 16 SUB $16, R3 // n -= 16 BGE UU1 // if n >= 0 goto U1 VLGVG $1, V0, R4 // put cf into R4 SUB $1, R4 // save cf A1: ADD $12, R3 // n += 16 BLT v1 // if n < 0 goto v1 U1: // n >= 0 // regular loop body unrolled 4x MOVD 0(R8)(R10*1), R5 MOVD 8(R8)(R10*1), R6 MOVD 16(R8)(R10*1), R7 MOVD 24(R8)(R10*1), 
R1 MOVD R0, R11 SUBC R4, R11 // restore CF MOVD 0(R9)(R10*1), R11 SUBE R11, R5 MOVD 8(R9)(R10*1), R11 SUBE R11, R6 MOVD 16(R9)(R10*1), R11 SUBE R11, R7 MOVD 24(R9)(R10*1), R11 SUBE R11, R1 MOVD R0, R4 SUBE R4, R4 // save CF MOVD R5, 0(R2)(R10*1) MOVD R6, 8(R2)(R10*1) MOVD R7, 16(R2)(R10*1) MOVD R1, 24(R2)(R10*1) ADD $32, R10 // i += 4 SUB $4, R3 // n -= 4 BGE U1 // if n >= 0 goto U1n v1: ADD $4, R3 // n += 4 BLE E1 // if n <= 0 goto E1 L1: // n > 0 MOVD R0, R11 SUBC R4, R11 // restore CF MOVD 0(R8)(R10*1), R5 MOVD 0(R9)(R10*1), R11 SUBE R11, R5 MOVD R5, 0(R2)(R10*1) MOVD R0, R4 SUBE R4, R4 // save CF ADD $8, R10 // i++ SUB $1, R3 // n-- BGT L1 // if n > 0 goto L1n E1: NEG R4, R4 MOVD R4, c+72(FP) // return c RET // DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2, r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11 // func subVV(z, x, y []Word) (c Word) // (same as addVV except for SUBC/SUBE instead of ADDC/ADDE and label names) TEXT ·subVV_novec(SB), NOSPLIT, $0 MOVD z_len+8(FP), R3 MOVD x+24(FP), R8 MOVD y+48(FP), R9 MOVD z+0(FP), R2 MOVD $0, R4 // c = 0 MOVD $0, R0 // make sure it's zero MOVD $0, R10 // i = 0 // s/JL/JMP/ below to disable the unrolled loop SUB $4, R3 // n -= 4 BLT v1 // if n < 0 goto v1 U1: // n >= 0 // regular loop body unrolled 4x MOVD 0(R8)(R10*1), R5 MOVD 8(R8)(R10*1), R6 MOVD 16(R8)(R10*1), R7 MOVD 24(R8)(R10*1), R1 MOVD R0, R11 SUBC R4, R11 // restore CF MOVD 0(R9)(R10*1), R11 SUBE R11, R5 MOVD 8(R9)(R10*1), R11 SUBE R11, R6 MOVD 16(R9)(R10*1), R11 SUBE R11, R7 MOVD 24(R9)(R10*1), R11 SUBE R11, R1 MOVD R0, R4 SUBE R4, R4 // save CF MOVD R5, 0(R2)(R10*1) MOVD R6, 8(R2)(R10*1) MOVD R7, 16(R2)(R10*1) MOVD R1, 24(R2)(R10*1) ADD $32, R10 // i += 4 SUB $4, R3 // n -= 4 BGE U1 // if n >= 0 goto U1 v1: ADD $4, R3 // n += 4 BLE E1 // if n <= 0 goto E1 L1: // n > 0 MOVD R0, R11 SUBC R4, R11 // restore CF MOVD 0(R8)(R10*1), R5 MOVD 0(R9)(R10*1), R11 SUBE R11, R5 MOVD R5, 0(R2)(R10*1) MOVD R0, R4 SUBE R4, R4 // save CF ADD $8, R10 // 
i++ SUB $1, R3 // n-- BGT L1 // if n > 0 goto L1 E1: NEG R4, R4 MOVD R4, c+72(FP) // return c RET TEXT ·addVW(SB), NOSPLIT, $0 MOVD z_len+8(FP), R5 // length of z MOVD x+24(FP), R6 MOVD y+48(FP), R7 // c = y MOVD z+0(FP), R8 CMPBEQ R5, $0, returnC // if len(z) == 0, we can have an early return // Add the first two words, and determine which path (copy path or loop path) to take based on the carry flag. ADDC 0(R6), R7 MOVD R7, 0(R8) CMPBEQ R5, $1, returnResult // len(z) == 1 MOVD $0, R9 ADDE 8(R6), R9 MOVD R9, 8(R8) CMPBEQ R5, $2, returnResult // len(z) == 2 // Update the counters MOVD $16, R12 // i = 2 MOVD $-2(R5), R5 // n = n - 2 loopOverEachWord: BRC $12, copySetup // carry = 0, copy the rest MOVD $1, R9 // Originally we used the carry flag generated in the previous iteration // (i.e: ADDE could be used here to do the addition). However, since we // already know carry is 1 (otherwise we will go to copy section), we can use // ADDC here so the current iteration does not depend on the carry flag // generated in the previous iteration. This could be useful when branch prediction happens. ADDC 0(R6)(R12*1), R9 MOVD R9, 0(R8)(R12*1) // z[i] = x[i] + c MOVD $8(R12), R12 // i++ BRCTG R5, loopOverEachWord // n-- // Return the current carry value returnResult: MOVD $0, R0 ADDE R0, R0 MOVD R0, c+56(FP) RET // Update position of x(R6) and z(R8) based on the current counter value and perform copying. // With the assumption that x and z will not overlap with each other or x and z will // point to same memory region, we can use a faster version of copy using only MVC here. // In the following implementation, we have three copy loops, each copying a word, 4 words, and // 32 words at a time. Via benchmarking, this implementation is faster than calling runtime·memmove. 
copySetup: ADD R12, R6 ADD R12, R8 CMPBGE R5, $4, mediumLoop smallLoop: // does a loop unrolling to copy word when n < 4 CMPBEQ R5, $0, returnZero MVC $8, 0(R6), 0(R8) CMPBEQ R5, $1, returnZero MVC $8, 8(R6), 8(R8) CMPBEQ R5, $2, returnZero MVC $8, 16(R6), 16(R8) returnZero: MOVD $0, c+56(FP) // return 0 as carry RET mediumLoop: CMPBLT R5, $4, smallLoop CMPBLT R5, $32, mediumLoopBody largeLoop: // Copying 256 bytes at a time. MVC $256, 0(R6), 0(R8) MOVD $256(R6), R6 MOVD $256(R8), R8 MOVD $-32(R5), R5 CMPBGE R5, $32, largeLoop BR mediumLoop mediumLoopBody: // Copying 32 bytes at a time MVC $32, 0(R6), 0(R8) MOVD $32(R6), R6 MOVD $32(R8), R8 MOVD $-4(R5), R5 CMPBGE R5, $4, mediumLoopBody BR smallLoop returnC: MOVD R7, c+56(FP) RET TEXT ·subVW(SB), NOSPLIT, $0 MOVD z_len+8(FP), R5 MOVD x+24(FP), R6 MOVD y+48(FP), R7 // The borrow bit passed in MOVD z+0(FP), R8 MOVD $0, R0 // R0 is a temporary variable used during computation. Ensure it has zero in it. CMPBEQ R5, $0, returnC // len(z) == 0, have an early return // Subtract the first two words, and determine which path (copy path or loop path) to take based on the borrow flag MOVD 0(R6), R9 SUBC R7, R9 MOVD R9, 0(R8) CMPBEQ R5, $1, returnResult MOVD 8(R6), R9 SUBE R0, R9 MOVD R9, 8(R8) CMPBEQ R5, $2, returnResult // Update the counters MOVD $16, R12 // i = 2 MOVD $-2(R5), R5 // n = n - 2 loopOverEachWord: BRC $3, copySetup // no borrow, copy the rest MOVD 0(R6)(R12*1), R9 // Originally we used the borrow flag generated in the previous iteration // (i.e: SUBE could be used here to do the subtraction). However, since we // already know borrow is 1 (otherwise we will go to copy section), we can // use SUBC here so the current iteration does not depend on the borrow flag // generated in the previous iteration. This could be useful when branch prediction happens. 
SUBC $1, R9 MOVD R9, 0(R8)(R12*1) // z[i] = x[i] - 1 MOVD $8(R12), R12 // i++ BRCTG R5, loopOverEachWord // n-- // return the current borrow value returnResult: SUBE R0, R0 NEG R0, R0 MOVD R0, c+56(FP) RET // Update position of x(R6) and z(R8) based on the current counter value and perform copying. // With the assumption that x and z will not overlap with each other or x and z will // point to same memory region, we can use a faster version of copy using only MVC here. // In the following implementation, we have three copy loops, each copying a word, 4 words, and // 32 words at a time. Via benchmarking, this implementation is faster than calling runtime·memmove. copySetup: ADD R12, R6 ADD R12, R8 CMPBGE R5, $4, mediumLoop smallLoop: // does a loop unrolling to copy word when n < 4 CMPBEQ R5, $0, returnZero MVC $8, 0(R6), 0(R8) CMPBEQ R5, $1, returnZero MVC $8, 8(R6), 8(R8) CMPBEQ R5, $2, returnZero MVC $8, 16(R6), 16(R8) returnZero: MOVD $0, c+56(FP) // return 0 as borrow RET mediumLoop: CMPBLT R5, $4, smallLoop CMPBLT R5, $32, mediumLoopBody largeLoop: // Copying 256 bytes at a time MVC $256, 0(R6), 0(R8) MOVD $256(R6), R6 MOVD $256(R8), R8 MOVD $-32(R5), R5 CMPBGE R5, $32, largeLoop BR mediumLoop mediumLoopBody: // Copying 32 bytes at a time MVC $32, 0(R6), 0(R8) MOVD $32(R6), R6 MOVD $32(R8), R8 MOVD $-4(R5), R5 CMPBGE R5, $4, mediumLoopBody BR smallLoop returnC: MOVD R7, c+56(FP) RET // func shlVU(z, x []Word, s uint) (c Word) TEXT ·shlVU(SB), NOSPLIT, $0 BR ·shlVU_g(SB) // func shrVU(z, x []Word, s uint) (c Word) TEXT ·shrVU(SB), NOSPLIT, $0 BR ·shrVU_g(SB) // CX = R4, r8 = r8, r9=r9, r10 = r2, r11 = r5, DX = r3, AX = r6, BX = R1, (R0 set to 0) + use R11 + use R7 for i // func mulAddVWW(z, x []Word, y, r Word) (c Word) TEXT ·mulAddVWW(SB), NOSPLIT, $0 MOVD z+0(FP), R2 MOVD x+24(FP), R8 MOVD y+48(FP), R9 MOVD r+56(FP), R4 // c = r MOVD z_len+8(FP), R5 MOVD $0, R1 // i = 0 MOVD $0, R7 // i*8 = 0 MOVD $0, R0 // make sure it's zero BR E5 L5: MOVD (R8)(R1*1), R6 
MULHDU R9, R6 ADDC R4, R11 // add to low order bits ADDE R0, R6 MOVD R11, (R2)(R1*1) MOVD R6, R4 ADD $8, R1 // i*8 + 8 ADD $1, R7 // i++ E5: CMPBLT R7, R5, L5 // i < n MOVD R4, c+64(FP) RET // func addMulVVW(z, x []Word, y Word) (c Word) // CX = R4, r8 = r8, r9=r9, r10 = r2, r11 = r5, AX = r11, DX = R6, r12=r12, BX = R1, (R0 set to 0) + use R11 + use R7 for i TEXT ·addMulVVW(SB), NOSPLIT, $0 MOVD z+0(FP), R2 MOVD x+24(FP), R8 MOVD y+48(FP), R9 MOVD z_len+8(FP), R5 MOVD $0, R1 // i*8 = 0 MOVD $0, R7 // i = 0 MOVD $0, R0 // make sure it's zero MOVD $0, R4 // c = 0 MOVD R5, R12 AND $-2, R12 CMPBGE R5, $2, A6 BR E6 A6: MOVD (R8)(R1*1), R6 MULHDU R9, R6 MOVD (R2)(R1*1), R10 ADDC R10, R11 // add to low order bits ADDE R0, R6 ADDC R4, R11 ADDE R0, R6 MOVD R6, R4 MOVD R11, (R2)(R1*1) MOVD (8)(R8)(R1*1), R6 MULHDU R9, R6 MOVD (8)(R2)(R1*1), R10 ADDC R10, R11 // add to low order bits ADDE R0, R6 ADDC R4, R11 ADDE R0, R6 MOVD R6, R4 MOVD R11, (8)(R2)(R1*1) ADD $16, R1 // i*8 + 8 ADD $2, R7 // i++ CMPBLT R7, R12, A6 BR E6 L6: MOVD (R8)(R1*1), R6 MULHDU R9, R6 MOVD (R2)(R1*1), R10 ADDC R10, R11 // add to low order bits ADDE R0, R6 ADDC R4, R11 ADDE R0, R6 MOVD R6, R4 MOVD R11, (R2)(R1*1) ADD $8, R1 // i*8 + 8 ADD $1, R7 // i++ E6: CMPBLT R7, R5, L6 // i < n MOVD R4, c+56(FP) RET saferith-0.33.0/arith_s390x_test.go000066400000000000000000000014571422457503400170620ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. 
//go:build s390x && !math_big_pure_go // +build s390x,!math_big_pure_go package big import ( "testing" ) // Tests whether the non vector routines are working, even when the tests are run on a // vector-capable machine func TestFunVVnovec(t *testing.T) { if hasVX == true { for _, a := range sumVV { arg := a testFunVV(t, "addVV_novec", addVV_novec, arg) arg = argVV{a.z, a.y, a.x, a.c} testFunVV(t, "addVV_novec symmetric", addVV_novec, arg) arg = argVV{a.x, a.z, a.y, a.c} testFunVV(t, "subVV_novec", subVV_novec, arg) arg = argVV{a.y, a.z, a.x, a.c} testFunVV(t, "subVV_novec symmetric", subVV_novec, arg) } } } saferith-0.33.0/arith_wasm.s000066400000000000000000000012631422457503400157340ustar00rootroot00000000000000// Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE_go file. //go:build !math_big_pure_go // +build !math_big_pure_go #include "textflag.h" TEXT ·mulWW(SB),NOSPLIT,$0 JMP ·mulWW_g(SB) TEXT ·addVV(SB),NOSPLIT,$0 JMP ·addVV_g(SB) TEXT ·subVV(SB),NOSPLIT,$0 JMP ·subVV_g(SB) TEXT ·addVW(SB),NOSPLIT,$0 JMP ·addVW_g(SB) TEXT ·subVW(SB),NOSPLIT,$0 JMP ·subVW_g(SB) TEXT ·shlVU(SB),NOSPLIT,$0 JMP ·shlVU_g(SB) TEXT ·shrVU(SB),NOSPLIT,$0 JMP ·shrVU_g(SB) TEXT ·mulAddVWW(SB),NOSPLIT,$0 JMP ·mulAddVWW_g(SB) TEXT ·addMulVVW(SB),NOSPLIT,$0 JMP ·addMulVVW_g(SB) saferith-0.33.0/benchmarks/000077500000000000000000000000001422457503400155255ustar00rootroot00000000000000saferith-0.33.0/benchmarks/history.md000066400000000000000000002172301422457503400175550ustar00rootroot00000000000000# 8902aa4ec9f67312bfe1c635d02cbcbe5696431c (2021-08-19) Improve inversion speed ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkLimbMask-4 71810857 16.51 ns/op BenchmarkAddBig-4 10325738 102.5 ns/op BenchmarkModAddBig-4 16236423 63.66 ns/op BenchmarkLargeModAddBig-4 8365964 125.1 ns/op BenchmarkMulBig-4 1302160 893.6 ns/op BenchmarkModMulBig-4 20917198 57.39 ns/op BenchmarkLargeModMulBig-4 1354096 902.0 ns/op BenchmarkModBig-4 1361006 887.9 ns/op BenchmarkLargeModBig-4 448894 2558 ns/op BenchmarkModInverseBig-4 4943186 249.2 ns/op BenchmarkLargeModInverseBig-4 1000000 1165 ns/op BenchmarkExpBig-4 25028 47445 ns/op BenchmarkLargeExpBig-4 214 5514653 ns/op BenchmarkSetBytesBig-4 9410590 118.4 ns/op BenchmarkModSqrt3Mod4Big-4 40668 29378 ns/op BenchmarkAddNat-4 9163245 122.5 ns/op BenchmarkModAddNat-4 10028330 115.7 ns/op BenchmarkLargeModAddNat-4 2784940 596.1 ns/op BenchmarkModNegNat-4 12542563 90.18 ns/op BenchmarkLargeModNegNat-4 3841987 298.4 ns/op BenchmarkMulNat-4 481586 2245 ns/op BenchmarkModMulNat-4 2394678 491.9 ns/op BenchmarkLargeModMulNat-4 59353 20326 ns/op BenchmarkLargeModMulNatEven-4 58807 20275 ns/op BenchmarkModNat-4 58653 19919 ns/op BenchmarkLargeModNat-4 68472 17269 ns/op BenchmarkModInverseNat-4 1108507 1095 ns/op BenchmarkLargeModInverseNat-4 3265 355225 ns/op BenchmarkModInverseEvenNat-4 3139 361051 ns/op BenchmarkLargeModInverseEvenNat-4 3040 376379 ns/op BenchmarkExpNat-4 15562 74551 ns/op BenchmarkLargeExpNat-4 85 13958050 ns/op BenchmarkLargeExpNatEven-4 13 84531685 ns/op BenchmarkSetBytesNat-4 2151705 521.9 ns/op BenchmarkMontgomeryMul-4 195489 5238 ns/op BenchmarkModSqrt3Mod4Nat-4 25172 46841 ns/op BenchmarkModSqrt1Mod4Nat-4 7311 157145 ns/op BenchmarkDivNat-4 60760 19069 ns/op BenchmarkLargeDivNat-4 70380 17242 ns/op PASS ok github.com/cronokirby/saferith 59.556s [saferith] → go test -bench=. 
-tags math_big_pure_go ;goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkLimbMask-4 71783198 16.69 ns/op BenchmarkAddBig-4 8091496 147.9 ns/op BenchmarkModAddBig-4 16965126 68.49 ns/op BenchmarkLargeModAddBig-4 7665646 165.2 ns/op BenchmarkMulBig-4 861013 1393 ns/op BenchmarkModMulBig-4 16647033 60.42 ns/op BenchmarkLargeModMulBig-4 861454 1437 ns/op BenchmarkModBig-4 1302430 915.4 ns/op BenchmarkLargeModBig-4 171907 6858 ns/op BenchmarkModInverseBig-4 5026785 237.5 ns/op BenchmarkLargeModInverseBig-4 864998 1179 ns/op BenchmarkExpBig-4 25644 45937 ns/op BenchmarkLargeExpBig-4 121 9966891 ns/op BenchmarkSetBytesBig-4 9426774 153.4 ns/op BenchmarkModSqrt3Mod4Big-4 30499 42571 ns/op BenchmarkAddNat-4 6116457 193.3 ns/op BenchmarkModAddNat-4 10242615 112.7 ns/op BenchmarkLargeModAddNat-4 2641714 475.4 ns/op BenchmarkModNegNat-4 13805811 76.69 ns/op BenchmarkLargeModNegNat-4 3279925 339.9 ns/op BenchmarkMulNat-4 291374 4116 ns/op BenchmarkModMulNat-4 2364541 506.0 ns/op BenchmarkLargeModMulNat-4 52971 22709 ns/op BenchmarkLargeModMulNatEven-4 51830 22783 ns/op BenchmarkModNat-4 61860 19404 ns/op BenchmarkLargeModNat-4 66633 18104 ns/op BenchmarkModInverseNat-4 1000000 1107 ns/op BenchmarkLargeModInverseNat-4 3007 387848 ns/op BenchmarkModInverseEvenNat-4 2968 395330 ns/op BenchmarkLargeModInverseEvenNat-4 2815 416384 ns/op BenchmarkExpNat-4 17750 66138 ns/op BenchmarkLargeExpNat-4 85 14231935 ns/op BenchmarkLargeExpNatEven-4 12 94408614 ns/op BenchmarkSetBytesNat-4 2674656 453.6 ns/op BenchmarkMontgomeryMul-4 225349 5226 ns/op BenchmarkModSqrt3Mod4Nat-4 26919 44633 ns/op BenchmarkModSqrt1Mod4Nat-4 7810 155648 ns/op BenchmarkDivNat-4 60327 19770 ns/op BenchmarkLargeDivNat-4 66328 18055 ns/op PASS ok github.com/cronokirby/saferith 57.483s ``` # 5e3e87ee146345850aed0449e79cb1df471f5e69 (2021-05-23) Add benchmarks for sqrt. ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 10980842 97.37 ns/op BenchmarkModAddBig-4 17397620 80.55 ns/op BenchmarkLargeModAddBig-4 6986739 146.9 ns/op BenchmarkMulBig-4 1316322 949.8 ns/op BenchmarkModMulBig-4 21368481 66.05 ns/op BenchmarkLargeModMulBig-4 1000000 1080 ns/op BenchmarkModBig-4 1224583 978.6 ns/op BenchmarkLargeModBig-4 454917 3228 ns/op BenchmarkModInverseBig-4 3529366 305.1 ns/op BenchmarkLargeModInverseBig-4 1000000 1014 ns/op BenchmarkExpBig-4 25830 44472 ns/op BenchmarkLargeExpBig-4 223 5461545 ns/op BenchmarkSetBytesBig-4 11066692 111.1 ns/op BenchmarkModSqrt3Mod4Big-4 40464 30820 ns/op BenchmarkAddNat-4 12164599 99.69 ns/op BenchmarkModAddNat-4 12610224 117.7 ns/op BenchmarkLargeModAddNat-4 3075188 381.9 ns/op BenchmarkMulNat-4 542385 2452 ns/op BenchmarkModMulNat-4 2568790 460.5 ns/op BenchmarkLargeModMulNat-4 44596 22808 ns/op BenchmarkModNat-4 60856 19349 ns/op BenchmarkLargeModNat-4 63253 17489 ns/op BenchmarkModInverseNat-4 216262 5664 ns/op BenchmarkLargeModInverseNat-4 621 1905714 ns/op BenchmarkModInverseEvenNat-4 606 1858476 ns/op BenchmarkLargeModInverseEvenNat-4 567 1986437 ns/op BenchmarkExpNat-4 16140 73153 ns/op BenchmarkLargeExpNat-4 86 13971093 ns/op BenchmarkSetBytesNat-4 1256854 828.5 ns/op BenchmarkMontgomeryMul-4 217508 5228 ns/op BenchmarkModSqrt3Mod4Nat-4 26886 45715 ns/op BenchmarkModSqrt1Mod4Nat-4 7867 159168 ns/op PASS ok github.com/cronokirby/saferith 48.955s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 9965032 123.2 ns/op BenchmarkModAddBig-4 17541772 62.54 ns/op BenchmarkLargeModAddBig-4 8280865 146.7 ns/op BenchmarkMulBig-4 872971 1385 ns/op BenchmarkModMulBig-4 19919542 61.94 ns/op BenchmarkLargeModMulBig-4 828572 1417 ns/op BenchmarkModBig-4 1298089 920.7 ns/op BenchmarkLargeModBig-4 181110 6686 ns/op BenchmarkModInverseBig-4 5098978 233.9 ns/op BenchmarkLargeModInverseBig-4 997564 1136 ns/op BenchmarkExpBig-4 26109 45708 ns/op BenchmarkLargeExpBig-4 122 9717229 ns/op BenchmarkSetBytesBig-4 9906030 123.0 ns/op BenchmarkModSqrt3Mod4Big-4 31356 39154 ns/op BenchmarkAddNat-4 9561010 117.0 ns/op BenchmarkModAddNat-4 12873328 94.94 ns/op BenchmarkLargeModAddNat-4 2430703 441.0 ns/op BenchmarkMulNat-4 290158 4321 ns/op BenchmarkModMulNat-4 2719801 453.4 ns/op BenchmarkLargeModMulNat-4 50503 24636 ns/op BenchmarkModNat-4 56659 20209 ns/op BenchmarkLargeModNat-4 64297 18912 ns/op BenchmarkModInverseNat-4 211422 5629 ns/op BenchmarkLargeModInverseNat-4 422 2781247 ns/op BenchmarkModInverseEvenNat-4 427 2786959 ns/op BenchmarkLargeModInverseEvenNat-4 386 2982514 ns/op BenchmarkExpNat-4 17989 67104 ns/op BenchmarkLargeExpNat-4 82 14357059 ns/op BenchmarkSetBytesNat-4 1631575 767.5 ns/op BenchmarkMontgomeryMul-4 221646 5304 ns/op BenchmarkModSqrt3Mod4Nat-4 26612 44991 ns/op BenchmarkModSqrt1Mod4Nat-4 7627 151005 ns/op PASS ok github.com/cronokirby/saferith 48.457s ``` # 686f4cf65fc400b5023f57f038f2482553d5439c (2021-05-23) Use double sized modulus ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 10980612 98.41 ns/op BenchmarkModAddBig-4 15502824 72.49 ns/op BenchmarkLargeModAddBig-4 8604442 172.4 ns/op BenchmarkMulBig-4 1233339 1139 ns/op BenchmarkModMulBig-4 14785972 89.11 ns/op BenchmarkLargeModMulBig-4 1309222 997.6 ns/op BenchmarkModBig-4 1253164 1051 ns/op BenchmarkLargeModBig-4 443877 2811 ns/op BenchmarkModInverseBig-4 5071822 249.5 ns/op BenchmarkLargeModInverseBig-4 1418097 872.6 ns/op BenchmarkExpBig-4 26084 47260 ns/op BenchmarkLargeExpBig-4 217 5517348 ns/op BenchmarkSetBytesBig-4 9893497 130.2 ns/op BenchmarkAddNat-4 9331024 108.3 ns/op BenchmarkModAddNat-4 11811805 109.0 ns/op BenchmarkLargeModAddNat-4 2920185 396.3 ns/op BenchmarkMulNat-4 436962 2307 ns/op BenchmarkModMulNat-4 2762408 431.8 ns/op BenchmarkLargeModMulNat-4 51115 23478 ns/op BenchmarkModNat-4 61317 19313 ns/op BenchmarkLargeModNat-4 65481 17649 ns/op BenchmarkModInverseNat-4 209734 5675 ns/op BenchmarkLargeModInverseNat-4 596 1925483 ns/op BenchmarkModInverseEvenNat-4 612 1885449 ns/op BenchmarkLargeModInverseEvenNat-4 578 2007402 ns/op BenchmarkExpNat-4 15661 73063 ns/op BenchmarkLargeExpNat-4 82 13861699 ns/op BenchmarkSetBytesNat-4 1525309 793.7 ns/op BenchmarkMontgomeryMul-4 205795 5364 ns/op PASS ok github.com/cronokirby/saferith 47.339s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 9696001 149.1 ns/op BenchmarkModAddBig-4 17568430 67.73 ns/op BenchmarkLargeModAddBig-4 7791817 158.7 ns/op BenchmarkMulBig-4 904347 1398 ns/op BenchmarkModMulBig-4 18952276 64.41 ns/op BenchmarkLargeModMulBig-4 820531 1504 ns/op BenchmarkModBig-4 1267386 934.4 ns/op BenchmarkLargeModBig-4 176822 6755 ns/op BenchmarkModInverseBig-4 4979390 253.8 ns/op BenchmarkLargeModInverseBig-4 1000000 1179 ns/op BenchmarkExpBig-4 25953 45030 ns/op BenchmarkLargeExpBig-4 121 9587821 ns/op BenchmarkSetBytesBig-4 9764602 115.5 ns/op BenchmarkAddNat-4 8318188 127.2 ns/op BenchmarkModAddNat-4 13145910 100.2 ns/op BenchmarkLargeModAddNat-4 2597168 461.3 ns/op BenchmarkMulNat-4 293556 4107 ns/op BenchmarkModMulNat-4 2761899 445.0 ns/op BenchmarkLargeModMulNat-4 47923 24010 ns/op BenchmarkModNat-4 59161 19442 ns/op BenchmarkLargeModNat-4 66775 18246 ns/op BenchmarkModInverseNat-4 211495 5541 ns/op BenchmarkLargeModInverseNat-4 430 2774137 ns/op BenchmarkModInverseEvenNat-4 434 2730413 ns/op BenchmarkLargeModInverseEvenNat-4 397 2952327 ns/op BenchmarkExpNat-4 17966 65837 ns/op BenchmarkLargeExpNat-4 81 14192840 ns/op BenchmarkSetBytesNat-4 1459062 780.0 ns/op BenchmarkMontgomeryMul-4 220545 5350 ns/op PASS ok github.com/cronokirby/saferith 45.059s ``` # 93b1d2efdf9e0a3933648a0223981b55e45eb97f (2021-05-21) Use same size for exponent. Reduce modulo m for both big and nat consistently. ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 7811989 144.0 ns/op BenchmarkModAddBig-4 17670838 83.17 ns/op BenchmarkLargeModAddBig-4 9088922 144.0 ns/op BenchmarkMulBig-4 1282808 992.2 ns/op BenchmarkModMulBig-4 14717402 71.02 ns/op BenchmarkLargeModMulBig-4 1205802 983.4 ns/op BenchmarkModBig-4 2277052 547.1 ns/op BenchmarkLargeModBig-4 10930282 108.7 ns/op BenchmarkModInverseBig-4 4725792 266.3 ns/op BenchmarkLargeModInverseBig-4 1194169 956.0 ns/op BenchmarkExpBig-4 25194 46598 ns/op BenchmarkLargeExpBig-4 211 5444524 ns/op BenchmarkSetBytesBig-4 10221433 119.3 ns/op BenchmarkAddNat-4 10622287 105.6 ns/op BenchmarkModAddNat-4 12189889 103.6 ns/op BenchmarkLargeModAddNat-4 3121003 394.2 ns/op BenchmarkMulNat-4 434802 2329 ns/op BenchmarkModMulNat-4 2614874 452.9 ns/op BenchmarkLargeModMulNat-4 46010 24497 ns/op BenchmarkModNat-4 113990 10428 ns/op BenchmarkLargeModNat-4 1442504 839.2 ns/op BenchmarkModInverseNat-4 180853 6016 ns/op BenchmarkLargeModInverseNat-4 616 1907289 ns/op BenchmarkModInverseEvenNat-4 604 1894636 ns/op BenchmarkLargeModInverseEvenNat-4 574 2025423 ns/op BenchmarkExpNat-4 14673 81342 ns/op BenchmarkLargeExpNat-4 84 13838482 ns/op BenchmarkSetBytesNat-4 1471936 783.0 ns/op BenchmarkMontgomeryMul-4 226705 5169 ns/op PASS ok github.com/cronokirby/saferith 46.498s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 9266389 119.9 ns/op BenchmarkModAddBig-4 19766632 63.94 ns/op BenchmarkLargeModAddBig-4 7352701 168.6 ns/op BenchmarkMulBig-4 883479 1422 ns/op BenchmarkModMulBig-4 20001904 61.96 ns/op BenchmarkLargeModMulBig-4 818521 1428 ns/op BenchmarkModBig-4 2367650 567.8 ns/op BenchmarkLargeModBig-4 13014212 96.34 ns/op BenchmarkModInverseBig-4 5175038 242.3 ns/op BenchmarkLargeModInverseBig-4 1000000 1601 ns/op BenchmarkExpBig-4 25219 45675 ns/op BenchmarkLargeExpBig-4 123 9655995 ns/op BenchmarkSetBytesBig-4 10951519 123.6 ns/op BenchmarkAddNat-4 11068844 123.9 ns/op BenchmarkModAddNat-4 13086139 111.9 ns/op BenchmarkLargeModAddNat-4 2538372 453.4 ns/op BenchmarkMulNat-4 294279 4188 ns/op ``` # 1c25451143e2ee7726b17eb553915ddcaa088537 (2021-05-21) 4 bit windows in exponentiation. ``` [saferith] → go test -bench=. goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 6364396 176.4 ns/op BenchmarkModAddBig-4 1000000 1122 ns/op BenchmarkLargeModAddBig-4 413269 2848 ns/op BenchmarkMulBig-4 444428 2692 ns/op BenchmarkModMulBig-4 268455 3870 ns/op BenchmarkLargeModMulBig-4 164492 6449 ns/op BenchmarkModBig-4 1000000 1166 ns/op BenchmarkLargeModBig-4 411985 3008 ns/op BenchmarkModInverseBig-4 796880 1460 ns/op BenchmarkLargeModInverseBig-4 292916 4561 ns/op BenchmarkExpBig-4 8677 139762 ns/op BenchmarkLargeExpBig-4 100 11449322 ns/op BenchmarkSetBytesBig-4 4278319 283.4 ns/op BenchmarkAddNat-4 3748434 277.5 ns/op BenchmarkModAddNat-4 9617431 106.9 ns/op BenchmarkLargeModAddNat-4 2870283 509.4 ns/op BenchmarkMulNat-4 136616 8357 ns/op BenchmarkModMulNat-4 2507910 468.9 ns/op BenchmarkLargeModMulNat-4 48259 25039 ns/op BenchmarkModNat-4 56263 21192 ns/op BenchmarkLargeModNat-4 59048 19888 ns/op BenchmarkModInverseNat-4 194116 6244 ns/op 
BenchmarkLargeModInverseNat-4 603 1912025 ns/op BenchmarkModInverseEvenNat-4 146 7982540 ns/op BenchmarkLargeModInverseEvenNat-4 152 7837469 ns/op BenchmarkExpNat-4 303786 3758 ns/op BenchmarkLargeExpNat-4 81 14387063 ns/op BenchmarkSetBytesNat-4 763826 1498 ns/op PASS ok github.com/cronokirby/saferith 40.496s [saferith] → go test -bench=. -tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 4438161 225.5 ns/op BenchmarkModAddBig-4 1002176 1188 ns/op BenchmarkLargeModAddBig-4 178190 7491 ns/op BenchmarkMulBig-4 263458 5212 ns/op BenchmarkModMulBig-4 222494 6292 ns/op BenchmarkLargeModMulBig-4 99888 12643 ns/op BenchmarkModBig-4 1000000 1219 ns/op BenchmarkLargeModBig-4 178986 8129 ns/op BenchmarkModInverseBig-4 706246 1492 ns/op BenchmarkLargeModInverseBig-4 135918 9792 ns/op BenchmarkExpBig-4 8767 139145 ns/op BenchmarkLargeExpBig-4 61 19967055 ns/op BenchmarkSetBytesBig-4 6146796 210.7 ns/op BenchmarkAddNat-4 5866333 290.4 ns/op BenchmarkModAddNat-4 11618220 103.8 ns/op BenchmarkLargeModAddNat-4 2420194 471.7 ns/op BenchmarkMulNat-4 67681 15628 ns/op BenchmarkModMulNat-4 2711295 485.8 ns/op BenchmarkLargeModMulNat-4 48771 25754 ns/op BenchmarkModNat-4 57334 20387 ns/op BenchmarkLargeModNat-4 62467 20964 ns/op BenchmarkModInverseNat-4 204230 5711 ns/op BenchmarkLargeModInverseNat-4 447 2723910 ns/op BenchmarkModInverseEvenNat-4 100 11023572 ns/op BenchmarkLargeModInverseEvenNat-4 93 11149428 ns/op BenchmarkExpNat-4 349776 3514 ns/op BenchmarkLargeExpNat-4 84 14485539 ns/op BenchmarkSetBytesNat-4 772384 1563 ns/op PASS ok github.com/cronokirby/saferith 41.785s ``` # 3a6ccb03c434f719c04ccfcc2c9ce35013424b90 (2021-05-21) Don't reduce modulo m unnecessarily. ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 7046594 184.3 ns/op BenchmarkModAddBig-4 984085 1531 ns/op BenchmarkLargeModAddBig-4 363100 2769 ns/op BenchmarkMulBig-4 388844 2784 ns/op BenchmarkModMulBig-4 314078 3786 ns/op BenchmarkLargeModMulBig-4 226860 6252 ns/op BenchmarkModBig-4 1158972 986.0 ns/op BenchmarkLargeModBig-4 415216 2623 ns/op BenchmarkModInverseBig-4 832084 1403 ns/op BenchmarkLargeModInverseBig-4 289676 4534 ns/op BenchmarkExpBig-4 7690 140706 ns/op BenchmarkLargeExpBig-4 105 11499733 ns/op BenchmarkSetBytesBig-4 5206584 234.5 ns/op BenchmarkAddNat-4 5770597 183.7 ns/op BenchmarkModAddNat-4 11950340 103.5 ns/op BenchmarkLargeModAddNat-4 2921572 406.5 ns/op BenchmarkMulNat-4 137380 8128 ns/op BenchmarkModMulNat-4 2682211 450.0 ns/op BenchmarkLargeModMulNat-4 49249 25011 ns/op BenchmarkModNat-4 54798 20895 ns/op BenchmarkLargeModNat-4 63405 19135 ns/op BenchmarkModInverseNat-4 172198 5939 ns/op BenchmarkLargeModInverseNat-4 636 1880771 ns/op BenchmarkModInverseEvenNat-4 157 7773234 ns/op BenchmarkLargeModInverseEvenNat-4 159 7483443 ns/op BenchmarkExpNat-4 387169 3243 ns/op BenchmarkLargeExpNat-4 57 21061245 ns/op BenchmarkSetBytesNat-4 785596 1796 ns/op PASS ok github.com/cronokirby/saferith 43.576s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 5793404 214.7 ns/op BenchmarkModAddBig-4 917923 1171 ns/op BenchmarkLargeModAddBig-4 170028 7783 ns/op BenchmarkMulBig-4 197192 5113 ns/op BenchmarkModMulBig-4 219387 5886 ns/op BenchmarkLargeModMulBig-4 101758 11690 ns/op BenchmarkModBig-4 1164991 931.1 ns/op BenchmarkLargeModBig-4 154022 6841 ns/op BenchmarkModInverseBig-4 746452 1447 ns/op BenchmarkLargeModInverseBig-4 137312 8781 ns/op BenchmarkExpBig-4 8520 138144 ns/op BenchmarkLargeExpBig-4 60 19625440 ns/op BenchmarkSetBytesBig-4 6022134 200.6 ns/op BenchmarkAddNat-4 4837562 221.7 ns/op BenchmarkModAddNat-4 12762744 95.37 ns/op BenchmarkLargeModAddNat-4 2299520 455.9 ns/op BenchmarkMulNat-4 77550 15464 ns/op BenchmarkModMulNat-4 2675755 453.0 ns/op BenchmarkLargeModMulNat-4 44835 25523 ns/op BenchmarkModNat-4 56588 21005 ns/op BenchmarkLargeModNat-4 59696 20052 ns/op BenchmarkModInverseNat-4 206808 5787 ns/op BenchmarkLargeModInverseNat-4 427 2663714 ns/op BenchmarkModInverseEvenNat-4 93 11266719 ns/op BenchmarkLargeModInverseEvenNat-4 100 11274026 ns/op BenchmarkExpNat-4 436466 2709 ns/op BenchmarkLargeExpNat-4 54 22100573 ns/op BenchmarkSetBytesNat-4 775238 1528 ns/op PASS ok github.com/cronokirby/saferith 37.968s ``` # 036c5a428268cec0fd63887d75a4fe53c50a350b (2021-05-21) Benchmarks using 2048 bit modulus: ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 7074402 164.0 ns/op BenchmarkModAddBig-4 1025569 1105 ns/op BenchmarkLargeModAddBig-4 449142 2932 ns/op BenchmarkMulBig-4 429507 2975 ns/op BenchmarkModMulBig-4 228830 4452 ns/op BenchmarkLargeModMulBig-4 221468 5562 ns/op BenchmarkModBig-4 1210222 957.8 ns/op BenchmarkLargeModBig-4 444782 2693 ns/op BenchmarkModInverseBig-4 836184 1478 ns/op BenchmarkLargeModInverseBig-4 295900 4466 ns/op BenchmarkExpBig-4 8374 140578 ns/op BenchmarkLargeExpBig-4 98 11188430 ns/op BenchmarkSetBytesBig-4 5052667 232.9 ns/op BenchmarkAddNat-4 6121905 208.5 ns/op BenchmarkModAddNat-4 53536 21612 ns/op BenchmarkLargeModAddNat-4 61086 19195 ns/op BenchmarkMulNat-4 134721 8297 ns/op BenchmarkModMulNat-4 27606 43755 ns/op BenchmarkLargeModMulNat-4 19029 62319 ns/op BenchmarkModNat-4 57270 21360 ns/op BenchmarkLargeModNat-4 61735 19143 ns/op BenchmarkModInverseNat-4 43052 27536 ns/op BenchmarkLargeModInverseNat-4 610 1939277 ns/op BenchmarkModInverseEvenNat-4 151 7830059 ns/op BenchmarkLargeModInverseEvenNat-4 148 7759581 ns/op BenchmarkExpNat-4 5539 196737 ns/op BenchmarkLargeExpNat-4 27 42940694 ns/op BenchmarkSetBytesNat-4 738734 1484 ns/op PASS ok github.com/cronokirby/saferith 45.238s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 4995013 255.2 ns/op BenchmarkModAddBig-4 882956 1198 ns/op BenchmarkLargeModAddBig-4 175794 7042 ns/op BenchmarkMulBig-4 260041 4783 ns/op BenchmarkModMulBig-4 204600 5758 ns/op BenchmarkLargeModMulBig-4 101822 11598 ns/op BenchmarkModBig-4 1265289 960.7 ns/op BenchmarkLargeModBig-4 178405 6966 ns/op BenchmarkModInverseBig-4 755259 1506 ns/op BenchmarkLargeModInverseBig-4 132333 8900 ns/op BenchmarkExpBig-4 8341 141174 ns/op BenchmarkLargeExpBig-4 57 20066576 ns/op BenchmarkSetBytesBig-4 4863494 275.2 ns/op BenchmarkAddNat-4 4241242 335.4 ns/op BenchmarkModAddNat-4 49152 22361 ns/op BenchmarkLargeModAddNat-4 59218 21233 ns/op BenchmarkMulNat-4 76137 16409 ns/op BenchmarkModMulNat-4 26935 44656 ns/op BenchmarkLargeModMulNat-4 18156 66906 ns/op BenchmarkModNat-4 54295 21300 ns/op BenchmarkLargeModNat-4 60390 20335 ns/op BenchmarkModInverseNat-4 44236 26966 ns/op BenchmarkLargeModInverseNat-4 435 2753945 ns/op BenchmarkModInverseEvenNat-4 99 11446948 ns/op BenchmarkLargeModInverseEvenNat-4 103 11365429 ns/op BenchmarkExpNat-4 7198 165886 ns/op BenchmarkLargeExpNat-4 26 44880442 ns/op BenchmarkSetBytesNat-4 801037 1479 ns/op PASS ok github.com/cronokirby/saferith 42.140s ``` # 92874261776e63721c16f50b476383a6c2a1818b (2021-04-10) Various small improvements, namely free limb injection for even modular inversion. ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 7626368 174.2 ns/op BenchmarkModAddBig-4 860586 1749 ns/op BenchmarkLargeModAddBig-4 447950 2914 ns/op BenchmarkMulBig-4 435757 3613 ns/op BenchmarkModMulBig-4 176112 6350 ns/op BenchmarkLargeModMulBig-4 261256 5036 ns/op BenchmarkModBig-4 1238294 958.5 ns/op BenchmarkLargeModBig-4 511916 1963 ns/op BenchmarkModInverseBig-4 753504 1396 ns/op BenchmarkLargeModInverseBig-4 96507 12173 ns/op BenchmarkExpBig-4 7321 143193 ns/op BenchmarkLargeExpBig-4 46 25072028 ns/op BenchmarkSetBytesBig-4 4645417 229.7 ns/op BenchmarkAddNat-4 7302646 168.2 ns/op BenchmarkModAddNat-4 57741 21532 ns/op BenchmarkLargeModAddNat-4 116892 11512 ns/op BenchmarkMulNat-4 141757 8153 ns/op BenchmarkModMulNat-4 27888 44462 ns/op BenchmarkLargeModMulNat-4 16935 71999 ns/op BenchmarkModNat-4 52398 20922 ns/op BenchmarkLargeModNat-4 114846 10610 ns/op BenchmarkModInverseNat-4 45645 26265 ns/op BenchmarkLargeModInverseNat-4 228 4966108 ns/op BenchmarkModInverseEvenNat-4 159 7317795 ns/op BenchmarkLargeModInverseEvenNat-4 154 7610100 ns/op BenchmarkExpNat-4 5998 186802 ns/op BenchmarkLargeExpNat-4 10 102149566 ns/op BenchmarkSetBytesNat-4 746450 2190 ns/op PASS ok github.com/cronokirby/saferith 44.172s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 5950827 242.9 ns/op BenchmarkModAddBig-4 1000000 1464 ns/op BenchmarkLargeModAddBig-4 227496 5226 ns/op BenchmarkMulBig-4 244974 4665 ns/op BenchmarkModMulBig-4 204112 6142 ns/op BenchmarkLargeModMulBig-4 125530 10708 ns/op BenchmarkModBig-4 1205188 1031 ns/op BenchmarkLargeModBig-4 238192 6104 ns/op BenchmarkModInverseBig-4 768084 1936 ns/op BenchmarkLargeModInverseBig-4 53173 26462 ns/op BenchmarkExpBig-4 8479 138762 ns/op BenchmarkLargeExpBig-4 22 48573421 ns/op BenchmarkSetBytesBig-4 4857422 234.5 ns/op BenchmarkAddNat-4 4293802 306.1 ns/op BenchmarkModAddNat-4 57752 23735 ns/op BenchmarkLargeModAddNat-4 89205 13366 ns/op BenchmarkMulNat-4 75943 15527 ns/op BenchmarkModMulNat-4 28130 43486 ns/op BenchmarkLargeModMulNat-4 15502 76571 ns/op BenchmarkModNat-4 57678 19955 ns/op BenchmarkLargeModNat-4 105062 12205 ns/op BenchmarkModInverseNat-4 43506 26133 ns/op BenchmarkLargeModInverseNat-4 178 6637800 ns/op BenchmarkModInverseEvenNat-4 100 10713290 ns/op BenchmarkLargeModInverseEvenNat-4 99 11205415 ns/op BenchmarkExpNat-4 6625 163308 ns/op BenchmarkLargeExpNat-4 10 106379810 ns/op BenchmarkSetBytesNat-4 594848 1728 ns/op PASS ok github.com/cronokirby/saferith 40.055s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkLimbMask-4 71783198 16.69 ns/op BenchmarkAddBig-4 8091496 147.9 ns/op BenchmarkModAddBig-4 16965126 68.49 ns/op BenchmarkLargeModAddBig-4 7665646 165.2 ns/op BenchmarkMulBig-4 861013 1393 ns/op BenchmarkModMulBig-4 16647033 60.42 ns/op BenchmarkLargeModMulBig-4 861454 1437 ns/op BenchmarkModBig-4 1302430 915.4 ns/op BenchmarkLargeModBig-4 171907 6858 ns/op BenchmarkModInverseBig-4 5026785 237.5 ns/op BenchmarkLargeModInverseBig-4 864998 1179 ns/op BenchmarkExpBig-4 25644 45937 ns/op BenchmarkLargeExpBig-4 121 9966891 ns/op BenchmarkSetBytesBig-4 9426774 153.4 ns/op BenchmarkModSqrt3Mod4Big-4 30499 42571 ns/op BenchmarkAddNat-4 6116457 193.3 ns/op BenchmarkModAddNat-4 10242615 112.7 ns/op BenchmarkLargeModAddNat-4 2641714 475.4 ns/op BenchmarkModNegNat-4 13805811 76.69 ns/op BenchmarkLargeModNegNat-4 3279925 339.9 ns/op BenchmarkMulNat-4 291374 4116 ns/op BenchmarkModMulNat-4 2364541 506.0 ns/op BenchmarkLargeModMulNat-4 52971 22709 ns/op BenchmarkLargeModMulNatEven-4 51830 22783 ns/op BenchmarkModNat-4 61860 19404 ns/op BenchmarkLargeModNat-4 66633 18104 ns/op BenchmarkModInverseNat-4 1000000 1107 ns/op BenchmarkLargeModInverseNat-4 3007 387848 ns/op BenchmarkModInverseEvenNat-4 2968 395330 ns/op BenchmarkLargeModInverseEvenNat-4 2815 416384 ns/op BenchmarkExpNat-4 17750 66138 ns/op BenchmarkLargeExpNat-4 85 14231935 ns/op BenchmarkLargeExpNatEven-4 12 94408614 ns/op BenchmarkSetBytesNat-4 2674656 453.6 ns/op BenchmarkMontgomeryMul-4 225349 5226 ns/op BenchmarkModSqrt3Mod4Nat-4 26919 44633 ns/op BenchmarkModSqrt1Mod4Nat-4 7810 155648 ns/op BenchmarkDivNat-4 60327 19770 ns/op BenchmarkLargeDivNat-4 66328 18055 ns/op PASS ok github.com/cronokirby/saferith 57.483s ``` # f51bc7910016e703d1389250ee07a90eabcceac3 (2021-04-08) Implement a more streamlined modular inversion routine. ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 5783803 316.4 ns/op BenchmarkModAddBig-4 1000000 1103 ns/op BenchmarkLargeModAddBig-4 694466 1777 ns/op BenchmarkMulBig-4 438339 2668 ns/op BenchmarkModMulBig-4 276226 3624 ns/op BenchmarkLargeModMulBig-4 257265 4327 ns/op BenchmarkModBig-4 1267786 959.5 ns/op BenchmarkLargeModBig-4 670425 1718 ns/op BenchmarkModInverseBig-4 779264 1466 ns/op BenchmarkLargeModInverseBig-4 103576 12414 ns/op BenchmarkExpBig-4 8476 139698 ns/op BenchmarkLargeExpBig-4 42 26590436 ns/op BenchmarkSetBytesBig-4 4994770 233.0 ns/op BenchmarkAddNat-4 5213209 193.6 ns/op BenchmarkModAddNat-4 57008 21120 ns/op BenchmarkLargeModAddNat-4 117346 10567 ns/op BenchmarkMulNat-4 149577 8031 ns/op BenchmarkModMulNat-4 54616 21473 ns/op BenchmarkLargeModMulNat-4 19770 61052 ns/op BenchmarkModNat-4 56593 21178 ns/op BenchmarkLargeModNat-4 115038 10264 ns/op BenchmarkModInverseNat-4 43377 27282 ns/op BenchmarkLargeModInverseNat-4 232 5132395 ns/op BenchmarkModInverseEvenNat-4 152 7839481 ns/op BenchmarkLargeModInverseEvenNat-4 130 9155569 ns/op BenchmarkExpNat-4 6046 195910 ns/op BenchmarkLargeExpNat-4 10 107854567 ns/op BenchmarkSetBytesNat-4 667095 1520 ns/op PASS ok github.com/cronokirby/saferith 39.583s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 5109685 229.7 ns/op BenchmarkModAddBig-4 1000000 1172 ns/op BenchmarkLargeModAddBig-4 212607 5168 ns/op BenchmarkMulBig-4 243620 5084 ns/op BenchmarkModMulBig-4 205398 5749 ns/op BenchmarkLargeModMulBig-4 121932 9739 ns/op BenchmarkModBig-4 1000000 1031 ns/op BenchmarkLargeModBig-4 236596 5281 ns/op BenchmarkModInverseBig-4 683730 1575 ns/op BenchmarkLargeModInverseBig-4 54470 23258 ns/op BenchmarkExpBig-4 8566 139349 ns/op BenchmarkLargeExpBig-4 22 50228266 ns/op BenchmarkSetBytesBig-4 5612074 268.1 ns/op BenchmarkAddNat-4 4814877 233.6 ns/op BenchmarkModAddNat-4 55846 21443 ns/op BenchmarkLargeModAddNat-4 108088 11674 ns/op BenchmarkMulNat-4 74908 16143 ns/op BenchmarkModMulNat-4 54472 21836 ns/op BenchmarkLargeModMulNat-4 18576 64639 ns/op BenchmarkModNat-4 55894 20642 ns/op BenchmarkLargeModNat-4 109884 10816 ns/op BenchmarkModInverseNat-4 44550 26488 ns/op BenchmarkLargeModInverseNat-4 160 7306197 ns/op BenchmarkModInverseEvenNat-4 100 11287731 ns/op BenchmarkLargeModInverseEvenNat-4 90 13300550 ns/op BenchmarkExpNat-4 6496 164679 ns/op BenchmarkLargeExpNat-4 10 110866574 ns/op BenchmarkSetBytesNat-4 764706 1486 ns/op PASS ok github.com/cronokirby/saferith 40.311s ``` # 6768b30cbd9284b75aa387717f286b1d81edcf4f (2021-03-31) Added even modular inversion ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 8544007 187.3 ns/op BenchmarkModAddBig-4 1000000 1164 ns/op BenchmarkLargeModAddBig-4 660194 1883 ns/op BenchmarkMulBig-4 429873 2810 ns/op BenchmarkModMulBig-4 291237 3534 ns/op BenchmarkLargeModMulBig-4 273622 4221 ns/op BenchmarkModBig-4 1233248 957.5 ns/op BenchmarkLargeModBig-4 668458 1706 ns/op BenchmarkModInverseBig-4 881304 1400 ns/op BenchmarkLargeModInverseBig-4 101865 11527 ns/op BenchmarkExpBig-4 8653 136691 ns/op BenchmarkLargeExpBig-4 45 25358294 ns/op BenchmarkSetBytesBig-4 6028752 204.8 ns/op BenchmarkAddNat-4 6311770 212.2 ns/op BenchmarkModAddNat-4 55579 21493 ns/op BenchmarkLargeModAddNat-4 116384 10377 ns/op BenchmarkMulNat-4 147770 7869 ns/op BenchmarkModMulNat-4 56871 21316 ns/op BenchmarkLargeModMulNat-4 20094 59612 ns/op BenchmarkModNat-4 58078 20815 ns/op BenchmarkLargeModNat-4 122284 10707 ns/op BenchmarkModInverseNat-4 44977 26167 ns/op BenchmarkLargeModInverseNat-4 278 4266387 ns/op BenchmarkModInverseEvenNat-4 192 6378308 ns/op BenchmarkLargeModInverseEvenNat-4 168 7169524 ns/op BenchmarkExpNat-4 6134 197088 ns/op BenchmarkLargeExpNat-4 10 108836798 ns/op BenchmarkSetBytesNat-4 785342 1525 ns/op PASS ok github.com/cronokirby/saferith 42.185s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 6626785 206.2 ns/op BenchmarkModAddBig-4 1000000 1168 ns/op BenchmarkLargeModAddBig-4 241417 5376 ns/op BenchmarkMulBig-4 269402 4368 ns/op BenchmarkModMulBig-4 210955 5503 ns/op BenchmarkLargeModMulBig-4 124407 9295 ns/op BenchmarkModBig-4 1315934 1054 ns/op BenchmarkLargeModBig-4 244891 4897 ns/op BenchmarkModInverseBig-4 887767 1470 ns/op BenchmarkLargeModInverseBig-4 56559 19833 ns/op BenchmarkExpBig-4 8506 136205 ns/op BenchmarkLargeExpBig-4 22 49501706 ns/op BenchmarkSetBytesBig-4 6449314 188.5 ns/op BenchmarkAddNat-4 5942666 206.2 ns/op BenchmarkModAddNat-4 58060 20800 ns/op BenchmarkLargeModAddNat-4 113953 11640 ns/op BenchmarkMulNat-4 79896 14701 ns/op BenchmarkModMulNat-4 57115 20863 ns/op BenchmarkLargeModMulNat-4 19510 62708 ns/op BenchmarkModNat-4 58936 20227 ns/op BenchmarkLargeModNat-4 115466 10372 ns/op BenchmarkModInverseNat-4 48084 25082 ns/op BenchmarkLargeModInverseNat-4 195 5999599 ns/op BenchmarkModInverseEvenNat-4 122 9402313 ns/op BenchmarkLargeModInverseEvenNat-4 100 10791384 ns/op BenchmarkExpNat-4 7232 158975 ns/op BenchmarkLargeExpNat-4 10 108437946 ns/op BenchmarkSetBytesNat-4 773128 1483 ns/op PASS ok github.com/cronokirby/saferith 41.718s ``` # b89445f7bada17baf2db88b52f2a39a8a168ceea (2021-03-31) Various minor optimizations around aliasing when multiplying ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 7508224 162.0 ns/op BenchmarkModAddBig-4 1000000 1112 ns/op BenchmarkLargeModAddBig-4 605701 2081 ns/op BenchmarkMulBig-4 448010 3001 ns/op BenchmarkModMulBig-4 293226 3936 ns/op BenchmarkLargeModMulBig-4 277750 4489 ns/op BenchmarkModBig-4 1279603 920.3 ns/op BenchmarkLargeModBig-4 719839 1712 ns/op BenchmarkModInverseBig-4 867788 1480 ns/op BenchmarkLargeModInverseBig-4 105806 12099 ns/op BenchmarkExpBig-4 8371 137640 ns/op BenchmarkLargeExpBig-4 43 25984116 ns/op BenchmarkSetBytesBig-4 6138981 200.8 ns/op BenchmarkAddNat-4 6275767 168.9 ns/op BenchmarkModAddNat-4 57129 20985 ns/op BenchmarkLargeModAddNat-4 115216 10546 ns/op BenchmarkMulNat-4 148363 8168 ns/op BenchmarkModMulNat-4 54138 21883 ns/op BenchmarkLargeModMulNat-4 19264 63472 ns/op BenchmarkModNat-4 54705 20608 ns/op BenchmarkLargeModNat-4 119592 10277 ns/op BenchmarkModInverseNat-4 41794 26500 ns/op BenchmarkLargeModInverseNat-4 276 4266428 ns/op BenchmarkExpNat-4 6061 192362 ns/op BenchmarkLargeExpNat-4 10 106890226 ns/op BenchmarkSetBytesNat-4 690368 1506 ns/op PASS ok github.com/cronokirby/saferith 35.207s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 5036264 221.7 ns/op BenchmarkModAddBig-4 1000000 1173 ns/op BenchmarkLargeModAddBig-4 235732 5102 ns/op BenchmarkMulBig-4 256430 4607 ns/op BenchmarkModMulBig-4 214002 5646 ns/op BenchmarkLargeModMulBig-4 126057 9631 ns/op BenchmarkModBig-4 1261798 948.9 ns/op BenchmarkLargeModBig-4 241308 5021 ns/op BenchmarkModInverseBig-4 883976 1482 ns/op BenchmarkLargeModInverseBig-4 58646 20443 ns/op BenchmarkExpBig-4 8444 140072 ns/op BenchmarkLargeExpBig-4 21 50785149 ns/op BenchmarkSetBytesBig-4 4901631 264.0 ns/op BenchmarkAddNat-4 5093500 259.5 ns/op BenchmarkModAddNat-4 53115 21281 ns/op BenchmarkLargeModAddNat-4 108441 11162 ns/op BenchmarkMulNat-4 77780 15389 ns/op BenchmarkModMulNat-4 54999 21898 ns/op BenchmarkLargeModMulNat-4 18313 64847 ns/op BenchmarkModNat-4 56948 20848 ns/op BenchmarkLargeModNat-4 110329 10730 ns/op BenchmarkModInverseNat-4 45327 26150 ns/op BenchmarkLargeModInverseNat-4 181 6601408 ns/op BenchmarkExpNat-4 7132 166127 ns/op BenchmarkLargeExpNat-4 10 111626393 ns/op BenchmarkSetBytesNat-4 773126 1467 ns/op PASS ok github.com/cronokirby/saferith 36.167s ``` # c0e31da784cec1419655a732a96f03c42bc3d97f (2021-03-31) Implement basic Montgomery multiplication ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 7037252 225.2 ns/op BenchmarkModAddBig-4 1000000 1158 ns/op BenchmarkLargeModAddBig-4 686259 1750 ns/op BenchmarkMulBig-4 448657 2704 ns/op BenchmarkModMulBig-4 339421 3771 ns/op BenchmarkLargeModMulBig-4 258805 4402 ns/op BenchmarkModBig-4 1264462 944.2 ns/op BenchmarkLargeModBig-4 685807 1785 ns/op BenchmarkModInverseBig-4 843180 1446 ns/op BenchmarkLargeModInverseBig-4 98060 11984 ns/op BenchmarkExpBig-4 8458 138325 ns/op BenchmarkLargeExpBig-4 40 26431885 ns/op BenchmarkSetBytesBig-4 4958301 270.1 ns/op BenchmarkAddNat-4 5462413 223.7 ns/op BenchmarkModAddNat-4 55483 21294 ns/op BenchmarkLargeModAddNat-4 113378 10548 ns/op BenchmarkMulNat-4 149622 7999 ns/op BenchmarkModMulNat-4 28458 41601 ns/op BenchmarkLargeModMulNat-4 16954 70324 ns/op BenchmarkModNat-4 57303 20695 ns/op BenchmarkLargeModNat-4 116892 10228 ns/op BenchmarkModInverseNat-4 45306 26319 ns/op BenchmarkLargeModInverseNat-4 277 4395341 ns/op BenchmarkExpNat-4 4910 238846 ns/op BenchmarkLargeExpNat-4 10 111907965 ns/op BenchmarkSetBytesNat-4 754429 1452 ns/op PASS ok github.com/cronokirby/saferith 36.098s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 5705341 217.9 ns/op BenchmarkModAddBig-4 982310 1138 ns/op BenchmarkLargeModAddBig-4 218103 5170 ns/op BenchmarkMulBig-4 239662 4615 ns/op BenchmarkModMulBig-4 212642 5477 ns/op BenchmarkLargeModMulBig-4 128007 9509 ns/op BenchmarkModBig-4 1297141 938.8 ns/op BenchmarkLargeModBig-4 239312 5124 ns/op BenchmarkModInverseBig-4 826834 1389 ns/op BenchmarkLargeModInverseBig-4 60763 20150 ns/op BenchmarkExpBig-4 8456 138709 ns/op BenchmarkLargeExpBig-4 22 49983921 ns/op BenchmarkSetBytesBig-4 5739404 220.6 ns/op BenchmarkAddNat-4 4374946 257.3 ns/op BenchmarkModAddNat-4 56361 21243 ns/op BenchmarkLargeModAddNat-4 110377 10957 ns/op BenchmarkMulNat-4 78637 15193 ns/op BenchmarkModMulNat-4 28364 41493 ns/op BenchmarkLargeModMulNat-4 16376 73648 ns/op BenchmarkModNat-4 56200 21209 ns/op BenchmarkLargeModNat-4 111158 10637 ns/op BenchmarkModInverseNat-4 45657 25961 ns/op BenchmarkLargeModInverseNat-4 183 6448992 ns/op BenchmarkExpNat-4 5218 217985 ns/op BenchmarkLargeExpNat-4 9 112084020 ns/op BenchmarkSetBytesNat-4 746052 1433 ns/op PASS ok github.com/cronokirby/saferith 35.794s ``` # a24e618bccbc040c7121394c889e8bdd0dca2d01 (2021-03-29) Implement free limb injection ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 6953644 212.4 ns/op BenchmarkModAddBig-4 1100115 1040 ns/op BenchmarkLargeModAddBig-4 585422 1841 ns/op BenchmarkMulBig-4 429370 3033 ns/op BenchmarkModMulBig-4 339429 3708 ns/op BenchmarkLargeModMulBig-4 284680 4709 ns/op BenchmarkModBig-4 1257049 936.7 ns/op BenchmarkLargeModBig-4 650443 1853 ns/op BenchmarkModInverseBig-4 823682 1468 ns/op BenchmarkLargeModInverseBig-4 98150 12050 ns/op BenchmarkExpBig-4 8720 135014 ns/op BenchmarkLargeExpBig-4 43 25975861 ns/op BenchmarkSetBytesBig-4 5997328 225.9 ns/op BenchmarkAddNat-4 5293556 211.6 ns/op BenchmarkModAddNat-4 55104 21435 ns/op BenchmarkLargeModAddNat-4 108148 10385 ns/op BenchmarkMulNat-4 140707 8281 ns/op BenchmarkModMulNat-4 24109 51049 ns/op BenchmarkLargeModMulNat-4 17731 63715 ns/op BenchmarkModNat-4 56545 20935 ns/op BenchmarkLargeModNat-4 114956 10484 ns/op BenchmarkModInverseNat-4 44448 26838 ns/op BenchmarkLargeModInverseNat-4 271 4304645 ns/op BenchmarkExpNat-4 196 5948716 ns/op BenchmarkLargeExpNat-4 3 359713739 ns/op BenchmarkSetBytesNat-4 836378 1506 ns/op PASS ok github.com/cronokirby/saferith 44.859s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 5669407 252.4 ns/op BenchmarkModAddBig-4 946286 1171 ns/op BenchmarkLargeModAddBig-4 215196 5212 ns/op BenchmarkMulBig-4 222129 4557 ns/op BenchmarkModMulBig-4 215752 5561 ns/op BenchmarkLargeModMulBig-4 125840 9571 ns/op BenchmarkModBig-4 1275968 1004 ns/op BenchmarkLargeModBig-4 234193 5070 ns/op BenchmarkModInverseBig-4 877096 1414 ns/op BenchmarkLargeModInverseBig-4 53406 23734 ns/op BenchmarkExpBig-4 8257 141085 ns/op BenchmarkLargeExpBig-4 21 51494745 ns/op BenchmarkSetBytesBig-4 4290268 292.7 ns/op BenchmarkAddNat-4 5286901 272.6 ns/op BenchmarkModAddNat-4 56404 21761 ns/op BenchmarkLargeModAddNat-4 109947 11317 ns/op BenchmarkMulNat-4 77644 15774 ns/op BenchmarkModMulNat-4 21037 56950 ns/op BenchmarkLargeModMulNat-4 16245 73966 ns/op BenchmarkModNat-4 57388 20625 ns/op BenchmarkLargeModNat-4 111604 10840 ns/op BenchmarkModInverseNat-4 45824 25909 ns/op BenchmarkLargeModInverseNat-4 181 6707925 ns/op BenchmarkExpNat-4 192 6192261 ns/op BenchmarkLargeExpNat-4 3 415365259 ns/op BenchmarkSetBytesNat-4 613538 1669 ns/op PASS ok github.com/cronokirby/saferith 39.011s ``` # 020e34e3436d885500318e1777e7364a1c3c393d (2021-03-27) Limb by limb reduction ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 7193640 174.0 ns/op BenchmarkModAddBig-4 1129430 1049 ns/op BenchmarkLargeModAddBig-4 637372 1735 ns/op BenchmarkMulBig-4 470226 2635 ns/op BenchmarkModMulBig-4 313644 3456 ns/op BenchmarkLargeModMulBig-4 257732 4091 ns/op BenchmarkModBig-4 1342916 891.9 ns/op BenchmarkLargeModBig-4 669978 1662 ns/op BenchmarkModInverseBig-4 772887 1358 ns/op BenchmarkLargeModInverseBig-4 102363 11236 ns/op BenchmarkExpBig-4 8823 136720 ns/op BenchmarkLargeExpBig-4 40 25947153 ns/op BenchmarkSetBytesBig-4 5259864 209.5 ns/op BenchmarkAddNat-4 6865567 177.1 ns/op BenchmarkModAddNat-4 56781 21101 ns/op BenchmarkLargeModAddNat-4 26634 44499 ns/op BenchmarkMulNat-4 147716 7994 ns/op BenchmarkModMulNat-4 23832 49599 ns/op BenchmarkLargeModMulNat-4 12249 96907 ns/op BenchmarkModNat-4 56899 20651 ns/op BenchmarkLargeModNat-4 26866 45090 ns/op BenchmarkModInverseNat-4 44586 26605 ns/op BenchmarkLargeModInverseNat-4 265 4357764 ns/op BenchmarkExpNat-4 200 5926589 ns/op BenchmarkLargeExpNat-4 2 622324074 ns/op BenchmarkSetBytesNat-4 786177 1445 ns/op PASS ok github.com/cronokirby/saferith 40.069s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 5884750 222.7 ns/op BenchmarkModAddBig-4 888044 1178 ns/op BenchmarkLargeModAddBig-4 218853 5074 ns/op BenchmarkMulBig-4 256408 4895 ns/op BenchmarkModMulBig-4 202692 5761 ns/op BenchmarkLargeModMulBig-4 111362 9603 ns/op BenchmarkModBig-4 1289430 915.5 ns/op BenchmarkLargeModBig-4 238866 4949 ns/op BenchmarkModInverseBig-4 892677 1415 ns/op BenchmarkLargeModInverseBig-4 60750 20164 ns/op BenchmarkExpBig-4 8632 140500 ns/op BenchmarkLargeExpBig-4 22 51087158 ns/op BenchmarkSetBytesBig-4 4668199 243.2 ns/op BenchmarkAddNat-4 4858521 255.4 ns/op BenchmarkModAddNat-4 56320 21361 ns/op BenchmarkLargeModAddNat-4 24938 48347 ns/op BenchmarkMulNat-4 78342 15215 ns/op BenchmarkModMulNat-4 20944 55907 ns/op BenchmarkLargeModMulNat-4 10000 111708 ns/op BenchmarkModNat-4 56568 20852 ns/op BenchmarkLargeModNat-4 23888 48265 ns/op BenchmarkModInverseNat-4 44446 26053 ns/op BenchmarkLargeModInverseNat-4 177 6559094 ns/op BenchmarkExpNat-4 202 5873289 ns/op BenchmarkLargeExpNat-4 2 717422832 ns/op BenchmarkSetBytesNat-4 686104 1476 ns/op PASS ok github.com/cronokirby/saferith 39.768s ``` # b66ac649d9ca1b1b394a7706cea6495b107dcb9c (2021-03-25) ``` [saferith] → go test -bench=. 
goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 7233490 168.4 ns/op BenchmarkModAddBig-4 1000000 1051 ns/op BenchmarkLargeModAddBig-4 599292 1772 ns/op BenchmarkMulBig-4 458648 2567 ns/op BenchmarkModMulBig-4 324234 3573 ns/op BenchmarkLargeModMulBig-4 285141 4197 ns/op BenchmarkModBig-4 1239144 1043 ns/op BenchmarkLargeModBig-4 711975 1732 ns/op BenchmarkModInverseBig-4 778612 1438 ns/op BenchmarkLargeModInverseBig-4 91058 13763 ns/op BenchmarkExpBig-4 7298 139626 ns/op BenchmarkLargeExpBig-4 42 25930457 ns/op BenchmarkSetBytesBig-4 4770934 240.3 ns/op BenchmarkAddNat-4 6586465 166.4 ns/op BenchmarkModAddNat-4 27013 44123 ns/op BenchmarkLargeModAddNat-4 2725 438992 ns/op BenchmarkMulNat-4 140364 7858 ns/op BenchmarkModMulNat-4 12786 94944 ns/op BenchmarkLargeModMulNat-4 1135 893462 ns/op BenchmarkModNat-4 27309 41986 ns/op BenchmarkLargeModNat-4 2733 438626 ns/op BenchmarkModInverseNat-4 25102 48908 ns/op BenchmarkLargeModInverseNat-4 237 4906628 ns/op BenchmarkExpNat-4 98 11851448 ns/op BenchmarkLargeExpNat-4 1 5541042938 ns/op BenchmarkSetBytesNat-4 788446 1696 ns/op PASS ok github.com/cronokirby/saferith 41.362s [saferith] → go test -bench=. 
-tags math_big_pure_go goos: linux goarch: amd64 pkg: github.com/cronokirby/saferith cpu: Intel(R) Core(TM) i5-4690K CPU @ 3.50GHz BenchmarkAddBig-4 6008720 206.7 ns/op BenchmarkModAddBig-4 1000000 1141 ns/op BenchmarkLargeModAddBig-4 226074 4899 ns/op BenchmarkMulBig-4 261199 5061 ns/op BenchmarkModMulBig-4 159643 6593 ns/op BenchmarkLargeModMulBig-4 119554 9764 ns/op BenchmarkModBig-4 1298679 1030 ns/op BenchmarkLargeModBig-4 226292 5268 ns/op BenchmarkModInverseBig-4 810531 1395 ns/op BenchmarkLargeModInverseBig-4 58944 21170 ns/op BenchmarkExpBig-4 8494 136181 ns/op BenchmarkLargeExpBig-4 22 50598450 ns/op BenchmarkSetBytesBig-4 5431112 199.1 ns/op BenchmarkAddNat-4 6781236 177.2 ns/op BenchmarkModAddNat-4 33860 36012 ns/op BenchmarkLargeModAddNat-4 1794 664556 ns/op BenchmarkMulNat-4 79315 14716 ns/op BenchmarkModMulNat-4 14188 83209 ns/op BenchmarkLargeModMulNat-4 872 1320999 ns/op BenchmarkModNat-4 34676 35121 ns/op BenchmarkLargeModNat-4 1726 674276 ns/op BenchmarkModInverseNat-4 29527 40186 ns/op BenchmarkLargeModInverseNat-4 164 7174169 ns/op BenchmarkExpNat-4 120 9723271 ns/op BenchmarkLargeExpNat-4 1 8705644301 ns/op BenchmarkSetBytesNat-4 794876 1570 ns/op PASS ok github.com/cronokirby/saferith 46.227s
```
saferith-0.33.0/bits.go000066400000000000000000000021771422457503400147060ustar00rootroot00000000000000package saferith

import "math/bits"

// _WShift can be used to multiply or divide by _W
//
// This assumes that _W = 64, 32
const _WShift = 5 + (_W >> 6)
const _WMask = _W - 1

// limbCount returns the number of limbs needed to accommodate bits.
func limbCount(bits int) int {
	return (bits + _W - 1) >> _WShift
}

// limbMask returns the mask used for the final limb of a Nat with this number of bits.
//
// Note that this function will leak the number of bits. For our library, this isn't
// a problem, since we always call this function with announced sizes.
func limbMask(bits int) Word { remaining := bits & _WMask allOnes := ^Word(0) if remaining == 0 { return allOnes } return ^(allOnes << remaining) } // leadingZeros calculates the number of leading zero bits in x. // // This shouldn't leak any information about the value of x. func leadingZeros(x Word) int { // Go will replace this call with the appropriate instruction on amd64 and arm64. // // Unfortunately, the fallback function is not constant-time, but the platforms // for which there is no fallback aren't all that common anyways. return bits.LeadingZeros(uint(x)) } saferith-0.33.0/bits_test.go000066400000000000000000000002701422457503400157360ustar00rootroot00000000000000package saferith import ( "math/rand" "testing" ) var result Word func BenchmarkLimbMask(b *testing.B) { for i := 0; i < b.N; i++ { x := rand.Int() result = limbMask(x) } } saferith-0.33.0/go.mod000066400000000000000000000000571422457503400145200ustar00rootroot00000000000000module github.com/cronokirby/saferith go 1.15 saferith-0.33.0/int.go000066400000000000000000000210341422457503400145310ustar00rootroot00000000000000package saferith import ( "errors" "math/big" "math/bits" ) // Int represents a signed integer of arbitrary size. // // Similarly to Nat, each Int comes along with an announced size, representing // the number of bits need to represent its absolute value. This can be // larger than its true size, the number of bits actually needed. type Int struct { // This number is represented by (-1)^sign * abs, essentially // When 1, this is a negative number, when 0 a positive number. // // There's a bit of redundancy to note, because -0 and +0 represent the same // number. We need to be careful around this edge case. sign Choice // The absolute value. // // Not using a point is important, that way the zero value for Int is actually zero. abs Nat } // SetBytes interprets a number in big-endian form, stores it in z, and returns z. // // This number will be positive. 
func (z *Int) SetBytes(data []byte) *Int { z.sign = 0 z.abs.SetBytes(data) return z } // MarshalBinary implements encoding.BinaryMarshaler. // The retrned byte slice is always of length 1 + len(i.Abs().Bytes()), // where the first byte encodes the sign. func (i *Int) MarshalBinary() ([]byte, error) { length := 1 + (i.abs.announced+7)/8 out := make([]byte, length) out[0] = byte(i.sign) i.abs.FillBytes(out[1:]) return out, nil } // UnmarshalBinary implements encoding.BinaryUnmarshaler. // Returns an error when the length of data is 0, // since we always expect the first byte to encode the sign. func (i *Int) UnmarshalBinary(data []byte) error { if len(data) == 0 { return errors.New("data must contain a sign byte") } i.abs.SetBytes(data[1:]) i.sign = Choice(data[0] & 1) return nil } // SetUint64 sets the value of z to x. // // This number will be positive. func (z *Int) SetUint64(x uint64) *Int { z.sign = 0 z.abs.SetUint64(x) return z } // SetNat will set the absolute value of z to x, and the sign to zero, returning z. func (z *Int) SetNat(x *Nat) *Int { z.sign = 0 z.abs.SetNat(x) return z } // Clone returns a copy of this Int. // // The copy can safely be mutated without affecting the original value. func (z *Int) Clone() *Int { out := new(Int) out.sign = z.sign out.abs.SetNat(&z.abs) return out } // SetBig will set the value of this number to the value of a big.Int, including sign. // // The size dicates the number of bits to use for the absolute value. This is important, // in order to include additional padding that the big.Int might have stripped off. // // Since big.Int stores its sign as a boolean, it's likely that this conversion // will leak the value of the sign. func (z *Int) SetBig(x *big.Int, size int) *Int { // x.Sign() = {-1, 0, 1}, // 1 - x.Sign() = {2, 1, 0}, // so this comparison correctly sniffs out negative numbers z.sign = ctGt(Word(1-x.Sign()), 1) z.abs.SetBig(x, size) return z } // Big will convert this number into a big.Int, including sign. 
// // This will leak the true size of this number, and its sign, because of the leakiness // of big.Int, so caution should be exercises when using this function. func (z *Int) Big() *big.Int { abs := z.abs.Big() if z.sign == 1 { abs.Neg(abs) } return abs } // Resize adjust the announced size of this number, possibly truncating the absolute value. func (z *Int) Resize(cap int) *Int { z.abs.Resize(cap) return z } // String formats this number as a signed hex string. // // This isn't a format that Int knows how to parse. This function exists mainly // to help debugging, and whatnot. func (z *Int) String() string { sign := ctIfElse(z.sign, Word('-'), Word('+')) return string(rune(sign)) + z.abs.String() } // Eq checks if this Int has the same value as another Int. // // Note that negative zero and positive zero are the same number. func (z *Int) Eq(x *Int) Choice { zero := z.abs.EqZero() // If this is zero, then any number as the same sign, // otherwise, check that the signs aren't different sameSign := zero | (1 ^ z.sign ^ x.sign) return sameSign & z.abs.Eq(&x.abs) } // Abs returns the absolute value of this Int. func (z *Int) Abs() *Nat { return new(Nat).SetNat(&z.abs) } // IsNegative checks if this value is negative func (z *Int) IsNegative() Choice { return z.sign } // AnnouncedLen returns the announced size of this int's absolute value. // // See Nat.AnnouncedLen func (z *Int) AnnouncedLen() int { return z.abs.AnnouncedLen() } // TrueLen returns the actual number of bits need to represent this int's absolute value. // // This leaks this value. // // See Nat.TrueLen func (z *Int) TrueLen() int { return z.abs.TrueLen() } // Neg calculates z <- -x. // // The result has the same announced size. func (z *Int) Neg(doit Choice) *Int { z.sign ^= doit return z } func (z *Int) SetInt(x *Int) *Int { z.sign = x.sign z.abs.SetNat(&x.abs) return z } // Mul calculates z <- x * y, returning z. 
//
// This will truncate the resulting absolute value, based on the bit capacity passed in.
//
// If cap < 0, then capacity is x.AnnouncedLen() + y.AnnouncedLen().
func (z *Int) Mul(x *Int, y *Int, cap int) *Int {
	// (-1)^sx * ax * (-1)^sy * ay = (-1)^(sx + sy) * ax * ay
	z.sign = x.sign ^ y.sign
	z.abs.Mul(&x.abs, &y.abs, cap)
	return z
}

// Mod calculates z mod M, handling negatives correctly.
//
// As indicated by the types, this function will return a number in the range 0..m-1.
func (z *Int) Mod(m *Modulus) *Nat {
	out := new(Nat).Mod(&z.abs, m)
	// When z is negative, the reduction of the absolute value must be negated mod m.
	negated := new(Nat).ModNeg(out, m)
	out.CondAssign(z.sign, negated)
	return out
}

// SetModSymmetric takes a number x mod M, and returns a signed number centered around 0.
//
// This effectively takes numbers in the range:
//    {0, .., m - 1}
// And returns numbers in the range:
//    {-(m - 1)/2, ..., 0, ..., (m - 1)/2}
// In the case that m is even, there will simply be an extra negative number.
func (z *Int) SetModSymmetric(x *Nat, m *Modulus) *Int {
	z.abs.Mod(x, m)
	negated := new(Nat).ModNeg(&z.abs, m)
	gt, _, _ := negated.Cmp(&z.abs)
	negatedLeq := 1 ^ gt
	// Always use the smaller value
	z.abs.CondAssign(negatedLeq, negated)
	// A negative modular number, by definition, will have its negation <= itself
	z.sign = negatedLeq
	return z
}

// CheckInRange checks whether or not this Int is in the range for SetModSymmetric.
func (z *Int) CheckInRange(m *Modulus) Choice {
	// First check that the absolute value makes sense
	_, _, absOk := z.abs.CmpMod(m)
	negated := new(Nat).ModNeg(&z.abs, m)
	_, _, lt := negated.Cmp(&z.abs)
	// If the negated value is strictly smaller, then we have a number out of range
	signOk := 1 ^ lt
	return absOk & signOk
}

// ExpI calculates z <- x^i mod m.
//
// This works with negative exponents, but requires x to be invertible mod m, of course.
func (z *Nat) ExpI(x *Nat, i *Int, m *Modulus) *Nat {
	// Calculate x^|i| mod m, then conditionally invert when i is negative.
	z.Exp(x, &i.abs, m)
	inverted := new(Nat).ModInverse(z, m)
	z.CondAssign(i.sign, inverted)
	return z
}

// conditionally negate a slice of words based on two's complement
func negateTwos(doit Choice, z []Word) {
	if len(z) <= 0 {
		return
	}
	// Two's complement negation is: flip all the bits, then add 1.
	// When doit is 0, sign is 0, and this loop leaves z unchanged.
	sign := Word(doit)
	zi, carry := bits.Add(uint(-sign^z[0]), uint(sign), 0)
	z[0] = Word(zi)
	for i := 1; i < len(z); i++ {
		zi, carry = bits.Add(uint(-sign^z[i]), 0, carry)
		z[i] = Word(zi)
	}
}

// convert a slice to two's complement, using a sign, and writing the result to out
func toTwos(sign Choice, abs []Word, out []Word) {
	copy(out, abs)
	negateTwos(sign, out)
}

// convert a slice from two's complement, writing it in place, and producing a sign
//
// NOTE(review): the bits parameter is unused here (and shadows the math/bits
// package inside this function) — presumably kept for call-site symmetry;
// confirm whether it can be dropped.
func fromTwos(bits int, mut []Word) Choice {
	if len(mut) <= 0 {
		return 0
	}
	// The top bit of the most significant limb holds the sign.
	sign := Choice(mut[len(mut)-1] >> (_W - 1))
	negateTwos(sign, mut)
	return sign
}

// Add calculates z <- x + y.
//
// The cap determines the number of bits to use for the absolute value of the result.
//
// If cap < 0, cap gets set to max(x.AnnouncedLen(), y.AnnouncedLen()) + 1
func (z *Int) Add(x *Int, y *Int, cap int) *Int {
	// Rough idea, convert x and y to two's complement representation, add, and
	// then convert back, before truncating as necessary.
if cap < 0 { cap = x.abs.maxAnnounced(&y.abs) + 1 } xLimbs := x.abs.unaliasedLimbs(&z.abs) yLimbs := y.abs.unaliasedLimbs(&z.abs) // We need an extra bit for the sign size := limbCount(cap + 1) scratch := z.abs.resizedLimbs(_W * 2 * size) // Convert both to two's complement xTwos := scratch[:size] yTwos := scratch[size:] toTwos(x.sign, xLimbs, xTwos) toTwos(y.sign, yLimbs, yTwos) // The addition will now produce the right result addVV(xTwos, xTwos, yTwos) // Convert back from two's complement z.sign = fromTwos(cap, xTwos) size = limbCount(cap) z.abs.limbs = scratch[:size] copy(z.abs.limbs, xTwos) maskEnd(z.abs.limbs, cap) z.abs.reduced = nil z.abs.announced = cap return z } saferith-0.33.0/int_test.go000066400000000000000000000103431422457503400155710ustar00rootroot00000000000000package saferith import ( "bytes" "math/rand" "reflect" "testing" "testing/quick" ) func (*Int) Generate(r *rand.Rand, size int) reflect.Value { bytes := make([]byte, r.Int()&127) r.Read(bytes) i := new(Int).SetBytes(bytes) if r.Int()&1 == 1 { i.Neg(1) } return reflect.ValueOf(i) } func testIntEqualReflexive(z *Int) bool { return z.Eq(z) == 1 } func TestIntEqualReflexive(t *testing.T) { err := quick.Check(testIntEqualReflexive, &quick.Config{}) if err != nil { t.Error(err) } } func testIntMulCommutative(x, y *Int) bool { way1 := new(Int).Mul(x, y, -1) way2 := new(Int).Mul(y, x, -1) return way1.Eq(way2) == 1 } func TestIntMulCommutative(t *testing.T) { err := quick.Check(testIntMulCommutative, &quick.Config{}) if err != nil { t.Error(err) } } func testIntMulZeroIsZero(x *Int) bool { zero := new(Int) timesZero := new(Int).Mul(zero, x, -1) return timesZero.Eq(zero) == 1 } func TestIntMulZeroIsZero(t *testing.T) { err := quick.Check(testIntMulZeroIsZero, &quick.Config{}) if err != nil { t.Error(err) } } func testIntMulNegativeOneIsNeg(x *Int) bool { minusOne := new(Int).SetUint64(1).Neg(1) way1 := new(Int).SetInt(x).Neg(1) way2 := new(Int).Mul(x, minusOne, -1) return way1.Eq(way2) == 1 } func 
TestIntMulNegativeOneIsNeg(t *testing.T) {
	err := quick.Check(testIntMulNegativeOneIsNeg, &quick.Config{})
	if err != nil {
		t.Error(err)
	}
}

// testIntModAddNegReturnsZero checks that (x mod m) + (-x mod m) == 0 mod m.
func testIntModAddNegReturnsZero(x *Int, m Modulus) bool {
	a := new(Int).SetInt(x).Neg(1).Mod(&m)
	b := x.Mod(&m)
	return b.ModAdd(a, b, &m).EqZero() == 1
}

func TestIntModAddNegReturnsZero(t *testing.T) {
	err := quick.Check(testIntModAddNegReturnsZero, &quick.Config{})
	if err != nil {
		t.Error(err)
	}
}

// testIntModRoundtrip checks that SetModSymmetric followed by Mod is the identity
// on values already reduced mod m, and that the symmetric form passes CheckInRange.
func testIntModRoundtrip(x Nat, m Modulus) bool {
	xModM := new(Nat).Mod(&x, &m)
	i := new(Int).SetModSymmetric(xModM, &m)
	if i.CheckInRange(&m) != 1 {
		return false
	}
	roundTrip := i.Mod(&m)
	return xModM.Eq(roundTrip) == 1
}

func TestIntModRoundtrip(t *testing.T) {
	err := quick.Check(testIntModRoundtrip, &quick.Config{})
	if err != nil {
		t.Error(err)
	}
}

// testIntAddNegZero checks that x + (-x) == 0.
func testIntAddNegZero(i *Int) bool {
	zero := new(Int)
	neg := new(Int).SetInt(i).Neg(1)
	shouldBeZero := new(Int).Add(i, neg, -1)
	return shouldBeZero.Eq(zero) == 1
}

func TestIntAddNegZero(t *testing.T) {
	err := quick.Check(testIntAddNegZero, &quick.Config{})
	if err != nil {
		t.Error(err)
	}
}

// testIntAddCommutative checks that x + y == y + x.
func testIntAddCommutative(x *Int, y *Int) bool {
	way1 := new(Int).Add(x, y, -1)
	// BUGFIX: previously this computed Add(x, y, -1) a second time, so the
	// test compared a value against itself and never exercised commutativity.
	way2 := new(Int).Add(y, x, -1)
	return way1.Eq(way2) == 1
}

func TestIntAddCommutative(t *testing.T) {
	err := quick.Check(testIntAddCommutative, &quick.Config{})
	if err != nil {
		t.Error(err)
	}
}

// testIntAddZeroIdentity checks that x + 0 == x.
func testIntAddZeroIdentity(x *Int) bool {
	zero := new(Int)
	shouldBeX := new(Int).Add(x, zero, -1)
	return shouldBeX.Eq(x) == 1
}

func TestIntAddZeroIdentity(t *testing.T) {
	err := quick.Check(testIntAddZeroIdentity, &quick.Config{})
	if err != nil {
		t.Error(err)
	}
}

func TestCheckInRangeExamples(t *testing.T) {
	x := new(Int).SetUint64(0)
	m := ModulusFromUint64(13)
	if x.CheckInRange(m) != 1 {
		t.Errorf("expected zero to be in range of modulus")
	}
}

func TestIntAddExamples(t *testing.T) {
	// 3 + (-4) == -1
	x := new(Int).SetUint64(3).Resize(8)
	y := new(Int).SetUint64(4).Neg(1).Resize(8)
	expected := new(Int).SetUint64(1).Neg(1)
	actual := new(Int).Add(x,
y, -1) if expected.Eq(actual) != 1 { t.Errorf("%+v != %+v", expected, actual) } } func testIntMarshalBinaryRoundTrip(x *Int) bool { out, err := x.MarshalBinary() if err != nil { return false } y := new(Int) err = y.UnmarshalBinary(out) if err != nil { return false } return x.Eq(y) == 1 } func TestIntMarshalBinaryRoundTrip(t *testing.T) { err := quick.Check(testIntMarshalBinaryRoundTrip, &quick.Config{}) if err != nil { t.Error(err) } } func testInvalidInt(expected []byte) bool { x := new(Int) err := x.UnmarshalBinary(expected) // empty slice is invalid, so we expect an error if len(expected) == 0 { return err != nil } expectedBytes := expected[1:] expectedSign := Choice(expected[0]) & 1 actualBytes := x.Abs().Bytes() actualSign := x.sign return (expectedSign == actualSign) && bytes.Equal(expectedBytes, actualBytes) } func TestInvalidInt(t *testing.T) { err := quick.Check(testInvalidInt, &quick.Config{}) if err != nil { t.Error(err) } } saferith-0.33.0/num.go000066400000000000000000001617401422457503400145470ustar00rootroot00000000000000package saferith import ( "fmt" "math/big" "math/bits" "strings" ) // General utilities // add calculates a + b + carry, returning the sum, and carry // // This is a convenient wrapper around bits.Add, and should be optimized // by the compiler to produce a single ADC instruction. func add(a, b, carry Word) (sum Word, newCarry Word) { s, c := bits.Add(uint(a), uint(b), uint(carry)) return Word(s), Word(c) } // Constant Time Utilities // Choice represents a constant-time boolean. // // The value of Choice is always either 1 or 0. // // We use a separate type instead of bool, in order to be able to make decisions without leaking // which decision was made. // // You can easily convert a Choice into a bool with the operation c == 1. 
//
// In general, logical operations on bool become bitwise operations on choice:
//    a && b => a & b
//    a || b => a | b
//    a != b => a ^ b
//    !a     => 1 ^ a
type Choice Word

// ctEq compares x and y for equality, returning 1 if equal, and 0 otherwise
//
// This doesn't leak any information about either of them
func ctEq(x, y Word) Choice {
	// If x == y, then x ^ y should be all zero bits.
	q := uint(x ^ y)
	// For any q != 0, either the MSB of q, or the MSB of -q is 1.
	// We can thus or those together, and check the top bit. When q is zero,
	// that means that x and y are equal, so we negate that top bit.
	return 1 ^ Choice((q|-q)>>(_W-1))
}

// ctGt checks x > y, returning 1 or 0
//
// This doesn't leak any information about either of them
func ctGt(x, y Word) Choice {
	// y - x produces a borrow exactly when x > y.
	_, b := bits.Sub(uint(y), uint(x), 0)
	return Choice(b)
}

// ctIfElse selects x if v = 1, and y otherwise
//
// This doesn't leak the value of any of its inputs
func ctIfElse(v Choice, x, y Word) Word {
	// mask should be all 1s if v is 1, otherwise all 0s
	mask := -Word(v)
	return y ^ (mask & (y ^ x))
}

// ctCondCopy copies y into x, if v == 1, otherwise does nothing
//
// Both slices must have the same length.
//
// LEAK: the length of the slices
//
// Otherwise, which branch was taken isn't leaked
func ctCondCopy(v Choice, x, y []Word) {
	if len(x) != len(y) {
		panic("ctCondCopy: mismatched arguments")
	}
	for i := 0; i < len(x); i++ {
		x[i] = ctIfElse(v, y[i], x[i])
	}
}

// ctCondSwap swaps the contents of a and b, when v == 1, otherwise does nothing
//
// Both slices must have the same length.
//
// LEAK: the length of the slices
//
// Whether or not a swap happened isn't leaked
func ctCondSwap(v Choice, a, b []Word) {
	for i := 0; i < len(a) && i < len(b); i++ {
		ai := a[i]
		a[i] = ctIfElse(v, b[i], ai)
		b[i] = ctIfElse(v, ai, b[i])
	}
}

// CondAssign sets z <- yes ? x : z.
//
// This function doesn't leak any information about whether the assignment happened.
//
// The announced size of the result will be the largest size between z and x.
func (z *Nat) CondAssign(yes Choice, x *Nat) *Nat {
	maxBits := z.maxAnnounced(x)
	xLimbs := x.resizedLimbs(maxBits)
	z.limbs = z.resizedLimbs(maxBits)
	ctCondCopy(yes, z.limbs, xLimbs)
	// If the value we're potentially assigning has a different reduction,
	// then there's nothing we can conclude about the resulting reduction.
	if z.reduced != x.reduced {
		z.reduced = nil
	}
	z.announced = maxBits
	return z
}

// "Missing" Functions
// These are routines that could in theory be implemented in assembly,
// but aren't already present in Go's big number routines

// div calculates the quotient and remainder of hi:lo / d
//
// Unlike bits.Div, this doesn't leak anything about the inputs
func div(hi, lo, d Word) (Word, Word) {
	var quo Word
	hi = ctIfElse(ctEq(hi, d), 0, hi)
	// Bit-by-bit long division: on each step, decide in constant time whether
	// the shifted remainder is >= d, and record the resulting quotient bit.
	for i := _W - 1; i > 0; i-- {
		j := _W - i
		w := (hi << j) | (lo >> i)
		sel := ctEq(w, d) | ctGt(w, d) | Choice(hi>>i)
		hi2 := (w - d) >> j
		lo2 := lo - (d << i)
		hi = ctIfElse(sel, hi2, hi)
		lo = ctIfElse(sel, lo2, lo)
		quo |= Word(sel)
		quo <<= 1
	}
	sel := ctEq(lo, d) | ctGt(lo, d) | Choice(hi)
	quo |= Word(sel)
	rem := ctIfElse(sel, lo-d, lo)
	return quo, rem
}

// mulSubVVW calculates z -= y * x
//
// This also results in a carry.
func mulSubVVW(z, x []Word, y Word) (c Word) {
	for i := 0; i < len(z) && i < len(x); i++ {
		hi, lo := mulAddWWW_g(x[i], y, c)
		sub, cc := bits.Sub(uint(z[i]), uint(lo), 0)
		c, z[i] = Word(cc), Word(sub)
		c += hi
	}
	return
}

// Nat represents an arbitrary sized natural number.
//
// Different methods on Nats will talk about a "capacity". The capacity represents
// the announced size of some number. Operations may vary in time *only* relative
// to this capacity, and not to the actual value of the number.
//
// The capacity of a number is usually inherited through whatever method was used to
// create the number in the first place.
type Nat struct {
	// The exact number of bits this number claims to have.
	//
	// This can differ from the actual number of bits needed to represent this number.
	announced int
	// If this is set, then the value of this Nat is in the range 0..reduced - 1.
	//
	// This value should get set based only on statically knowable things, like what
	// functions have been called. This means that we will have plenty of false
	// negatives, where a value is small enough, but we don't know statically
	// that this is the case.
	//
	// Invariant: If reduced is set, then announced should match the announced size of
	// this modulus.
	reduced *Modulus
	// The limbs representing this number, in little endian order.
	//
	// Invariant: The bits past announced will not be set. This includes when announced
	// isn't a multiple of the limb size.
	//
	// Invariant: two Nats are not allowed to share the same slice.
	// This allows us to use pointer comparison to check that Nats don't alias each other
	limbs []Word
}

// checkInvariants does some internal sanity checks.
//
// This is useful for tests.
func (z *Nat) checkInvariants() bool {
	if z.reduced != nil && z.announced != z.reduced.nat.announced {
		return false
	}
	if len(z.limbs) != limbCount(z.announced) {
		return false
	}
	if len(z.limbs) > 0 {
		lastLimb := z.limbs[len(z.limbs)-1]
		if lastLimb != lastLimb&limbMask(z.announced) {
			return false
		}
	}
	return true
}

// maxAnnounced returns the larger announced length of z and y
func (z *Nat) maxAnnounced(y *Nat) int {
	maxBits := z.announced
	if y.announced > maxBits {
		maxBits = y.announced
	}
	return maxBits
}

// ensureLimbCapacity makes sure that a Nat has capacity for a certain number of limbs
//
// This will modify the slice contained inside the natural, but won't change the size of
// the slice, so it doesn't affect the value of the natural.
//
// LEAK: Probably the current number of limbs, and size
// OK: both of these should be public
func (z *Nat) ensureLimbCapacity(size int) {
	if cap(z.limbs) < size {
		newLimbs := make([]Word, len(z.limbs), size)
		copy(newLimbs, z.limbs)
		z.limbs = newLimbs
	}
}

// resizedLimbs returns a new slice of limbs accommodating a number of bits.
//
// This will clear out the end of the slice as necessary.
//
// LEAK: the current number of limbs, and bits
// OK: both are public
func (z *Nat) resizedLimbs(bits int) []Word {
	size := limbCount(bits)
	z.ensureLimbCapacity(size)
	res := z.limbs[:size]
	// Make sure that the expansion (if any) is cleared
	for i := len(z.limbs); i < size; i++ {
		res[i] = 0
	}
	maskEnd(res, bits)
	return res
}

// maskEnd applies the correct bit mask to some limbs
func maskEnd(limbs []Word, bits int) {
	if len(limbs) <= 0 {
		return
	}
	limbs[len(limbs)-1] &= limbMask(bits)
}

// unaliasedLimbs returns a set of limbs for z, such that they do not alias those of x
//
// This will create a copy of the limbs, if necessary.
//
// LEAK: the size of z, whether or not z and x are the same Nat
func (z *Nat) unaliasedLimbs(x *Nat) []Word {
	res := z.limbs
	if z == x {
		res = make([]Word, len(z.limbs))
		copy(res, z.limbs)
	}
	return res
}

// trueSize calculates the actual size necessary for representing these limbs
//
// This is the size with leading zeros removed. This leaks the number
// of such zeros, but nothing else.
func trueSize(limbs []Word) int {
	// Instead of checking == 0 directly, which may leak the value, we instead
	// compare with zero in constant time, and check if that succeeded in a leaky way.
	var size int
	for size = len(limbs); size > 0 && ctEq(limbs[size-1], 0) == 1; size-- {
	}
	return size
}

// AnnouncedLen returns the number of bits this number is publicly known to have
func (z *Nat) AnnouncedLen() int {
	return z.announced
}

// TrueLen calculates the exact number of bits needed to represent z
//
// This function violates the standard contract around Nats and announced length.
// For most purposes, `AnnouncedLen` should be used instead.
//
// That being said, this function does try to limit its leakage, and should
// only leak the number of leading zero bits in the number.
func (z *Nat) TrueLen() int {
	limbSize := trueSize(z.limbs)
	size := limbSize * _W
	if limbSize > 0 {
		size -= leadingZeros(z.limbs[limbSize-1])
	}
	return size
}

// FillBytes writes out the big endian bytes of a natural number.
//
// This will always write out the full capacity of the number, without
// any kind of trimming.
func (z *Nat) FillBytes(buf []byte) []byte {
	for i := 0; i < len(buf); i++ {
		buf[i] = 0
	}
	i := len(buf)
	// LEAK: Number of limbs
	// OK: The number of limbs is public
	// LEAK: The addresses touched in the out array
	// OK: Every member of out is touched
Outer:
	for _, x := range z.limbs {
		y := x
		for j := 0; j < _S; j++ {
			i--
			if i < 0 {
				break Outer
			}
			buf[i] = byte(y)
			y >>= 8
		}
	}
	return buf
}

// SetBytes interprets a number in big-endian format, stores it in z, and returns z.
//
// The exact length of the buffer must be public information! This length also dictates
// the capacity of the number returned, and thus the resulting timings for operations
// involving that number.
func (z *Nat) SetBytes(buf []byte) *Nat {
	z.reduced = nil
	z.announced = 8 * len(buf)
	z.limbs = z.resizedLimbs(z.announced)
	// Fill the little-endian limbs from the big-endian bytes, back to front.
	bufI := len(buf) - 1
	for i := 0; i < len(z.limbs) && bufI >= 0; i++ {
		z.limbs[i] = 0
		for shift := 0; shift < _W && bufI >= 0; shift += 8 {
			z.limbs[i] |= Word(buf[bufI]) << shift
			bufI--
		}
	}
	return z
}

// Bytes creates a slice containing the contents of this Nat, in big endian
//
// This will always fill the output byte slice based on the announced length of this Nat.
func (z *Nat) Bytes() []byte {
	length := (z.announced + 7) / 8
	out := make([]byte, length)
	return z.FillBytes(out)
}

// MarshalBinary implements encoding.BinaryMarshaler.
// Returns the same value as Bytes().
func (i *Nat) MarshalBinary() ([]byte, error) {
	return i.Bytes(), nil
}

// UnmarshalBinary implements encoding.BinaryUnmarshaler.
// Wraps SetBytes
func (i *Nat) UnmarshalBinary(data []byte) error {
	i.SetBytes(data)
	return nil
}

// convert a 4 bit value into an ASCII value in constant time
func nibbletoASCII(nibble byte) byte {
	w := Word(nibble)
	value := ctIfElse(ctGt(w, 9), w-0xA+Word('A'), w+Word('0'))
	return byte(value)
}

// convert an ASCII value into a 4 bit value, returning whether or not this value is valid.
func nibbleFromASCII(ascii byte) (byte, Choice) {
	w := Word(ascii)
	// Valid characters are '0'..'9' and 'A'..'F', checked in constant time.
	inFirstRange := ctGt(w, Word('0')-1) & (1 ^ ctGt(w, Word('9')))
	inSecondRange := ctGt(w, Word('A')-1) & (1 ^ ctGt(w, Word('F')))
	valid := inFirstRange | inSecondRange
	nibble := ctIfElse(inFirstRange, w-Word('0'), w-Word('A')+0xA)
	return byte(nibble), valid
}

// SetHex modifies the value of z to hold a hex string, returning z
//
// The hex string must be in big endian order. If it contains characters
// other than 0..9, A..F, the value of z will be undefined, and an error will
// be returned.
//
// The value of the string shouldn't be leaked, except in the case where the string
// contains invalid characters.
func (z *Nat) SetHex(hex string) (*Nat, error) {
	z.reduced = nil
	z.announced = 4 * len(hex)
	z.limbs = z.resizedLimbs(z.announced)
	// Fill the little-endian limbs from the big-endian hex digits, back to front.
	hexI := len(hex) - 1
	for i := 0; i < len(z.limbs) && hexI >= 0; i++ {
		z.limbs[i] = 0
		for shift := 0; shift < _W && hexI >= 0; shift += 4 {
			nibble, valid := nibbleFromASCII(byte(hex[hexI]))
			if valid != 1 {
				return nil, fmt.Errorf("invalid hex character: %c", hex[hexI])
			}
			z.limbs[i] |= Word(nibble) << shift
			hexI--
		}
	}
	return z, nil
}

// Hex converts this number into a hexadecimal string.
//
// This string will be a multiple of 8 bits.
//
// This shouldn't leak any information about the value of this Nat, only its length.
func (z *Nat) Hex() string {
	bytes := z.Bytes()
	var builder strings.Builder
	for _, b := range bytes {
		_ = builder.WriteByte(nibbletoASCII((b >> 4) & 0xF))
		_ = builder.WriteByte(nibbletoASCII(b & 0xF))
	}
	return builder.String()
}

// the number of bytes to print in the string representation before an underscore
const underscoreAfterNBytes = 4

// String will represent this nat as a convenient Hex string
//
// This shouldn't leak any information about the value of this Nat, only its length.
func (z *Nat) String() string {
	bytes := z.Bytes()
	var builder strings.Builder
	_, _ = builder.WriteString("0x")
	i := 0
	for _, b := range bytes {
		// Insert an underscore every underscoreAfterNBytes bytes, for readability.
		if i == underscoreAfterNBytes {
			builder.WriteRune('_')
			i = 0
		}
		builder.WriteByte(nibbletoASCII((b >> 4) & 0xF))
		builder.WriteByte(nibbletoASCII(b & 0xF))
		i += 1
	}
	return builder.String()
}

// Byte will access the ith byte in this nat, with 0 being the least significant byte.
//
// This will leak the value of i, and panic if i is < 0.
func (z *Nat) Byte(i int) byte { if i < 0 { panic("negative byte") } limbCount := len(z.limbs) bytesPerLimb := _W / 8 if i >= bytesPerLimb*limbCount { return 0 } return byte(z.limbs[i/bytesPerLimb] >> (8 * (i % bytesPerLimb))) } // Big converts a Nat into a big.Int // // This will leak information about the true size of z, so caution // should be exercised when using this method with sensitive values. func (z *Nat) Big() *big.Int { res := new(big.Int) // Unfortunate that there's no good way to handle this bigLimbs := make([]big.Word, len(z.limbs)) for i := 0; i < len(bigLimbs) && i < len(z.limbs); i++ { bigLimbs[i] = big.Word(z.limbs[i]) } res.SetBits(bigLimbs) return res } // SetBig modifies z to contain the value of x // // The size parameter is used to pad or truncate z to a certain number of bits. func (z *Nat) SetBig(x *big.Int, size int) *Nat { z.announced = size z.limbs = z.resizedLimbs(size) bigLimbs := x.Bits() for i := 0; i < len(z.limbs) && i < len(bigLimbs); i++ { z.limbs[i] = Word(bigLimbs[i]) } maskEnd(z.limbs, size) return z } // SetUint64 sets z to x, and returns z // // This will have the exact same capacity as a 64 bit number func (z *Nat) SetUint64(x uint64) *Nat { z.reduced = nil z.announced = 64 z.limbs = z.resizedLimbs(z.announced) for i := 0; i < len(z.limbs); i++ { z.limbs[i] = Word(x) x >>= _W } return z } // Uint64 represents this number as uint64 // // The behavior of this function is undefined if the announced length of z is > 64. func (z *Nat) Uint64() uint64 { var ret uint64 for i := len(z.limbs) - 1; i >= 0; i-- { ret = (ret << _W) | uint64(z.limbs[i]) } return ret } // SetNat copies the value of x into z // // z will have the same announced length as x. func (z *Nat) SetNat(x *Nat) *Nat { z.limbs = z.resizedLimbs(x.announced) copy(z.limbs, x.limbs) z.reduced = x.reduced z.announced = x.announced return z } // Clone returns a copy of this value. // // This copy can safely be mutated without affecting the original. 
func (z *Nat) Clone() *Nat {
	return new(Nat).SetNat(z)
}

// Resize resizes z to a certain number of bits, returning z.
func (z *Nat) Resize(cap int) *Nat {
	z.limbs = z.resizedLimbs(cap)
	z.announced = cap
	return z
}

// Modulus represents a natural number used for modular reduction
//
// Unlike with natural numbers, the number of bits need to contain the modulus
// is assumed to be public. Operations are allowed to leak this size, and creating
// a modulus will remove unnecessary zeros.
//
// Operations on a Modulus may leak whether or not a Modulus is even.
type Modulus struct {
	// the underlying value of this modulus
	nat Nat
	// the number of leading zero bits
	leading int
	// The inverse of the least significant limb, modulo W
	m0inv Word
	// If true, then this modulus is even
	even bool
}

// invertModW calculates x^-1 mod _W
func invertModW(x Word) Word {
	// Newton-Raphson iteration: each step doubles the number of correct bits.
	y := x
	// This is enough for 64 bits, and the extra iteration is not that costly for 32
	for i := 0; i < 5; i++ {
		y = y * (2 - x*y)
	}
	return y
}

// precomputeValues calculates the desirable modulus fields in advance
//
// This sets the leading number of bits, leaking the true bit size of m,
// as well as the inverse of the least significant limb (without leaking it).
//
// This will also do integrity checks, namely that the modulus isn't empty or even
func (m *Modulus) precomputeValues() {
	// Strip leading zero limbs, leaking the true bit length (allowed for moduli).
	announced := m.nat.TrueLen()
	m.nat.announced = announced
	m.nat.limbs = m.nat.resizedLimbs(announced)
	if len(m.nat.limbs) < 1 {
		panic("Modulus is empty")
	}
	m.leading = leadingZeros(m.nat.limbs[len(m.nat.limbs)-1])
	// I think checking the bit directly might leak more data than we'd like
	m.even = ctEq(m.nat.limbs[0]&1, 0) == 1
	// There's no point calculating this if m isn't even, and we can leak evenness
	if !m.even {
		m.m0inv = invertModW(m.nat.limbs[0])
		m.m0inv = -m.m0inv
	}
}

// ModulusFromUint64 sets the modulus according to an integer
func ModulusFromUint64(x uint64) *Modulus {
	var m Modulus
	m.nat.SetUint64(x)
	m.precomputeValues()
	return &m
}

// ModulusFromBytes creates a new Modulus, converting from big endian bytes
//
// This function will remove leading zeros, thus leaking the true size of the modulus.
// See the documentation for the Modulus type, for more information about this contract.
func ModulusFromBytes(bytes []byte) *Modulus {
	var m Modulus
	// TODO: You could allocate a smaller buffer to begin with, versus using the Nat method
	m.nat.SetBytes(bytes)
	m.precomputeValues()
	return &m
}

// ModulusFromHex creates a new modulus from a hex string.
//
// The same rules as Nat.SetHex apply.
//
// Additionally, this function will remove leading zeros, leaking the true size of the modulus.
// See the documentation for the Modulus type, for more information about this contract.
func ModulusFromHex(hex string) (*Modulus, error) {
	var m Modulus
	_, err := m.nat.SetHex(hex)
	if err != nil {
		return nil, err
	}
	m.precomputeValues()
	return &m, nil
}

// ModulusFromNat creates a new Modulus, using the value of a Nat
//
// This will leak the true size of this natural number. Because of this,
// the true size of the number should not be sensitive information. This is
// a stronger requirement than we usually have for Nat.
func ModulusFromNat(nat *Nat) *Modulus {
	var m Modulus
	m.nat.SetNat(nat)
	m.precomputeValues()
	return &m
}

// Nat returns the value of this modulus as a Nat.
//
// This will create a copy of this modulus value, so the Nat can be safely
// mutated.
func (m *Modulus) Nat() *Nat {
	return new(Nat).SetNat(&m.nat)
}

// Bytes returns the big endian bytes making up the modulus
func (m *Modulus) Bytes() []byte {
	return m.nat.Bytes()
}

// MarshalBinary implements encoding.BinaryMarshaler.
func (i *Modulus) MarshalBinary() ([]byte, error) {
	return i.nat.Bytes(), nil
}

// UnmarshalBinary implements encoding.BinaryUnmarshaler.
func (i *Modulus) UnmarshalBinary(data []byte) error {
	i.nat.SetBytes(data)
	i.precomputeValues()
	return nil
}

// Big returns the value of this Modulus as a big.Int
func (m *Modulus) Big() *big.Int {
	return m.nat.Big()
}

// Hex will represent this Modulus as a Hex string.
//
// The hex string will hold a multiple of 8 bits.
//
// This shouldn't leak any information about the value of the modulus, beyond
// the usual leakage around its size.
func (m *Modulus) Hex() string {
	return m.nat.Hex()
}

// String will represent this Modulus as a convenient Hex string
//
// This shouldn't leak any information about the value of the modulus, only its length.
func (m *Modulus) String() string {
	return m.nat.String()
}

// BitLen returns the exact number of bits used to store this Modulus
//
// Moduli are allowed to leak this value.
func (m *Modulus) BitLen() int {
	return m.nat.announced
}

// Cmp compares two moduli, returning results for (>, =, <).
//
// This will not leak information about the value of these relations, or the moduli.
func (m *Modulus) Cmp(n *Modulus) (Choice, Choice, Choice) {
	return m.nat.Cmp(&n.nat)
}

// shiftAddInCommon exists to unify behavior between shiftAddIn and shiftAddInGeneric
//
// z, scratch, and m should have the same length.
//
// The two functions differ only in how they calculate a1:a0, and b0.
//
// hi should be what was previously the top limb of z.
//
// a1:a0 and b0 should be the most significant two limbs of z, and single limb of m,
// after shifting to discard leading zeros.
//
// The way these are calculated differs between the two versions of shiftAddIn,
// which is why this function exists.
func shiftAddInCommon(z, scratch, m []Word, hi, a1, a0, b0 Word) (q Word) {
	// We want to use a1:a0 / b0 - 1 as our estimate. If rawQ is 0, we should
	// use 0 as our estimate. Another edge case when an overflow happens in the quotient.
	// It can be shown that this happens when a1 == b0. In this case, we want
	// to use the maximum value for q
	rawQ, _ := div(a1, a0, b0)
	q = ctIfElse(ctEq(a1, b0), ^Word(0), ctIfElse(ctEq(rawQ, 0), 0, rawQ-1))

	// This estimate is off by +- 1, so we subtract q * m, and then either add
	// or subtract m, based on the result.
	c := mulSubVVW(z, m, q)
	// If the carry from subtraction is greater than the limb of z we've shifted out,
	// then we've underflowed, and need to add in m
	under := ctGt(c, hi)
	// For us to be too large, we first need to not be too low, as per the previous flag.
	// Then, if the lower limbs of z are still larger, or the top limb of z is equal to the carry,
	// we can conclude that we're too large, and need to subtract m
	stillBigger := cmpGeq(z, m)
	over := (1 ^ under) & (stillBigger | (1 ^ ctEq(c, hi)))
	// Both corrections are computed unconditionally, and applied in constant time.
	addVV(scratch, z, m)
	ctCondCopy(under, z, scratch)
	q -= Word(under)
	subVV(scratch, z, m)
	ctCondCopy(over, z, scratch)
	q += Word(over)
	return
}

// shiftAddIn calculates z = z << _W + x mod m
//
// The length of z and scratch should be len(m)
func shiftAddIn(z, scratch []Word, x Word, m *Modulus) (q Word) {
	// Making tests on the exact bit length of m is ok,
	// since that's part of the contract for moduli
	size := len(m.nat.limbs)
	if size == 0 {
		return
	}
	if size == 1 {
		// In this case, z:x (/, %) m is exactly what we need to calculate
		q, r := div(z[0], x, m.nat.limbs[0])
		z[0] = r
		return q
	}

	// The idea is as follows:
	//
	// We want to shift x into z, and then divide by m. Instead of dividing by
	// m, we can get a good estimate, using the top two 2 * _W bits of z, and the
	// top _W bits of m. These are stored in a1:a0, and b0 respectively.

	// We need to keep around the top word of z, pre-shifting
	hi := z[size-1]

	a1 := (z[size-1] << m.leading) | (z[size-2] >> (_W - m.leading))
	// The actual shift can be performed by moving the limbs of z up, then inserting x
	for i := size - 1; i > 0; i-- {
		z[i] = z[i-1]
	}
	z[0] = x
	a0 := (z[size-1] << m.leading) | (z[size-2] >> (_W - m.leading))
	b0 := (m.nat.limbs[size-1] << m.leading) | (m.nat.limbs[size-2] >> (_W - m.leading))
	return shiftAddInCommon(z, scratch, m.nat.limbs, hi, a1, a0, b0)
}

// shiftAddInGeneric is like shiftAddIn, but works with arbitrary m.
//
// See shiftAddIn for what this function is trying to accomplish, and what the
// inputs represent.
//
// The big difference this entails is that z and m may have padding limbs, so
// we have to do a bit more work to recover their significant bits in constant-time.
func shiftAddInGeneric(z, scratch []Word, x Word, m []Word) Word {
	size := len(m)
	if size == 0 {
		return 0
	}
	if size == 1 {
		// In this case, z:x (/, %) m is exactly what we need to calculate
		q, r := div(z[0], x, m[0])
		z[0] = r
		return q
	}
	// We need to match the two most significant 2 * _W bits of z with the most significant
	// _W bits of m. We also need to eliminate any leading zeros, possibly fetching
	// these bits over multiple limbs. Because of this, we need to scan over both
	// arrays, with a window of 3 limbs for z, and 2 limbs for m, until we hit the
	// first non-zero limb for either of them. Because z < m, it suffices to check
	// for a non-zero limb from m.
	var a2, a1, a0, b1, b0 Word
	done := Choice(0)
	for i := size - 1; i > 1; i-- {
		a2 = ctIfElse(done, a2, z[i])
		a1 = ctIfElse(done, a1, z[i-1])
		a0 = ctIfElse(done, a0, z[i-2])
		b1 = ctIfElse(done, b1, m[i])
		b0 = ctIfElse(done, b0, m[i-1])
		done = 1 ^ ctEq(b1, 0)
	}
	// We also need to do one more iteration to potentially include x inside of our
	// significant bits from z.
	a2 = ctIfElse(done, a2, z[1])
	a1 = ctIfElse(done, a1, z[0])
	a0 = ctIfElse(done, a0, x)
	b1 = ctIfElse(done, b1, m[1])
	b0 = ctIfElse(done, b0, m[0])
	// Now, we need to shift away the leading zeros to get the most significant bits.
	// Converting to Word avoids a panic check
	l := Word(leadingZeros(b1))
	a2 = (a2 << l) | (a1 >> (_W - l))
	a1 = (a1 << l) | (a0 >> (_W - l))
	b1 = (b1 << l) | (b0 >> (_W - l))
	// Another adjustment we need to make before calling the next function is to actually
	// insert x inside of z, shifting out hi.
	hi := z[len(z)-1]
	for i := size - 1; i > 0; i-- {
		z[i] = z[i-1]
	}
	z[0] = x
	return shiftAddInCommon(z, scratch, m, hi, a2, a1, b1)
}

// Mod calculates z <- x mod m
//
// The capacity of the resulting number matches the capacity of the modulus.
func (z *Nat) Mod(x *Nat, m *Modulus) *Nat {
	// Fast path: x is already tagged as reduced modulo this exact modulus.
	if x.reduced == m {
		z.SetNat(x)
		return z
	}
	size := len(m.nat.limbs)
	xLimbs := x.unaliasedLimbs(z)
	z.limbs = z.resizedLimbs(2 * _W * size)
	for i := 0; i < len(z.limbs); i++ {
		z.limbs[i] = 0
	}
	// Multiple times in this section:
	// LEAK: the length of x
	// OK: this is public information
	i := len(xLimbs) - 1
	// We can inject at least size - 1 limbs while staying under m
	// Thus, we start injecting from index size - 2
	start := size - 2
	// That is, if there are at least that many limbs to choose from
	if i < start {
		start = i
	}
	for j := start; j >= 0; j-- {
		z.limbs[j] = xLimbs[i]
		i--
	}
	// We shift in the remaining limbs, making sure to reduce modulo M each time
	for ; i >= 0; i-- {
		shiftAddIn(z.limbs[:size], z.limbs[size:], xLimbs[i], m)
	}
	z.limbs = z.resizedLimbs(m.nat.announced)
	z.announced = m.nat.announced
	z.reduced = m
	return z
}

// Div calculates z <- x / m, with m a Modulus.
//
// This might seem like an odd signature, but by using a Modulus,
// we can achieve the same speed as the Mod method. This wouldn't be the case for
// an arbitrary Nat.
//
// cap determines the number of bits to keep in the result. If cap < 0, then
// the number of bits will be x.AnnouncedLen() - m.BitLen() + 2
func (z *Nat) Div(x *Nat, m *Modulus, cap int) *Nat {
	if cap < 0 {
		cap = x.announced - m.nat.announced + 2
	}
	// If x has fewer limbs than m, or is already reduced mod m, the quotient is 0.
	if len(x.limbs) < len(m.nat.limbs) || x.reduced == m {
		z.limbs = z.resizedLimbs(cap)
		for i := 0; i < len(z.limbs); i++ {
			z.limbs[i] = 0
		}
		z.announced = cap
		z.reduced = nil
		return z
	}

	size := limbCount(m.nat.announced)
	xLimbs := x.unaliasedLimbs(z)

	// Enough for 2 buffers the size of m, and to store the full quotient
	startSize := limbCount(cap)
	if startSize < 2*size {
		startSize = 2 * size
	}
	z.limbs = z.resizedLimbs(_W * (startSize + len(xLimbs)))
	remainder := z.limbs[:size]
	for i := 0; i < len(remainder); i++ {
		remainder[i] = 0
	}
	scratch := z.limbs[size : 2*size]
	// Our full quotient, in big endian order.
	quotientBE := z.limbs[startSize:]
	// We use this to append without actually reallocating. We fill our quotient
	// in from 0 upwards.
	qI := 0
	i := len(xLimbs) - 1
	// We can inject at least size - 1 limbs while staying under m
	// Thus, we start injecting from index size - 2
	start := size - 2
	// That is, if there are at least that many limbs to choose from
	if i < start {
		start = i
	}
	for j := start; j >= 0; j-- {
		remainder[j] = xLimbs[i]
		i--
		quotientBE[qI] = 0
		qI++
	}
	for ; i >= 0; i-- {
		q := shiftAddIn(remainder, scratch, xLimbs[i], m)
		quotientBE[qI] = q
		qI++
	}

	z.limbs = z.resizedLimbs(cap)
	// First, reverse all the limbs we want, from the last part of the buffer we used.
	for i := 0; i < len(z.limbs) && i < len(quotientBE); i++ {
		z.limbs[i] = quotientBE[qI-i-1]
	}
	maskEnd(z.limbs, cap)
	z.reduced = nil
	z.announced = cap
	return z
}

// ModAdd calculates z <- x + y mod m
//
// The capacity of the resulting number matches the capacity of the modulus.
func (z *Nat) ModAdd(x *Nat, y *Nat, m *Modulus) *Nat {
	var xModM, yModM Nat
	// This is necessary for the correctness of the algorithm, since
	// we don't assume that x and y are in range.
	// Furthermore, we can now assume that x and y have the same number
	// of limbs as m
	xModM.Mod(x, m)
	yModM.Mod(y, m)

	// The only thing we have to resize is z, everything else has m's length
	size := limbCount(m.nat.announced)
	scratch := z.resizedLimbs(2 * _W * size)
	// This might hold some more bits, but masking isn't necessary, since the
	// result will be < m.
	z.limbs = scratch[:size]
	subResult := scratch[size:]

	addCarry := addVV(z.limbs, xModM.limbs, yModM.limbs)
	subCarry := subVV(subResult, z.limbs, m.nat.limbs)
	// Three cases are possible:
	//
	// addCarry, subCarry = 0 -> subResult
	//   we didn't overflow our buffer, but our result was big
	//   enough to subtract m without underflow, so it was larger than m
	// addCarry, subCarry = 1 -> subResult
	//   we overflowed the buffer, and the subtraction of m is correct,
	//   because our result only looks too small because of the missing carry bit
	// addCarry = 0, subCarry = 1 -> addResult
	//   we didn't overflow our buffer, and the subtraction of m is wrong,
	//   because our result was already smaller than m
	// The other case is impossible, because it would mean we have a result big
	// enough to both overflow the addition by at least m. But, we made sure that
	// x and y are at most m - 1, so this isn't possible.
	selectSub := ctEq(addCarry, subCarry)
	ctCondCopy(selectSub, z.limbs[:size], subResult)
	z.reduced = m
	z.announced = m.nat.announced
	return z
}

// ModSub calculates z <- x - y mod m
//
// The capacity of the resulting number matches the capacity of the modulus.
func (z *Nat) ModSub(x *Nat, y *Nat, m *Modulus) *Nat {
	var xModM, yModM Nat
	// First reduce x and y mod m
	xModM.Mod(x, m)
	yModM.Mod(y, m)

	size := len(m.nat.limbs)
	scratch := z.resizedLimbs(_W * 2 * size)
	z.limbs = scratch[:size]
	addResult := scratch[size:]

	subCarry := subVV(z.limbs, xModM.limbs, yModM.limbs)
	underflow := ctEq(subCarry, 1)
	// If the subtraction underflowed, add m back to wrap around correctly.
	addVV(addResult, z.limbs, m.nat.limbs)
	ctCondCopy(underflow, z.limbs, addResult)
	z.reduced = m
	z.announced = m.nat.announced
	return z
}

// ModNeg calculates z <- -x mod m
func (z *Nat) ModNeg(x *Nat, m *Modulus) *Nat {
	// First reduce x mod m
	z.Mod(x, m)

	size := len(m.nat.limbs)
	scratch := z.resizedLimbs(_W * 2 * size)
	z.limbs = scratch[:size]
	zero := scratch[size:]
	for i := 0; i < len(zero); i++ {
		zero[i] = 0
	}

	borrow := subVV(z.limbs, zero, z.limbs)
	underflow := ctEq(Word(borrow), 1)
	// Add back M if we underflowed
	addVV(zero, z.limbs, m.nat.limbs)
	ctCondCopy(underflow, z.limbs, zero)
	z.reduced = m
	z.announced = m.nat.announced
	return z
}

// Add calculates z <- x + y, modulo 2^cap
//
// The capacity is given in bits, and also controls the size of the result.
//
// If cap < 0, the capacity will be max(x.AnnouncedLen(), y.AnnouncedLen()) + 1
func (z *Nat) Add(x *Nat, y *Nat, cap int) *Nat {
	if cap < 0 {
		cap = x.maxAnnounced(y) + 1
	}
	xLimbs := x.resizedLimbs(cap)
	yLimbs := y.resizedLimbs(cap)
	z.limbs = z.resizedLimbs(cap)
	addVV(z.limbs, xLimbs, yLimbs)
	// Mask off the final bits
	z.limbs = z.resizedLimbs(cap)
	z.announced = cap
	z.reduced = nil
	return z
}

// Sub calculates z <- x - y, modulo 2^cap
//
// The capacity is given in bits, and also controls the size of the result.
//
// If cap < 0, the capacity will be max(x.AnnouncedLen(), y.AnnouncedLen())
func (z *Nat) Sub(x *Nat, y *Nat, cap int) *Nat {
	if cap < 0 {
		cap = x.maxAnnounced(y)
	}
	xLimbs := x.resizedLimbs(cap)
	yLimbs := y.resizedLimbs(cap)
	z.limbs = z.resizedLimbs(cap)
	subVV(z.limbs, xLimbs, yLimbs)
	// Mask off the final bits
	z.limbs = z.resizedLimbs(cap)
	z.announced = cap
	z.reduced = nil
	return z
}

// montgomeryRepresentation calculates zR mod m
func montgomeryRepresentation(z []Word, scratch []Word, m *Modulus) {
	// Our strategy is to shift by W, n times, each time reducing modulo m
	size := len(m.nat.limbs)
	// LEAK: the size of the modulus
	// OK: this is public
	for i := 0; i < size; i++ {
		shiftAddIn(z, scratch, 0, m)
	}
}

// You might have the urge to replace this with []Word, and use the routines
// that already exist for doing operations. This would be a mistake.
// Go doesn't seem to be able to optimize and inline slice operations nearly as
// well as it can for this little type.
// Attempts to replace this struct with a
// slice were an order of magnitude slower (as per the exponentiation operation)
type triple struct {
	w0 Word
	w1 Word
	w2 Word
}

// add sets a <- a + b over the full 3 * _W bits, discarding any final carry.
func (a *triple) add(b triple) {
	w0, c0 := bits.Add(uint(a.w0), uint(b.w0), 0)
	w1, c1 := bits.Add(uint(a.w1), uint(b.w1), c0)
	w2, _ := bits.Add(uint(a.w2), uint(b.w2), c1)
	a.w0 = Word(w0)
	a.w1 = Word(w1)
	a.w2 = Word(w2)
}

// tripleFromMul returns the full 2 * _W bit product of a and b as a triple.
func tripleFromMul(a Word, b Word) triple {
	// You might be tempted to use mulWW here, but for some reason, Go cannot
	// figure out how to inline that assembly routine, but using bits.Mul directly
	// gets inlined by the compiler into effectively the same assembly.
	//
	// Beats me.
	w1, w0 := bits.Mul(uint(a), uint(b))
	return triple{w0: Word(w0), w1: Word(w1), w2: 0}
}

// montgomeryMul performs z <- xy / R mod m
//
// LEAK: the size of the modulus
//
// out, x, y must have the same length as the modulus, and be reduced already.
//
// out can alias x and y, but not scratch
func montgomeryMul(x []Word, y []Word, out []Word, scratch []Word, m *Modulus) {
	size := len(m.nat.limbs)

	for i := 0; i < size; i++ {
		scratch[i] = 0
	}
	dh := Word(0)
	for i := 0; i < size; i++ {
		// f is the multiple of m to add so that the low limb becomes zero.
		f := (scratch[0] + x[i]*y[0]) * m.m0inv
		var c triple
		for j := 0; j < size; j++ {
			z := triple{w0: scratch[j], w1: 0, w2: 0}
			z.add(tripleFromMul(x[i], y[j]))
			z.add(tripleFromMul(f, m.nat.limbs[j]))
			z.add(c)
			if j > 0 {
				scratch[j-1] = z.w0
			}
			c.w0 = z.w1
			c.w1 = z.w2
		}
		z := triple{w0: dh, w1: 0, w2: 0}
		z.add(c)
		scratch[size-1] = z.w0
		dh = z.w1
	}
	// Conditional final subtraction of m, in constant time.
	c := subVV(out, scratch, m.nat.limbs)
	ctCondCopy(1^ctEq(dh, c), out, scratch)
}

// ModMul calculates z <- x * y mod m
//
// The capacity of the resulting number matches the capacity of the modulus
func (z *Nat) ModMul(x *Nat, y *Nat, m *Modulus) *Nat {
	xModM := new(Nat).Mod(x, m)
	yModM := new(Nat).Mod(y, m)
	bitLen := m.BitLen()
	z.Mul(xModM, yModM, 2*bitLen)
	return z.Mod(z, m)
}

// Mul calculates z <- x * y, modulo 2^cap
//
// The capacity is given in bits, and also controls the size of the result.
//
// If cap < 0, the capacity will be x.AnnouncedLen() + y.AnnouncedLen()
func (z *Nat) Mul(x *Nat, y *Nat, cap int) *Nat {
	if cap < 0 {
		cap = x.announced + y.announced
	}
	size := limbCount(cap)
	// Since we need to set z to zero, we have no choice but to use a new buffer,
	// because we allow z to alias either of the arguments
	zLimbs := make([]Word, size)
	xLimbs := x.resizedLimbs(cap)
	yLimbs := y.resizedLimbs(cap)
	// LEAK: limbCount
	// OK: the capacity is public, or should be
	for i := 0; i < size; i++ {
		addMulVVW(zLimbs[i:], xLimbs, yLimbs[i])
	}
	z.limbs = zLimbs
	z.limbs = z.resizedLimbs(cap)
	z.announced = cap
	z.reduced = nil
	return z
}

// Rsh calculates z <- x >> shift, producing a certain number of bits
//
// This method will leak the value of shift.
//
// If cap < 0, the number of bits will be x.AnnouncedLen() - shift.
func (z *Nat) Rsh(x *Nat, shift uint, cap int) *Nat {
	if cap < 0 {
		cap = x.announced - int(shift)
		if cap < 0 {
			cap = 0
		}
	}
	zLimbs := z.resizedLimbs(x.announced)
	xLimbs := x.resizedLimbs(x.announced)
	// First shift by the sub-limb amount, then move whole limbs down.
	singleShift := shift % _W
	shrVU(zLimbs, xLimbs, singleShift)

	limbShifts := (shift - singleShift) / _W
	if limbShifts > 0 {
		i := 0
		for ; i+int(limbShifts) < len(zLimbs); i++ {
			zLimbs[i] = zLimbs[i+int(limbShifts)]
		}
		for ; i < len(zLimbs); i++ {
			zLimbs[i] = 0
		}
	}
	z.limbs = zLimbs
	z.limbs = z.resizedLimbs(cap)
	z.announced = cap
	z.reduced = nil
	return z
}

// Lsh calculates z <- x << shift, producing a certain number of bits
//
// This method will leak the value of shift.
//
// If cap < 0, the number of bits will be x.AnnouncedLen() + shift.
func (z *Nat) Lsh(x *Nat, shift uint, cap int) *Nat {
	if cap < 0 {
		cap = x.announced + int(shift)
	}
	zLimbs := z.resizedLimbs(cap)
	xLimbs := x.resizedLimbs(cap)
	// First shift by the sub-limb amount, then move whole limbs up.
	singleShift := shift % _W
	shlVU(zLimbs, xLimbs, singleShift)

	limbShifts := (shift - singleShift) / _W
	if limbShifts > 0 {
		i := len(zLimbs) - 1
		for ; i-int(limbShifts) >= 0; i-- {
			zLimbs[i] = zLimbs[i-int(limbShifts)]
		}
		for ; i >= 0; i-- {
			zLimbs[i] = 0
		}
	}
	z.limbs = zLimbs
	z.announced = cap
	z.reduced = nil
	return z
}

// expOdd calculates z <- x^y mod m for an odd modulus m, using a constant-time
// 4-bit windowed Montgomery exponentiation.
func (z *Nat) expOdd(x *Nat, y *Nat, m *Modulus) *Nat {
	size := len(m.nat.limbs)

	xModM := new(Nat).Mod(x, m)
	yLimbs := y.unaliasedLimbs(z)

	// Scratch layout: 16 window entries (x^0 R .. x^15 R mod m), then two
	// scratch buffers of one modulus-length each.
	scratch := z.resizedLimbs(_W * 18 * size)
	scratch1 := scratch[16*size : 17*size]
	scratch2 := scratch[17*size:]

	// z starts as 1 in Montgomery form, i.e. R mod m.
	z.limbs = scratch[:size]
	for i := 0; i < size; i++ {
		z.limbs[i] = 0
	}
	z.limbs[0] = 1
	montgomeryRepresentation(z.limbs, scratch1, m)

	// Fill the window table: entry i holds x^i R mod m.
	x1 := scratch[size : 2*size]
	copy(x1, xModM.limbs)
	montgomeryRepresentation(scratch[size:2*size], scratch1, m)
	for i := 2; i < 16; i++ {
		ximinus1 := scratch[(i-1)*size : i*size]
		xi := scratch[i*size : (i+1)*size]
		montgomeryMul(ximinus1, x1, xi, scratch1, m)
	}

	// LEAK: y's length
	// OK: this should be public
	for i := len(yLimbs) - 1; i >= 0; i-- {
		yi := yLimbs[i]
		for j := _W - 4; j >= 0; j -= 4 {
			// Square 4 times, then multiply by the table entry for this window.
			montgomeryMul(z.limbs, z.limbs, z.limbs, scratch1, m)
			montgomeryMul(z.limbs, z.limbs, z.limbs, scratch1, m)
			montgomeryMul(z.limbs, z.limbs, z.limbs, scratch1, m)
			montgomeryMul(z.limbs, z.limbs, z.limbs, scratch1, m)
			window := (yi >> j) & 0b1111
			// Constant-time table lookup: scan every entry, copying the match.
			for i := 1; i < 16; i++ {
				xToI := scratch[i*size : (i+1)*size]
				ctCondCopy(ctEq(window, Word(i)), scratch1, xToI)
			}
			montgomeryMul(z.limbs, scratch1, scratch1, scratch2, m)
			ctCondCopy(1^ctEq(window, 0), z.limbs, scratch1)
		}
	}

	// Leave Montgomery form by multiplying with 1.
	for i := 0; i < size; i++ {
		scratch2[i] = 0
	}
	scratch2[0] = 1
	montgomeryMul(z.limbs, scratch2, z.limbs, scratch1, m)
	z.reduced = m
	z.announced = m.nat.announced
	return z
}

// expEven calculates z <- x^y mod m for an even modulus m, using a plain
// constant-time square-and-multiply (Montgomery reduction needs m odd).
func (z *Nat) expEven(x *Nat, y *Nat, m *Modulus) *Nat {
	xModM := new(Nat).Mod(x, m)
	yLimbs := y.unaliasedLimbs(z)

	scratch := new(Nat)

	// NOTE(review): z is never explicitly initialized to 1 before this loop,
	// and the inner loop starts at j = _W (one past the top bit of a limb, where
	// the extracted bit is always 0) — verify both points against upstream.
	//
	// LEAK: y's length
	// OK: this should be public
	for i := len(yLimbs) - 1; i >= 0; i-- {
		yi := yLimbs[i]
		for j := _W; j >= 0; j-- {
			z.ModMul(z, z, m)

			sel := Choice((yi >> j) & 1)
			scratch.ModMul(z, xModM, m)
			ctCondCopy(sel, z.limbs, scratch.limbs)
		}
	}
	return z
}

// Exp calculates z <- x^y mod m
//
// The capacity of the resulting number matches the capacity of the modulus
func (z *Nat) Exp(x *Nat, y *Nat, m *Modulus) *Nat {
	if m.even {
		return z.expEven(x, y, m)
	} else {
		return z.expOdd(x, y, m)
	}
}

// cmpEq compares two limbs (same size) returning 1 if x == y, and 0 otherwise
func cmpEq(x []Word, y []Word) Choice {
	res := Choice(1)
	for i := 0; i < len(x) && i < len(y); i++ {
		res &= ctEq(x[i], y[i])
	}
	return res
}

// cmpGeq compares two limbs (same size) returning 1 if x >= y, and 0 otherwise
func cmpGeq(x []Word, y []Word) Choice {
	var c uint
	for i := 0; i < len(x) && i < len(y); i++ {
		_, c = bits.Sub(uint(x[i]), uint(y[i]), c)
	}
	// No final borrow means x >= y.
	return 1 ^ Choice(c)
}

// cmpZero checks if a slice is equal to zero, in constant time
//
// LEAK: the length of a
func cmpZero(a []Word) Choice {
	var v Word
	for i := 0; i < len(a); i++ {
		v |= a[i]
	}
	return ctEq(v, 0)
}

// Cmp compares two natural numbers, returning results for (>, =, <) in that order.
//
// Because these relations are mutually exclusive, exactly one of these values
// will be true.
//
// This function doesn't leak any information about the values involved, only
// their announced lengths.
func (z *Nat) Cmp(x *Nat) (Choice, Choice, Choice) {
	// Rough Idea: Resize both slices to the maximum length, then compare
	// using that length
	maxBits := z.maxAnnounced(x)
	zLimbs := z.resizedLimbs(maxBits)
	xLimbs := x.resizedLimbs(maxBits)

	eq := Choice(1)
	geq := Choice(1)
	for i := 0; i < len(zLimbs) && i < len(xLimbs); i++ {
		eq_at_i := ctEq(zLimbs[i], xLimbs[i])
		eq &= eq_at_i
		// geq stays set while limbs are equal; on the first difference (scanning
		// low to high, so the highest difference wins) it takes z[i] > x[i].
		geq = (eq_at_i & geq) | ((1 ^ eq_at_i) & ctGt(zLimbs[i], xLimbs[i]))
	}
	// Sanity check of the invariant eq => geq; this branch never fires for
	// correct inputs.
	if (eq & (1 ^ geq)) == 1 {
		panic("eq but not geq")
	}
	return geq & (1 ^ eq), eq, 1 ^ geq
}

// CmpMod compares this natural number with a modulus, returning results for (>, =, <)
//
// This doesn't leak anything about the values of the numbers, only their lengths.
func (z *Nat) CmpMod(m *Modulus) (Choice, Choice, Choice) {
	return z.Cmp(&m.nat)
}

// Eq checks if z = y.
//
// This is equivalent to looking at the second choice returned by Cmp.
// But, since looking at equality is so common, this function is provided
// as an extra utility.
func (z *Nat) Eq(y *Nat) Choice {
	_, eq, _ := z.Cmp(y)
	return eq
}

// EqZero compares z to 0.
//
// This is more efficient than calling Eq between this Nat and a zero Nat.
func (z *Nat) EqZero() Choice {
	return cmpZero(z.limbs)
}

// mixSigned calculates a <- alpha * a + beta * b, returning whether the result is negative.
//
// alpha and beta are signed integers, but whose absolute value is < 2^(_W / 2).
// They're represented in two's complement.
//
// a and b both have an extra limb. We use the extra limb of a to store the full
// result.
func mixSigned(a, b []Word, alpha, beta Word) Choice {
	// Get the sign and absolute value for alpha
	alphaNeg := alpha >> (_W - 1)
	alpha = (alpha ^ -alphaNeg) + alphaNeg
	// Get the sign and absolute value for beta
	betaNeg := beta >> (_W - 1)
	beta = (beta ^ -betaNeg) + betaNeg

	// Our strategy for representing the result is to use a two's complement
	// representation alongside an extra limb.

	// Multiply a by alpha
	var cc Word
	for i := 0; i < len(a)-1; i++ {
		cc, a[i] = mulAddWWW_g(alpha, a[i], cc)
	}
	a[len(a)-1] = cc
	// Correct for sign
	negateTwos(Choice(alphaNeg), a)

	// We want to do the same for b, and then add it to a, but without
	// creating a temporary array
	var mulCarry, negCarry, addCarry, si Word
	mulCarry, si = mulAddWWW_g(beta, b[0], 0)
	si, negCarry = add(si^-betaNeg, betaNeg, 0)
	a[0], addCarry = add(a[0], si, 0)
	for i := 1; i < len(b)-1; i++ {
		mulCarry, si = mulAddWWW_g(beta, b[i], mulCarry)
		si, negCarry = add(si^-betaNeg, 0, negCarry)
		a[i], addCarry = add(a[i], si, addCarry)
	}
	si, _ = add(mulCarry^-betaNeg, 0, negCarry)
	a[len(a)-1], _ = add(a[len(a)-1], si, addCarry)

	// The top bit of the extra limb is the sign; normalize to absolute value.
	outNeg := Choice(a[len(a)-1] >> (_W - 1))
	negateTwos(outNeg, a)
	return outNeg
}

// topLimbs finds the most significant _W bits of a and b
//
// This function assumes that a and b have the same length.
//
// By this, we mean aligning a and b, and then reading down _W bits starting
// from the first bit that a or b have set.
func topLimbs(a, b []Word) (Word, Word) {
	// explicitly checking this avoids indexing checks later too
	if len(a) != len(b) {
		panic("topLimbs: mismatched arguments")
	}
	// We lookup pairs of elements from top to bottom, until a1 or b1 != 0
	var a1, a0, b1, b0 Word
	done := Choice(0)
	for i := len(a) - 1; i > 0; i-- {
		a1 = ctIfElse(done, a1, a[i])
		a0 = ctIfElse(done, a0, a[i-1])
		b1 = ctIfElse(done, b1, b[i])
		b0 = ctIfElse(done, b0, b[i-1])
		done = 1 ^ ctEq(a1|b1, 0)
	}
	// Now, we look at the leading zeros to make sure that we're looking at the top
	// bits completely.
	// Converting to Word avoids a panic check
	l := Word(leadingZeros(a1 | b1))
	return (a1 << l) | (a0 >> (_W - l)), (b1 << l) | (b0 >> (_W - l))
}

// invert calculates and returns v s.t. vx = 1 mod m, and a flag indicating success.
//
// This function assumes that m is an odd number, but doesn't assume
// that m is truncated to its full size.
//
// announced should be the number of significant bits in m.
//
// x should already be reduced modulo m.
//
// m0inv should be -invertModW(m[0]), which might have been precomputed in some
// cases.
func (z *Nat) invert(announced int, x []Word, m []Word, m0inv Word) Choice {
	// This function follows Thomas Pornin's optimized GCD method:
	// https://eprint.iacr.org/2020/972
	if len(x) != len(m) {
		panic("invert: mismatched arguments")
	}
	size := len(m)
	// We need 4 normal buffers, and one scratch buffer.
	// We make each of them have an extra limb, because our updates produce an extra
	// _W / 2 bits or so, before shifting, or modular reduction, and it's convenient
	// to do these "large" updates in place.
	z.limbs = z.resizedLimbs(_W * 5 * (size + 1))
	// v = 0, u = 1, a = x, b = m
	v := z.limbs[:size+1]
	u := z.limbs[size+1 : 2*(size+1)]
	for i := 0; i < size; i++ {
		u[i] = 0
		v[i] = 0
	}
	u[0] = 1
	a := z.limbs[3*(size+1) : 4*(size+1)]
	copy(a, x)
	b := z.limbs[2*(size+1) : 3*(size+1)]
	copy(b, m)
	scratch := z.limbs[4*(size+1):]

	// k is half of our limb size
	//
	// We do k - 1 inner iterations inside our loop.
	const k = _W >> 1
	// kMask allows us to keep only this half of a limb
	const kMask = (1 << k) - 1
	// iterMask allows us to mask off first (k - 1) bits, which is useful, since
	// that's how many inner iterations we have.
	const iterMask = Word((1 << (k - 1)) - 1)
	// The minimum number of iterations is 2 * announced - 1. So, we calculate
	// the ceiling of this quantity divided by (k - 1), since that's the number
	// of iterations we do inside the inner loop
	iterations := ((2*announced - 1) + k - 2) / (k - 1)
	for i := 0; i < iterations; i++ {
		// The core idea is to use an approximation of a and b to calculate update
		// factors. We want to use the low k - 1 bits, combined with the high k + 1 bits.
		// This is because the low k - 1 bits suffice to give us odd / even information
		// for our k - 1 iterations, and the remaining high bits allow us to check
		// a < b as well.
		aBar := a[0]
		bBar := b[0]
		if size > 1 {
			aTop, bTop := topLimbs(a[:size], b[:size])
			aBar = (iterMask & aBar) | (^iterMask & aTop)
			bBar = (iterMask & bBar) | (^iterMask & bTop)
		}
		// We store two factors in a single register, to make the inner loop faster.
		//
		// fg = f + (2^(k-1) - 1) + 2^k(g + (2^(k-1) - 1))
		//
		// The reason we add in 2^(k-1) - 1, is so that the result in each half
		// doesn't go negative. We then subtract this factor away when extracting
		// the coefficients.
		// This factor needs to be added when we subtract one double register from
		// another, and vice versa.
		const coefficientAdjust = iterMask * ((1 << k) + 1)
		fg0 := Word(1) + coefficientAdjust
		// NOTE(review): the next line is corrupted in this copy of the source —
		// it is not valid Go, and the body of the k - 1 inner iterations (the
		// conditional-swap / subtract / halve loop updating aBar, bBar, fg0, fg1)
		// appears to have been lost. Restore this region from the upstream
		// repository before relying on this routine; it is reproduced verbatim
		// here rather than guessed at.
		fg1 := Word(1<>= 1 fg1 += fg1 fg1 -= coefficientAdjust }
		// Extract out the actual coefficients, as per the previous discussion.
		f0 := (fg0 & kMask) - iterMask
		g0 := (fg0 >> k) - iterMask
		f1 := (fg1 & kMask) - iterMask
		g1 := (fg1 >> k) - iterMask

		// a, b <- (f0 * a + g0 * b), (f1 * a + g1 * b)
		copy(scratch, a)
		aNeg := Word(mixSigned(a, b, f0, g0))
		bNeg := Word(mixSigned(b, scratch, g1, f1))
		// This will always clear the low k - 1 bits, so we shift those away
		shrVU(a, a, k-1)
		shrVU(b, b, k-1)

		// The result may have been negative, in which case we need to negate
		// the coefficients for the updates to u and v.
		f0 = (f0 ^ -aNeg) + aNeg
		g0 = (g0 ^ -aNeg) + aNeg
		f1 = (f1 ^ -bNeg) + bNeg
		g1 = (g1 ^ -bNeg) + bNeg

		// u, v <- (f0 * u + g0 * v), (f1 * u + g1 * v)
		copy(scratch, u)
		uNeg := mixSigned(u, v, f0, g0)
		vNeg := mixSigned(v, scratch, g1, f1)

		// Now, reduce u and v mod m, making sure to conditionally negate the result.
		u0 := u[0]
		copy(u, u[1:])
		shiftAddInGeneric(u[:size], scratch[:size], u0, m)
		subVV(scratch[:size], m, u[:size])
		ctCondCopy(uNeg&(1^cmpZero(u)), u[:size], scratch[:size])
		v0 := v[0]
		copy(v, v[1:])
		shiftAddInGeneric(v[:size], scratch[:size], v0, m)
		subVV(scratch[:size], m, v[:size])
		ctCondCopy(vNeg&(1^cmpZero(v)), v[:size], scratch[:size])
	}
	// v now contains our inverse, multiplied by 2^(iterations). We need to correct
	// this by dividing by 2. We can use the same trick as in montgomery multiplication,
	// adding the correct multiple of m to clear the low bits, and then shifting
	totalIterations := iterations * (k - 1)
	// First, we try and do _W / 2 bits at a time. This is a convenient amount,
	// because then the coefficient only occupies a single limb.
	for i := 0; i < totalIterations/k; i++ {
		v[size] = addMulVVW(v[:size], m, (m0inv*v[0])&kMask)
		shrVU(v, v, k)
	}
	// If there are any iterations remaining, we can take care of them by clearing
	// a smaller number of bits.
	remaining := totalIterations % k
	if remaining > 0 {
		lastMask := Word((1 << remaining) - 1)
		v[size] = addMulVVW(v[:size], m, (m0inv*v[0])&lastMask)
		shrVU(v, v, uint(remaining))
	}

	z.Resize(announced)
	// Inversion succeeded if b, which contains gcd(x, m), is 1.
	return cmpZero(b[1:]) & ctEq(1, b[0])
}

// Coprime returns 1 if gcd(x, y) == 1, and 0 otherwise
func (x *Nat) Coprime(y *Nat) Choice {
	maxBits := x.maxAnnounced(y)
	size := limbCount(maxBits)
	if size == 0 {
		// technically the result should be 1 since 0 is not a divisor,
		// but we expect 0 when both arguments are equal.
		return 0
	}

	a := make([]Word, size)
	copy(a, x.limbs)
	b := make([]Word, size)
	copy(b, y.limbs)

	// Our gcd(a, b) routine requires b to be odd, and will return garbage otherwise.
	aOdd := Choice(a[0] & 1)
	ctCondSwap(aOdd, a, b)

	scratch := new(Nat)
	bOdd := Choice(b[0] & 1)
	// We make b odd so that our calculations aren't messed up, but this doesn't affect
	// our result
	b[0] |= 1
	invertible := scratch.invert(maxBits, a, b, -invertModW(b[0]))
	// If at least one of a or b is odd, then our GCD calculation will have been correct,
	// otherwise, both are even, so we want to return false anyways.
	return (aOdd | bOdd) & invertible
}

// IsUnit checks if x is a unit, i.e. invertible, mod m.
//
// This so happens to be when gcd(x, m) == 1.
func (x *Nat) IsUnit(m *Modulus) Choice {
	return x.Coprime(&m.nat)
}

// modInverse calculates the inverse of a reduced x modulo m
//
// This assumes that m is an odd number, but not that it's truncated
// to its true size. This routine will only leak the announced sizes of
// x and m.
//
// We also assume that x is already reduced modulo m
func (z *Nat) modInverse(x *Nat, m *Nat, m0inv Word) *Nat {
	// Make sure that z doesn't alias either of m or x
	xLimbs := x.unaliasedLimbs(z)
	mLimbs := m.unaliasedLimbs(z)
	z.invert(m.announced, xLimbs, mLimbs, m0inv)
	return z
}

// ModInverse calculates z <- x^-1 mod m
//
// This will produce nonsense if the modulus is even.
//
// The capacity of the resulting number matches the capacity of the modulus
func (z *Nat) ModInverse(x *Nat, m *Modulus) *Nat {
	z.Mod(x, m)
	if m.even {
		z.modInverseEven(x, m)
	} else {
		z.modInverse(z, &m.nat, m.m0inv)
	}
	z.reduced = m
	return z
}

// divDouble divides x by d, outputting the quotient in out, and a remainder
//
// This routine assumes nothing about the padding of either of its inputs, and
// leaks nothing beyond their announced length.
//
// If out is not empty, it's assumed that x has at most twice the bit length of d,
// and the quotient can thus fit in a slice the length of d, which out is assumed to be.
//
// If out is nil, no quotient is produced, but the remainder is still calculated.
// This remainder will be correct regardless of the size difference between x and d.
func divDouble(x []Word, d []Word, out []Word) []Word {
	size := len(d)
	r := make([]Word, size)
	scratch := make([]Word, size)

	// We use free injection, like in Mod
	i := len(x) - 1
	// We can inject at least size - 1 limbs while staying under m
	// Thus, we start injecting from index size - 2
	start := size - 2
	// That is, if there are at least that many limbs to choose from
	if i < start {
		start = i
	}
	for j := start; j >= 0; j-- {
		r[j] = x[i]
		i--
	}
	for ; i >= 0; i-- {
		oi := shiftAddInGeneric(r, scratch, x[i], d)
		// Hopefully the branch predictor can make these checks not too expensive,
		// otherwise we'll have to duplicate the routine
		if out != nil {
			out[i] = oi
		}
	}
	return r
}

// ModInverseEven calculates the modular inverse of x, mod m
//
// This routine will work even if m is an even number, unlike ModInverse.
// Furthermore, it doesn't require the modulus to be truncated to its true size, and
// will only leak information about the public sizes of its inputs. It is slower
// than the standard routine though.
//
// This function assumes that x has an inverse modulo m, naturally
func (z *Nat) modInverseEven(x *Nat, m *Modulus) *Nat {
	if x.announced <= 0 {
		return z.Resize(0)
	}
	// Idea:
	//
	// You want to find Z such that ZX = 1 mod M. The problem is
	// that the usual routine assumes that m is odd. In this case m is even.
	// For X to be invertible, we need it to be odd. We can thus invert M mod X,
	// finding an A satisfying AM = 1 mod X. This means that AM = 1 + KX, for some
	// positive integer K. Modulo M, this entails that KX = -1 mod M, so -K provides
	// us with an inverse for X.
	//
	// To find K, we can calculate (AM - 1) / X, and then subtract this from M, to get our inverse.
	size := len(m.nat.limbs)
	// We want to invert m modulo x, so we first calculate the reduced version, before inverting
	var newZ Nat
	newZ.limbs = divDouble(m.nat.limbs, x.limbs, nil)
	newZ.modInverse(&newZ, x, -invertModW(x.limbs[0]))
	inverseZero := cmpZero(newZ.limbs)
	newZ.Mul(&newZ, &m.nat, 2*size*_W)
	newZ.limbs = newZ.resizedLimbs(_W * 2 * size)
	subVW(newZ.limbs, newZ.limbs, 1)
	divDouble(newZ.limbs, x.limbs, newZ.limbs)
	// The result fits on a single half of newZ, but we need to subtract it from m.
	// We can use the other half of newZ, and then copy it back over if we need to keep it
	subVV(newZ.limbs[size:], m.nat.limbs, newZ.limbs[:size])
	// If the inverse was zero, then x was 1, and so we should return 1.
	// We go ahead and prepare this result, but expect to copy over the subtraction
	// we just calculated soon over, in the usual case.
	newZ.limbs[0] = 1
	for i := 1; i < size; i++ {
		newZ.limbs[i] = 0
	}
	ctCondCopy(1^inverseZero, newZ.limbs[:size], newZ.limbs[size:])
	z.limbs = newZ.limbs
	z.Resize(m.nat.announced)
	return z
}

// modSqrt3Mod4 sets z <- sqrt(x) mod p, when p is a prime with p = 3 mod 4
func (z *Nat) modSqrt3Mod4(x *Nat, p *Modulus) *Nat {
	// In this case, we can do x^(p + 1) / 4
	e := new(Nat).SetNat(&p.nat)
	carry := addVW(e.limbs, e.limbs, 1)
	shrVU(e.limbs, e.limbs, 2)
	// Re-insert the bits carried out of the addition, shifted into place.
	e.limbs[len(e.limbs)-1] |= (carry << (_W - 2))
	return z.Exp(x, e, p)
}

// tonelliShanks sets z <- sqrt(x) mod p, for any prime modulus
func (z *Nat) tonelliShanks(x *Nat, p *Modulus) *Nat {
	// c.f. https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-hash-to-curve-09#appendix-G.4
	scratch := new(Nat)
	x = new(Nat).SetNat(x)

	one := new(Nat).SetUint64(1)
	trailingZeros := 1
	reducedPminusOne := new(Nat).Sub(&p.nat, one, p.BitLen())
	// In this case, p must have been 1, so sqrt(x) mod p is 0. Explicitly checking
	// this avoids an infinite loop when trying to remove the least significant zeros.
	// Checking this value is fine, since ModSqrt is explicitly allowed to branch
	// on the value of the modulus.
	if reducedPminusOne.EqZero() == 1 {
		return z.SetUint64(0)
	}
	shrVU(reducedPminusOne.limbs, reducedPminusOne.limbs, 1)

	// Find a quadratic non-residue, by trial, starting from 2.
	nonSquare := new(Nat).SetUint64(2)
	for scratch.Exp(nonSquare, reducedPminusOne, p).Eq(one) == 1 {
		nonSquare.Add(nonSquare, one, p.BitLen())
	}

	// Strip the remaining factors of 2, counting them: p - 1 = q * 2^trailingZeros.
	for reducedPminusOne.limbs[0]&1 == 0 {
		trailingZeros += 1
		shrVU(reducedPminusOne.limbs, reducedPminusOne.limbs, 1)
	}

	reducedQminusOne := new(Nat).Sub(reducedPminusOne, one, p.BitLen())
	shrVU(reducedQminusOne.limbs, reducedQminusOne.limbs, 1)

	c := new(Nat).Exp(nonSquare, reducedPminusOne, p)

	z.Exp(x, reducedQminusOne, p)
	t := new(Nat).ModMul(z, z, p)
	t.ModMul(t, x, p)
	z.ModMul(z, x, p)
	b := new(Nat).SetNat(t)
	one.limbs = one.resizedLimbs(len(b.limbs))
	for i := trailingZeros; i > 1; i-- {
		for j := 1; j < i-1; j++ {
			b.ModMul(b, b, p)
		}
		// Conditionally fold in the cofactor, in constant time for fixed p.
		sel := 1 ^ cmpEq(b.limbs, one.limbs)
		scratch.ModMul(z, c, p)
		ctCondCopy(sel, z.limbs, scratch.limbs)
		c.ModMul(c, c, p)
		scratch.ModMul(t, c, p)
		ctCondCopy(sel, t.limbs, scratch.limbs)
		b.SetNat(t)
	}
	z.reduced = p
	return z
}

// ModSqrt calculates the square root of x modulo p
//
// p must be an odd prime number, and x must actually have a square root
// modulo p. The result is undefined if these conditions aren't satisfied
//
// This function will leak information about the value of p. This isn't intended
// to be used in situations where the modulus isn't publicly known.
func (z *Nat) ModSqrt(x *Nat, p *Modulus) *Nat { if len(p.nat.limbs) == 0 { panic("Can't take square root mod 0") } if p.nat.limbs[0]&1 == 0 { panic("Can't take square root mod an even number") } if p.nat.limbs[0]&0b11 == 0b11 { return z.modSqrt3Mod4(x, p) } return z.tonelliShanks(x, p) } saferith-0.33.0/num_bench_test.go000066400000000000000000000227511422457503400167430ustar00rootroot00000000000000package saferith import ( "math/big" "testing" ) var resultBig big.Int var resultNat Nat const _SIZE = 256 func ones() []byte { bytes := make([]byte, _SIZE) for i := 0; i < _SIZE; i++ { bytes[i] = 1 } return bytes } func doubleOnes() []byte { bytes := make([]byte, 2*_SIZE) for i := 0; i < 2*_SIZE; i++ { bytes[i] = 1 } return bytes } // a modulus of 2048 bits func modulus2048() []byte { bytes := make([]byte, 256) for i := 0; i < len(bytes); i++ { bytes[i] = 0xFD } return bytes } // an even modulus of 2048 bits func modulus2048Even() []byte { bytes := make([]byte, 256) for i := 0; i < len(bytes); i++ { bytes[i] = 0xFE } return bytes } // A 256 bit prime that's 3 mod 4 func prime3Mod4() []byte { bytes := make([]byte, 32) bytes[0] = 4 bytes[31] = 0x4F return bytes } // A 256 bit prime that's 1 mod 4 func prime1Mod4() []byte { bytes := make([]byte, 32) bytes[0] = 4 bytes[31] = 0x99 return bytes } func BenchmarkAddBig(b *testing.B) { b.StopTimer() var x big.Int x.SetBytes(ones()) b.StartTimer() for n := 0; n < b.N; n++ { var z big.Int z.Add(&x, &x) resultBig = z } } func _benchmarkModAddBig(m *big.Int, b *testing.B) { b.StopTimer() x := new(big.Int).SetBytes(ones()) x.Mod(x, m) b.StartTimer() for n := 0; n < b.N; n++ { var z big.Int z.Add(x, x) z.Mod(x, m) resultBig = z } } func BenchmarkModAddBig(b *testing.B) { b.StopTimer() var m big.Int m.SetUint64(13) _benchmarkModAddBig(&m, b) } func BenchmarkLargeModAddBig(b *testing.B) { b.StopTimer() var m big.Int m.SetBytes(modulus2048()) _benchmarkModAddBig(&m, b) } func BenchmarkMulBig(b *testing.B) { b.StopTimer() var x big.Int 
x.SetBytes(ones()) b.StartTimer() for n := 0; n < b.N; n++ { var z big.Int z.Mul(&x, &x) resultBig = z } } func _benchmarkModMulBig(m *big.Int, b *testing.B) { b.StopTimer() x := new(big.Int).SetBytes(ones()) x.Mod(x, m) b.StartTimer() for n := 0; n < b.N; n++ { var z big.Int z.Mul(x, x) z.Mod(x, m) resultBig = z } } func BenchmarkModMulBig(b *testing.B) { b.StopTimer() var m big.Int m.SetUint64(13) _benchmarkModMulBig(&m, b) } func BenchmarkLargeModMulBig(b *testing.B) { b.StopTimer() var m big.Int m.SetBytes(modulus2048()) _benchmarkModMulBig(&m, b) } func _benchmarkModBig(m *big.Int, b *testing.B) { b.StopTimer() var x big.Int x.SetBytes(doubleOnes()) b.StartTimer() for n := 0; n < b.N; n++ { var z big.Int z.Mod(&x, m) resultBig = z } } func BenchmarkModBig(b *testing.B) { b.StopTimer() var m big.Int m.SetUint64(13) _benchmarkModBig(&m, b) } func BenchmarkLargeModBig(b *testing.B) { b.StopTimer() var m big.Int m.SetBytes(modulus2048()) _benchmarkModBig(&m, b) } func _benchmarkModInverseBig(m *big.Int, b *testing.B) { b.StopTimer() x := new(big.Int).SetBytes(ones()) x.Mod(x, m) b.StartTimer() for n := 0; n < b.N; n++ { var z big.Int z.ModInverse(x, m) resultBig = z } } func BenchmarkModInverseBig(b *testing.B) { b.StopTimer() var m big.Int m.SetUint64(13) _benchmarkModInverseBig(&m, b) } func BenchmarkLargeModInverseBig(b *testing.B) { b.StopTimer() var m big.Int m.SetBytes(modulus2048()) _benchmarkModInverseBig(&m, b) } func _benchmarkExpBig(m *big.Int, b *testing.B) { b.StopTimer() x := new(big.Int).SetBytes(ones()) x.Mod(x, m) y := new(big.Int).SetBytes(ones()) b.StartTimer() for n := 0; n < b.N; n++ { var z big.Int z.Exp(x, y, m) resultBig = z } } func BenchmarkExpBig(b *testing.B) { b.StopTimer() var m big.Int m.SetUint64(13) _benchmarkExpBig(&m, b) } func BenchmarkLargeExpBig(b *testing.B) { b.StopTimer() var m big.Int m.SetBytes(modulus2048()) _benchmarkExpBig(&m, b) } func BenchmarkSetBytesBig(b *testing.B) { b.StopTimer() bytes := ones() b.StartTimer() 
for n := 0; n < b.N; n++ { var z big.Int z.SetBytes(bytes) resultBig = z } } func BenchmarkModSqrt3Mod4Big(b *testing.B) { b.StopTimer() p := new(big.Int).SetBytes(prime3Mod4()) // This is a large square modulo p x := new(big.Int).Sub(p, new(big.Int).SetUint64(5)) b.StartTimer() for i := 0; i < b.N; i++ { var z big.Int z.ModSqrt(x, p) resultBig = z } } func BenchmarkAddNat(b *testing.B) { b.StopTimer() var x Nat x.SetBytes(ones()) b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.Add(&x, &x, _SIZE*8) resultNat = z } } func _benchmarkModAddNat(m *Modulus, b *testing.B) { b.StopTimer() x := new(Nat).SetBytes(ones()) x.Mod(x, m) b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.ModAdd(x, x, m) resultNat = z } } func BenchmarkModAddNat(b *testing.B) { b.StopTimer() m := ModulusFromUint64(13) _benchmarkModAddNat(m, b) } func BenchmarkLargeModAddNat(b *testing.B) { b.StopTimer() m := ModulusFromBytes(modulus2048()) _benchmarkModAddNat(m, b) } func _benchmarkModNegNat(m *Modulus, b *testing.B) { b.StopTimer() x := new(Nat).SetBytes(ones()) x.Mod(x, m) b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.ModNeg(x, m) resultNat = z } } func BenchmarkModNegNat(b *testing.B) { b.StopTimer() m := ModulusFromUint64(13) _benchmarkModNegNat(m, b) } func BenchmarkLargeModNegNat(b *testing.B) { b.StopTimer() m := ModulusFromBytes(modulus2048()) _benchmarkModNegNat(m, b) } func BenchmarkMulNat(b *testing.B) { b.StopTimer() var x Nat x.SetBytes(ones()) b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.Mul(&x, &x, _SIZE*2*8) resultNat = z } } func _benchmarkModMulNat(m *Modulus, b *testing.B) { b.StopTimer() x := new(Nat).SetBytes(ones()) x.Mod(x, m) b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.ModMul(x, x, m) resultNat = z } } func BenchmarkModMulNat(b *testing.B) { b.StopTimer() m := ModulusFromUint64(13) _benchmarkModMulNat(m, b) } func BenchmarkLargeModMulNat(b *testing.B) { b.StopTimer() m := ModulusFromBytes(modulus2048()) _benchmarkModMulNat(m, b) } func 
BenchmarkLargeModMulNatEven(b *testing.B) { b.StopTimer() m := ModulusFromBytes(modulus2048Even()) _benchmarkModMulNat(m, b) } func _benchmarkModNat(m *Modulus, b *testing.B) { b.StopTimer() x := new(Nat).SetBytes(doubleOnes()) b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.Mod(x, m) resultNat = z } } func BenchmarkModNat(b *testing.B) { b.StopTimer() m := ModulusFromUint64(13) _benchmarkModNat(m, b) } func BenchmarkLargeModNat(b *testing.B) { b.StopTimer() m := ModulusFromBytes(modulus2048()) _benchmarkModNat(m, b) } func _benchmarkModInverseNat(m *Modulus, b *testing.B) { b.StopTimer() x := new(Nat).SetBytes(ones()) x.Mod(x, m) b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.ModInverse(x, m) resultNat = z } } func BenchmarkModInverseNat(b *testing.B) { b.StopTimer() m := ModulusFromUint64(13) _benchmarkModInverseNat(m, b) } func BenchmarkLargeModInverseNat(b *testing.B) { b.StopTimer() m := ModulusFromBytes(modulus2048()) _benchmarkModInverseNat(m, b) } func _benchmarkModInverseEvenNat(m *Modulus, b *testing.B) { b.StopTimer() var x Nat x.SetBytes(ones()) b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.ModInverse(&x, m) resultNat = z } } func BenchmarkModInverseEvenNat(b *testing.B) { b.StopTimer() m := ModulusFromUint64(14) _benchmarkModInverseEvenNat(m, b) } func BenchmarkLargeModInverseEvenNat(b *testing.B) { b.StopTimer() var one, m Nat m.SetBytes(modulus2048()) one.SetUint64(1) m.Add(&m, &one, 2048) _benchmarkModInverseEvenNat(ModulusFromNat(&m), b) } func _benchmarkExpNat(m *Modulus, b *testing.B) { b.StopTimer() x := new(Nat).SetBytes(ones()) y := new(Nat).SetBytes(ones()) x.Mod(x, m) b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.Exp(x, y, m) resultNat = z } } func BenchmarkExpNat(b *testing.B) { b.StopTimer() m := ModulusFromUint64(13) _benchmarkExpNat(m, b) } func BenchmarkLargeExpNat(b *testing.B) { b.StopTimer() m := ModulusFromBytes(modulus2048()) _benchmarkExpNat(m, b) } func BenchmarkLargeExpNatEven(b *testing.B) { 
b.StopTimer() m := ModulusFromBytes(modulus2048Even()) _benchmarkExpNat(m, b) } func BenchmarkSetBytesNat(b *testing.B) { b.StopTimer() bytes := ones() b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.SetBytes(bytes) resultNat = z } } func BenchmarkMontgomeryMul(b *testing.B) { b.StopTimer() x := new(Nat).SetBytes(ones()) y := new(Nat).SetBytes(ones()) scratch := new(Nat).SetBytes(ones()) out := new(Nat).SetBytes(ones()) m := ModulusFromBytes(modulus2048()) b.StartTimer() for i := 0; i < b.N; i++ { montgomeryMul(x.limbs, y.limbs, out.limbs, scratch.limbs, m) } } func BenchmarkModSqrt3Mod4Nat(b *testing.B) { b.StopTimer() p := new(Nat).SetBytes(prime3Mod4()) // This is a large square modulo p x := new(Nat).Sub(p, new(Nat).SetUint64(5), 256) pMod := ModulusFromNat(p) b.StartTimer() for i := 0; i < b.N; i++ { var z Nat z.ModSqrt(x, pMod) resultNat = z } } func BenchmarkModSqrt1Mod4Nat(b *testing.B) { b.StopTimer() p := new(Nat).SetBytes(prime1Mod4()) // This is a large square modulo p x := new(Nat).Sub(p, new(Nat).SetUint64(6), 256) pMod := ModulusFromNat(p) b.StartTimer() for i := 0; i < b.N; i++ { var z Nat z.ModSqrt(x, pMod) resultNat = z } } func _benchmarkDivNat(m *Modulus, b *testing.B) { b.StopTimer() x := new(Nat).SetBytes(doubleOnes()) b.StartTimer() for n := 0; n < b.N; n++ { var z Nat z.Div(x, m, m.BitLen()) resultNat = z } } func BenchmarkDivNat(b *testing.B) { b.StopTimer() m := ModulusFromUint64(13) _benchmarkDivNat(m, b) } func BenchmarkLargeDivNat(b *testing.B) { b.StopTimer() m := ModulusFromBytes(modulus2048()) _benchmarkDivNat(m, b) } saferith-0.33.0/num_test.go000066400000000000000000000721071422457503400156040ustar00rootroot00000000000000package saferith import ( "bytes" "math/big" "math/rand" "reflect" "testing" "testing/quick" ) func (Nat) Generate(r *rand.Rand, size int) reflect.Value { bytes := make([]byte, r.Int()&0x7F) r.Read(bytes) var n Nat n.SetBytes(bytes) return reflect.ValueOf(n) } func (Modulus) Generate(r *rand.Rand, size int) 
reflect.Value { bytes := make([]byte, 1+(r.Int()&0x3F)) r.Read(bytes) // Ensure that our number isn't 0, but being even is ok bytes[len(bytes)-1] |= 0b10 n := ModulusFromBytes(bytes) return reflect.ValueOf(*n) } func testBigConversion(x Nat) bool { if !x.checkInvariants() { return false } xBig := x.Big() xNatAgain := new(Nat).SetBig(xBig, x.AnnouncedLen()) if !xNatAgain.checkInvariants() { return false } return x.Eq(xNatAgain) == 1 } func TestBigConversion(t *testing.T) { err := quick.Check(testBigConversion, &quick.Config{}) if err != nil { t.Error(err) } } func testByteVsBytes(x Nat) bool { if !x.checkInvariants() { return false } bytes := x.Bytes() for i := 0; i < len(bytes); i++ { if x.Byte(i) != bytes[len(bytes)-i-1] { return false } } return true } func TestByteVsBytes(t *testing.T) { err := quick.Check(testByteVsBytes, &quick.Config{}) if err != nil { t.Error(err) } } func testSetBytesRoundTrip(expected []byte) bool { x := new(Nat).SetBytes(expected) actual := x.Bytes() return bytes.Equal(expected, actual) } func TestSetBytesRoundTrip(t *testing.T) { err := quick.Check(testSetBytesRoundTrip, &quick.Config{}) if err != nil { t.Error(err) } } func testNatMarshalBinaryRoundTrip(x Nat) bool { out, err := x.MarshalBinary() if err != nil { return false } y := new(Nat) err = y.UnmarshalBinary(out) if err != nil { return false } return x.Eq(y) == 1 } func TestNatMarshalBinaryRoundTrip(t *testing.T) { err := quick.Check(testNatMarshalBinaryRoundTrip, &quick.Config{}) if err != nil { t.Error(err) } } func testModulusMarshalBinaryRoundTrip(x Modulus) bool { out, err := x.MarshalBinary() if err != nil { return false } y := new(Modulus) err = y.UnmarshalBinary(out) if err != nil { return false } _, eq, _ := x.Cmp(y) return eq == 1 } func TestModulusMarshalBinaryRoundTrip(t *testing.T) { err := quick.Check(testModulusMarshalBinaryRoundTrip, &quick.Config{}) if err != nil { t.Error(err) } } func testAddZeroIdentity(n Nat) bool { if !n.checkInvariants() { return false } var 
x, zero Nat zero.SetUint64(0) x.Add(&n, &zero, len(n.limbs)*_W) if !x.checkInvariants() { return false } if n.Eq(&x) != 1 { return false } x.Add(&zero, &n, len(n.limbs)*_W) if !x.checkInvariants() { return false } return n.Eq(&x) == 1 } func TestAddZeroIdentity(t *testing.T) { err := quick.Check(testAddZeroIdentity, &quick.Config{}) if err != nil { t.Error(err) } } func testAddCommutative(a Nat, b Nat) bool { if !(a.checkInvariants() && b.checkInvariants()) { return false } var aPlusB, bPlusA Nat for _, x := range []int{256, 128, 64, 32, 8} { aPlusB.Add(&a, &b, x) bPlusA.Add(&b, &a, x) if !(aPlusB.checkInvariants() && bPlusA.checkInvariants()) { return false } if aPlusB.Eq(&bPlusA) != 1 { return false } } return true } func TestAddCommutative(t *testing.T) { err := quick.Check(testAddCommutative, &quick.Config{}) if err != nil { t.Error(err) } } func testCondAssign(a Nat, b Nat) bool { if !(a.checkInvariants() && b.checkInvariants()) { return false } shouldBeA := new(Nat).SetNat(&a) shouldBeB := new(Nat).SetNat(&a) shouldBeA.CondAssign(0, &b) shouldBeB.CondAssign(1, &b) if !(shouldBeA.checkInvariants() && shouldBeB.checkInvariants()) { return false } return shouldBeA.Eq(&a) == 1 && shouldBeB.Eq(&b) == 1 } func TestCondAssign(t *testing.T) { err := quick.Check(testCondAssign, &quick.Config{}) if err != nil { t.Error(err) } } func testAddAssociative(a Nat, b Nat, c Nat) bool { if !(a.checkInvariants() && b.checkInvariants() && c.checkInvariants()) { return false } var order1, order2 Nat for _, x := range []int{256, 128, 64, 32, 8} { order1 = *order1.Add(&a, &b, x) order1.Add(&order1, &c, x) order2 = *order2.Add(&b, &c, x) order2.Add(&a, &order2, x) if !(order1.checkInvariants() && order2.checkInvariants()) { return false } if order1.Eq(&order2) != 1 { return false } } return true } func TestAddAssociative(t *testing.T) { err := quick.Check(testAddAssociative, &quick.Config{}) if err != nil { t.Error(err) } } func testLshCompositionIsAdditionOfShifts(x Nat, s1 uint8, 
s2 uint8) bool { way1 := new(Nat).Lsh(&x, uint(s1), -1) way1.Lsh(way1, uint(s2), -1) way2 := new(Nat).Lsh(&x, uint(s1)+uint(s2), -1) return way1.Eq(way2) == 1 } func TestLshCompositionIsAdditionOfShifts(t *testing.T) { err := quick.Check(testLshCompositionIsAdditionOfShifts, &quick.Config{}) if err != nil { t.Error(err) } } func testRshCompositionIsAdditionOfShifts(x Nat, s1 uint8, s2 uint8) bool { way1 := new(Nat).Rsh(&x, uint(s1), -1) way1.Rsh(way1, uint(s2), -1) way2 := new(Nat).Rsh(&x, uint(s1)+uint(s2), -1) return way1.Eq(way2) == 1 } func TestRshCompositionIsAdditionOfShifts(t *testing.T) { err := quick.Check(testRshCompositionIsAdditionOfShifts, &quick.Config{}) if err != nil { t.Error(err) } } func testLshRshRoundTrip(x Nat, s uint8) bool { z := new(Nat).Lsh(&x, uint(s), -1) z.Rsh(z, uint(s), -1) return x.Eq(z) == 1 } func TestRshLshRoundTrip(t *testing.T) { err := quick.Check(testRshLshRoundTrip, &quick.Config{}) if err != nil { t.Error(err) } } func testRshLshRoundTrip(x Nat, s uint8) bool { if t := x.TrueLen(); int(s) > t { s = uint8(t) } singleShift := s % _W limbShifts := (s - singleShift) / _W if len(x.limbs) > 0 { i := 0 for ; i < int(limbShifts) && i < len(x.limbs)-1; i++ { x.limbs[i] = 0 } mask := limbMask(int(singleShift)) x.limbs[i] &= ^mask } z := new(Nat).Rsh(&x, uint(s), -1) z.Lsh(z, uint(s), -1) return x.Eq(z) == 1 } func TestLshRshRoundTrip(t *testing.T) { err := quick.Check(testLshRshRoundTrip, &quick.Config{}) if err != nil { t.Error(err) } } func testModAddNegIsSub(a Nat, b Nat, m Modulus) bool { if !(a.checkInvariants() && b.checkInvariants()) { return false } subbed := new(Nat).ModSub(&a, &b, &m) negated := new(Nat).ModNeg(&b, &m) addWithNegated := new(Nat).ModAdd(&a, negated, &m) if !(subbed.checkInvariants() && negated.checkInvariants() && addWithNegated.checkInvariants()) { return false } return subbed.Eq(addWithNegated) == 1 } func TestModAddNegIsSub(t *testing.T) { err := quick.Check(testModAddNegIsSub, &quick.Config{}) if err != 
nil { t.Error(err) } } func testMulCommutative(a Nat, b Nat) bool { if !(a.checkInvariants() && b.checkInvariants()) { return false } var aTimesB, bTimesA Nat for _, x := range []int{256, 128, 64, 32, 8} { aTimesB.Mul(&a, &b, x) bTimesA.Mul(&b, &a, x) if !(aTimesB.checkInvariants() && bTimesA.checkInvariants()) { return false } if aTimesB.Eq(&bTimesA) != 1 { return false } } return true } func TestMulCommutative(t *testing.T) { err := quick.Check(testMulCommutative, &quick.Config{}) if err != nil { t.Error(err) } } func testMulAssociative(a Nat, b Nat, c Nat) bool { if !(a.checkInvariants() && b.checkInvariants() && c.checkInvariants()) { return false } var order1, order2 Nat for _, x := range []int{256, 128, 64, 32, 8} { order1 = *order1.Mul(&a, &b, x) order1.Mul(&order1, &c, x) order2 = *order2.Mul(&b, &c, x) order2.Mul(&a, &order2, x) if !(order1.checkInvariants() && order2.checkInvariants()) { return false } if order1.Eq(&order2) != 1 { return false } } return true } func TestMulAssociative(t *testing.T) { err := quick.Check(testMulAssociative, &quick.Config{}) if err != nil { t.Error(err) } } func testMulOneIdentity(n Nat) bool { if !n.checkInvariants() { return false } var x, one Nat one.SetUint64(1) x.Mul(&n, &one, len(n.limbs)*_W) if !x.checkInvariants() { return false } if n.Eq(&x) != 1 { return false } x.Mul(&one, &n, len(n.limbs)*_W) if !x.checkInvariants() { return false } return n.Eq(&x) == 1 } func TestMulOneIdentity(t *testing.T) { err := quick.Check(testMulOneIdentity, &quick.Config{}) if err != nil { t.Error(err) } } func testModIdempotent(a Nat, m Modulus) bool { if !a.checkInvariants() { return false } var way1, way2 Nat way1.Mod(&a, &m) way2.Mod(&way1, &m) if !(way1.checkInvariants() && way2.checkInvariants()) { return false } return way1.Eq(&way2) == 1 } func TestModIdempotent(t *testing.T) { err := quick.Check(testModIdempotent, &quick.Config{}) if err != nil { t.Error(err) } } func testModAddCommutative(a Nat, b Nat, m Modulus) bool { if 
!(a.checkInvariants() && b.checkInvariants()) { return false } var aPlusB, bPlusA Nat aPlusB.ModAdd(&a, &b, &m) bPlusA.ModAdd(&b, &a, &m) if !(aPlusB.checkInvariants() && bPlusA.checkInvariants()) { return false } return aPlusB.Eq(&bPlusA) == 1 } func TestModAddCommutative(t *testing.T) { err := quick.Check(testModAddCommutative, &quick.Config{}) if err != nil { t.Error(err) } } func testModAddAssociative(a Nat, b Nat, c Nat, m Modulus) bool { if !(a.checkInvariants() && b.checkInvariants() && c.checkInvariants()) { return false } var order1, order2 Nat order1 = *order1.ModAdd(&a, &b, &m) order1.ModAdd(&order1, &c, &m) order2 = *order2.ModAdd(&b, &c, &m) order2.ModAdd(&a, &order2, &m) if !(order1.checkInvariants() && order2.checkInvariants()) { return false } return order1.Eq(&order2) == 1 } func TestModAddAssociative(t *testing.T) { err := quick.Check(testModAddAssociative, &quick.Config{}) if err != nil { t.Error(err) } } func testModAddModSubInverse(a Nat, b Nat, m Modulus) bool { if !(a.checkInvariants() && b.checkInvariants()) { return false } var c Nat c.ModAdd(&a, &b, &m) c.ModSub(&c, &b, &m) expected := new(Nat) expected.Mod(&a, &m) if !(c.checkInvariants() && expected.checkInvariants()) { return false } return c.Eq(expected) == 1 } func TestModAddModSubInverse(t *testing.T) { err := quick.Check(testModAddModSubInverse, &quick.Config{}) if err != nil { t.Error(err) } } func testModMulCommutative(a Nat, b Nat, m Modulus) bool { if !(a.checkInvariants() && b.checkInvariants()) { return false } var aPlusB, bPlusA Nat aPlusB.ModMul(&a, &b, &m) bPlusA.ModMul(&b, &a, &m) if !(aPlusB.checkInvariants() && bPlusA.checkInvariants()) { return false } return aPlusB.Eq(&bPlusA) == 1 } func TestModMulCommutative(t *testing.T) { err := quick.Check(testModMulCommutative, &quick.Config{}) if err != nil { t.Error(err) } } func testModMulAssociative(a Nat, b Nat, c Nat, m Modulus) bool { if !(a.checkInvariants() && b.checkInvariants() && c.checkInvariants()) { return false } 
var order1, order2 Nat order1 = *order1.ModMul(&a, &b, &m) order1.ModMul(&order1, &c, &m) order2 = *order2.ModMul(&b, &c, &m) order2.ModMul(&a, &order2, &m) if !(order1.checkInvariants() && order2.checkInvariants()) { return false } return order1.Eq(&order2) == 1 } func TestModMulAssociative(t *testing.T) { err := quick.Check(testModMulAssociative, &quick.Config{}) if err != nil { t.Error(err) } } func testModInverseMultiplication(a Nat) bool { if !a.checkInvariants() { return false } var scratch, one, zero Nat zero.SetUint64(0) one.SetUint64(1) for _, x := range []uint64{3, 5, 7, 13, 19, 47, 97} { m := ModulusFromUint64(x) scratch.Mod(&a, m) if scratch.Eq(&zero) == 1 { continue } scratch.ModInverse(&a, m) scratch.ModMul(&scratch, &a, m) if !scratch.checkInvariants() { return false } if scratch.Eq(&one) != 1 { return false } } return true } func TestModInverseMultiplication(t *testing.T) { err := quick.Check(testModInverseMultiplication, &quick.Config{}) if err != nil { t.Error(err) } } func testModInverseMinusOne(a Nat) bool { if !a.checkInvariants() { return false } // Clear out the lowest bit if len(a.limbs) > 0 { a.limbs[0] &= ^Word(1) } if a.EqZero() == 1 { return true } var one Nat one.SetUint64(1) z := new(Nat).Add(&a, &one, -1) m := ModulusFromNat(z) z.ModInverse(&a, m) if !z.checkInvariants() { return false } return z.Eq(&a) == 1 } func TestModInverseMinusOne(t *testing.T) { err := quick.Check(testModInverseMinusOne, &quick.Config{}) if err != nil { t.Error(err) } } func testModInverseEvenMinusOne(a Nat) bool { if !a.checkInvariants() { return false } // Set the lowest bit if len(a.limbs) != 0 { a.limbs[0] |= 1 } var zero Nat zero.SetUint64(0) if a.Eq(&zero) == 1 { return true } var one Nat one.SetUint64(1) var z Nat z.Add(&a, &one, a.AnnouncedLen()+1) if !z.checkInvariants() { return false } z2 := new(Nat).ModInverse(&a, ModulusFromNat(&z)) if !z2.checkInvariants() { return false } return z2.Eq(&a) == 1 } func TestModInverseEvenMinusOne(t *testing.T) { 
err := quick.Check(testModInverseEvenMinusOne, &quick.Config{}) if err != nil { t.Error(err) } } func testModInverseEvenOne(a Nat) bool { if !a.checkInvariants() { return false } // Clear the lowest bit if len(a.limbs) > 0 { a.limbs[0] &= ^Word(1) } var zero Nat zero.SetUint64(0) if a.Eq(&zero) == 1 { return true } var one Nat one.SetUint64(1) var z Nat m := ModulusFromNat(&a) z.ModInverse(&one, m) if !z.checkInvariants() { return false } return z.Eq(&one) == 1 } func TestModInverseEvenOne(t *testing.T) { err := quick.Check(testModInverseEvenOne, &quick.Config{}) if err != nil { t.Error(err) } } func testExpAddition(x Nat, a Nat, b Nat, m Modulus) bool { if !(x.checkInvariants() && a.checkInvariants() && b.checkInvariants()) { return false } var expA, expB, aPlusB, way1, way2 Nat expA.Exp(&x, &a, &m) expB.Exp(&x, &b, &m) // Enough bits to hold the full amount cap := len(a.limbs) if l := len(b.limbs); l > cap { cap = l } aPlusB.Add(&a, &b, cap*_W+1) way1.ModMul(&expA, &expB, &m) way2.Exp(&x, &aPlusB, &m) if !(way1.checkInvariants() && way2.checkInvariants() && aPlusB.checkInvariants()) { return false } return way1.Eq(&way2) == 1 } func TestExpAddition(t *testing.T) { err := quick.Check(testExpAddition, &quick.Config{}) if err != nil { t.Error(err) } } func testSqrtRoundTrip(x *Nat, p *Modulus) bool { xSquared := x.ModMul(x, x, p) xRoot := new(Nat).ModSqrt(xSquared, p) if !(xRoot.checkInvariants() && xSquared.checkInvariants()) { return false } xRoot.ModMul(xRoot, xRoot, p) if !xRoot.checkInvariants() { return false } return xRoot.Eq(xSquared) == 1 } func testModSqrt(x Nat) bool { if !x.checkInvariants() { return false } p := ModulusFromBytes([]byte{ 13, }) if !testSqrtRoundTrip(&x, p) { return false } p = ModulusFromUint64((1 << 61) - 1) if !testSqrtRoundTrip(&x, p) { return false } p = ModulusFromBytes([]byte{ 0x1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }) if !testSqrtRoundTrip(&x, p) { return false } p = ModulusFromBytes([]byte{ 0x3, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfb, }) if !testSqrtRoundTrip(&x, p) { return false } // 2^224 - 2^96 + 1 p = ModulusFromBytes([]byte{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 1, }) return testSqrtRoundTrip(&x, p) } func TestModSqrt(t *testing.T) { err := quick.Check(testModSqrt, &quick.Config{}) if err != nil { t.Error(err) } } func testMultiplyThenDivide(x Nat, m Modulus) bool { if !x.checkInvariants() { return false } mNat := &m.nat xm := new(Nat).Mul(&x, mNat, x.AnnouncedLen()+mNat.AnnouncedLen()) divided := new(Nat).Div(xm, &m, x.AnnouncedLen()) if divided.Eq(&x) != 1 { return false } // Adding m - 1 shouldn't change the result either xm.Add(xm, new(Nat).Sub(mNat, new(Nat).SetUint64(1), xm.AnnouncedLen()), xm.AnnouncedLen()) divided = new(Nat).Div(xm, &m, x.AnnouncedLen()) if !(divided.checkInvariants() && xm.checkInvariants()) { return false } return divided.Eq(&x) == 1 } func TestMultiplyThenDivide(t *testing.T) { err := quick.Check(testMultiplyThenDivide, &quick.Config{}) if err != nil { t.Error(err) } } func TestUint64Creation(t *testing.T) { var x, y Nat x.SetUint64(0) y.SetUint64(0) if x.Eq(&y) != 1 { t.Errorf("%+v != %+v", x, y) } x.SetUint64(1) if x.Eq(&y) == 1 { t.Errorf("%+v == %+v", x, y) } x.SetUint64(0x1111) y.SetUint64(0x1111) if x.Eq(&y) != 1 { t.Errorf("%+v != %+v", x, y) } } func TestAddExamples(t *testing.T) { var x, y, z Nat x.SetUint64(100) y.SetUint64(100) z.SetUint64(200) x = *x.Add(&x, &y, 8) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } z.SetUint64(300 - 256) x = *x.Add(&x, &y, 8) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetUint64(0xf3e5487232169930) y.SetUint64(0) z.SetUint64(0xf3e5487232169930) var x2 Nat x2.Add(&x, &y, 128) if x2.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } } func TestSubExamples(t *testing.T) { x := new(Nat).SetUint64(100) y := 
new(Nat).SetUint64(200) y.Sub(y, x, 8) if y.Eq(x) != 1 { t.Errorf("%+v != %+v", y, x) } } func TestMulExamples(t *testing.T) { var x, y, z Nat x.SetUint64(10) y.SetUint64(10) z.SetUint64(100) x = *x.Mul(&x, &y, 8) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } z.SetUint64(232) x = *x.Mul(&x, &y, 8) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } } func TestModAddExamples(t *testing.T) { m := ModulusFromUint64(13) var x, y, z Nat x.SetUint64(40) y.SetUint64(40) x = *x.ModAdd(&x, &y, m) z.SetUint64(2) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } } func TestModMulExamples(t *testing.T) { var x, y, z Nat m := ModulusFromUint64(13) x.SetUint64(40) y.SetUint64(40) x = *x.ModMul(&x, &y, m) z.SetUint64(1) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } m = ModulusFromBytes([]byte{1, 0, 0, 0, 0, 0, 0, 0, 1}) x.SetUint64(1) x = *x.ModMul(&x, &x, m) z.SetUint64(1) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } m = ModulusFromBytes([]byte{1, 0, 0, 0, 0, 0, 0, 0, 1}) x.SetUint64(16390320477281102916) y.SetUint64(13641051446569424315) x = *x.ModMul(&x, &y, m) z.SetUint64(12559215458690093993) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } } func TestModExamples(t *testing.T) { var x, test Nat x.SetUint64(40) m := ModulusFromUint64(13) x.Mod(&x, m) test.SetUint64(1) if x.Eq(&test) != 1 { t.Errorf("%+v != %+v", x, test) } m = ModulusFromBytes([]byte{13, 0, 0, 0, 0, 0, 0, 0, 1}) x.SetBytes([]byte{41, 0, 0, 0, 0, 0, 0, 0, 0}) x.Mod(&x, m) test.SetBytes([]byte{1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD}) if x.Eq(&test) != 1 { t.Errorf("%+v != %+v", x, test) } } func TestModInverseExamples(t *testing.T) { x, z := new(Nat), new(Nat) x.SetUint64(2) m := ModulusFromUint64(13) x = x.ModInverse(x, m) z.SetUint64(7) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetUint64(16359684999990746055) m = ModulusFromUint64(7) x = x.ModInverse(x, m) z.SetUint64(3) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetUint64(461423694560) m = 
ModulusFromUint64(461423694561) z.ModInverse(x, m) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetHex("2AFAE74A613B0764098D86") m, _ = ModulusFromHex("2AFAE74A613B0764098D87") z.ModInverse(x, m) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetHex("930330931B69B44B8E") m, _ = ModulusFromHex("930330931B69B44B8F") z.ModInverse(x, m) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetHex("DDAB4CDD41300C5F9511FE68") m, _ = ModulusFromHex("DDAB4CDD41300C5F9511FE69") z.ModInverse(x, m) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetHex("A200F1C011C86FFF9A") m, _ = ModulusFromHex("A200F1C011C86FFF9B") z.ModInverse(x, m) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetHex("E7B6E7C1CCB2CEDE797F87937E") m, _ = ModulusFromHex("E7B6E7C1CCB2CEDE797F87937F") z.ModInverse(x, m) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } } func TestExpExamples(t *testing.T) { var x, y, z Nat x.SetUint64(3) y.SetUint64(345) m := ModulusFromUint64(13) x = *x.Exp(&x, &y, m) z.SetUint64(1) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } m = ModulusFromBytes([]byte{1, 0, 0, 0, 0, 0, 0, 0, 1}) x.SetUint64(1) y.SetUint64(2) x = *x.Exp(&x, &y, m) z.SetUint64(1) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } } func TestSetBytesExamples(t *testing.T) { var x, z Nat x.SetBytes([]byte{0x12, 0x34, 0x56}) z.SetUint64(0x123456) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetBytes([]byte{0x00, 0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}) z.SetUint64(0xAABBCCDDEEFF) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } } func TestFillBytesExamples(t *testing.T) { var x Nat expected := []byte{0x00, 0x00, 0x00, 0x00, 0xAA, 0xBB, 0xCC, 0xDD} x.SetBytes(expected) buf := make([]byte, 8) x.FillBytes(buf) if !bytes.Equal(expected, buf) { t.Errorf("%+v != %+v", expected, buf) } } func TestBytesExamples(t *testing.T) { var x Nat expected := []byte{0x11, 0x22, 0x33, 0x44, 0xAA, 0xBB, 0xCC, 0xDD} x.SetBytes(expected) out := x.Bytes() if !bytes.Equal(expected, out) { t.Errorf("%+v != 
%+v", expected, out) } } func TestByteExample(t *testing.T) { x := new(Nat).SetBytes([]byte{8, 7, 6, 5, 4, 3, 2, 1, 0}) for i := 0; i <= 8; i++ { expected := byte(i) actual := x.Byte(i) if expected != actual { t.Errorf("%+v != %+v", expected, actual) } } } func TestModInverseEvenExamples(t *testing.T) { var z, x Nat x.SetUint64(9) m := ModulusFromUint64(10) x.ModInverse(&x, m) z.SetUint64(9) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetUint64(1) m = ModulusFromUint64(10) x.ModInverse(&x, m) z.SetUint64(1) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetUint64(19) x.ModInverse(&x, m) z.SetUint64(9) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetUint64(99) x.ModInverse(&x, m) z.SetUint64(9) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetUint64(999) m = ModulusFromUint64(1000) x.ModInverse(&x, m) z.SetUint64(999) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } // There's an edge case when the modulus is much larger than the input, // in which case when we do m^-1 mod x, we need to first calculate the remainder // of m. 
x.SetUint64(3) m = ModulusFromBytes([]byte{1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}) x.ModInverse(&x, m) z.SetBytes([]byte{0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAB}) if x.Eq(&z) != 1 { t.Errorf("%+v != %+v", x, z) } } func TestModSubExamples(t *testing.T) { m := ModulusFromUint64(13) x := new(Nat).SetUint64(0) y := new(Nat).SetUint64(1) x.ModSub(x, y, m) z := new(Nat).SetUint64(12) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } } func TestModNegExamples(t *testing.T) { m := ModulusFromUint64(13) x := new(Nat).SetUint64(0) x.ModNeg(x, m) z := new(Nat).SetUint64(0) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } x.SetUint64(1) x.ModNeg(x, m) z.SetUint64(12) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } } func TestModSqrtExamples(t *testing.T) { m := ModulusFromUint64(13) x := new(Nat).SetUint64(4) x.ModSqrt(x, m) z := new(Nat).SetUint64(11) if x.Eq(z) != 1 { t.Errorf("%+v != %+v", x, z) } m = ModulusFromUint64(1) x.SetUint64(13) x.ModSqrt(x, m) if x.EqZero() != 1 { t.Errorf("%+v != 0", x) } } func TestBigExamples(t *testing.T) { theBytes := []byte{0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88} x := new(Nat).SetBytes(theBytes) expected := new(big.Int).SetBytes(theBytes) actual := x.Big() if expected.Cmp(actual) != 0 { t.Errorf("%+v != %+v", expected, actual) } expectedNat := x actualNat := new(Nat).SetBig(expected, len(theBytes)*8) if expectedNat.Eq(actualNat) != 1 { t.Errorf("%+v != %+v", expectedNat, actualNat) } } func TestDivExamples(t *testing.T) { x := &Nat{announced: 3 * _W, limbs: []Word{0, 64, 64}} n := &Nat{announced: 2 * _W, limbs: []Word{1, 1}} nMod := ModulusFromNat(n) expectedNat := &Nat{announced: 2 * _W, limbs: []Word{0, 64}} actualNat := new(Nat).Div(x, nMod, 2*_W) if expectedNat.Eq(actualNat) != 1 { t.Errorf("%+v != %+v", expectedNat, actualNat) } nMod = ModulusFromUint64(1) actualNat.Div(x, nMod, x.AnnouncedLen()) if x.Eq(actualNat) != 1 { t.Errorf("%+v != %+v", x, actualNat) } } 
func TestCoprimeExamples(t *testing.T) { x := new(Nat).SetUint64(5 * 7 * 13) y := new(Nat).SetUint64(3 * 7 * 11) expected := Choice(0) actual := x.Coprime(y) if expected != actual { t.Errorf("%+v != %+v", expected, actual) } x.SetUint64(2) y.SetUint64(13) expected = Choice(1) actual = x.Coprime(y) if expected != actual { t.Errorf("%+v != %+v", expected, actual) } x.SetUint64(13) y.SetUint64(2) expected = Choice(1) actual = x.Coprime(y) if expected != actual { t.Errorf("%+v != %+v", expected, actual) } x.SetUint64(2 * 13 * 11) y.SetUint64(2 * 5 * 7) expected = Choice(0) actual = x.Coprime(y) if expected != actual { t.Errorf("%+v != %+v", expected, actual) } // check x,y with 0 limbs x = new(Nat) y = new(Nat) expected = Choice(0) actual = x.Coprime(y) if expected != actual { t.Errorf("%+v != %+v", expected, actual) } // check x,y=0 with 1 empty limb x.SetUint64(0) y.SetUint64(0) expected = Choice(0) actual = x.Coprime(y) if expected != actual { t.Errorf("%+v != %+v", expected, actual) } } func TestTrueLenExamples(t *testing.T) { x := new(Nat).SetUint64(0x0000_0000_0000_0001) expected := 1 actual := x.TrueLen() if expected != actual { t.Errorf("%+v != %+v", expected, actual) } x.SetUint64(0x0000_0000_0100_0001) expected = 25 actual = x.TrueLen() if expected != actual { t.Errorf("%+v != %+v", expected, actual) } } func TestTruncateExamples(t *testing.T) { x := new(Nat).SetUint64(0xAABB) x.Resize(16) expected := []byte{0xAA, 0xBB} actual := x.Bytes() if !bytes.Equal(expected, actual) { t.Errorf("%+v != %+v", expected, actual) } x.Resize(8) expectedNat := new(Nat).SetUint64(0xBB) if x.Eq(expectedNat) != 1 { t.Errorf("%+v != %+v", expectedNat, x) } expected = []byte{0xBB} actual = x.Bytes() if !bytes.Equal(expected, actual) { t.Errorf("%+v != %+v", expected, actual) } } func TestHexExamples(t *testing.T) { x := new(Nat).SetUint64(0x0123456789ABCDEF) expected := "0123456789ABCDEF" actual := x.Hex() if expected != actual { t.Errorf("%+v != %+v", expected, actual) } 
x.SetHex("0123456789ABCDEF") expectedNat := new(Nat).SetUint64(0x0123456789ABCDEF) if expectedNat.Eq(x) != 1 { t.Errorf("%+v != %+v", expectedNat, x) } } func TestDivEdgeCase(t *testing.T) { x, _ := new(Nat).SetHex("B857C2BFBB8F9C8529B37228BE59017114876E17623A605308BFF084CBA97565BC97F9A2ED65895572B157AF6CADE2D7DD018772149E3216DA6D5B57EA703AF1598E23F3A79637C3072053427732C9E336AF983AB8FFD4F0AD08F042C8D3709FC6CC7247AE6C5D1181183FDBC4A1252D6B8C124FF50D6C72579AC2EC75F79FFD040F61F771D8E4116B40E595DB898A702DC99A882A37F091CDC897171921D744E5F2ACA5F466E4D9087B8D04E90CA99DBB259329C30CD925E046FFCB0CDB17FF2EB9C7475D4280C14711B1538F1282A2259348EAB246296D03051774D34D968329C336997EA4EEEBE9D8EE2EBAEBEF4B97076DF9431556F219DFEEFB58D9828E6AB9944C6717AD201331C8A12A11544389251E9A80388378F5B5596D129DDB5BC80F4D1AC993F0E6EF65AD7F832189DA2BDA0E642B6F1CDC539F07913FCFD65BCDE7D7CD2B7223D37B3666D58879B8EE61D61CE3683B6168F392B61A7C99F162C12138CD598770CC7604577E67B8A28C96AF7BDCB24CBD9B0E2801A2F122EFF7A21249C65BA49BD39B9F6B62BD4B0B16EBA1B8FC4AA2EFD03AD4D08AE17371D4B0A88020B77BCD072063DE9EB3F1FCC54FD2D35E587A424C7F62090E6A82B4839ED376BC572882E415F0A3277AF19E9A8BD4F19C69BA445ADAEAB178CE6952BE8140B0FACF0E7E045B9B8A54986481F8279D78048959FAB13B41AC11EB12AA4C") nNat, _ := new(Nat).SetHex("D93C94E373D1B82924130A345FA7B8664AAFF9F335C0E6E79DCFEF49C88DC444885CA953F12BAA4A67B7B21C2FF6B4EECF6A750C76A456B2C800AFCBD0660CA03CB256A594C0D46B00118D6179F845D91EE0D4AFB2168E0FBFAB9958FE3A831950C8D8F402E4CD72C90128F1AE3BE986CE5FFD2EABC3363DE1EEB71BBC7245F4C78899301031803F0AE5B09C803E5E02E18FFA540202E65C29D1692058C34F34B9C9F42482E31436511B23A80F4642DB06BCE8E7C1B0A54E537418B411E4856277B9EC30C0103E1C7881E85F29AD6F7C27109DEEEC1676EE6A74E9641440A9E1095076CFBDD23FFF84A2C683EB19EBEE82811A8B6771CC7AF01DF85BA8A66FCD") n := ModulusFromNat(nNat) expected, _ := 
new(Nat).SetHex("D93C94E373D1B82924130A345FA7B8664AAFF9F335C0E6E79DCFEF49C88DC444885CA953F12BAA4A67B7B21C2FF6B4EECF6A750C76A456B2C800AFCBD0660CA03CB256A594C0D46B00118D6179F845D91EE0D4AFB2168E0FBFAB9958FE3A831950C8D8F402E4CD72C90128F1AE3BE986CE5FFD2EABC3363DE1EEB71BBC7245F2EEA5667ECBA323F12A6765DBA7C58145553B4CCA69B657C0048E06A6E9DD3AEEA09ADEAF46B7A979D10658FB7F22CAB762145FC368D5C4AAC7453E2BFDFC613134C41630993A75904EF63F91E3388ABAF40867AB499B62473B8FD437BB3FABD24D50FFB92903D6BA33E9E337759456E802FDDA7E3F84D5523442D6A25F058F7C") actual := x.Div(x, n, -1) if expected.Eq(actual) != 1 { t.Errorf("%+v != %+v", expected, actual) } x, _ = new(Nat).SetHex("BC5B56830516E486DD0C5C76DF5838511BF68ECB4503FDE3A76C") n = ModulusFromUint64(0xDF) expected, _ = new(Nat).SetHex("D83AEF5E2848331DB0D83C3A6690E5F5CB268613D33F212A14") actual = x.Div(x, n, -1) if expected.Eq(actual) != 1 { t.Errorf("%+v != %+v", expected, actual) } } func TestLshExamples(t *testing.T) { x := new(Nat).SetUint64(1).Resize(1) expected := new(Nat).SetUint64(32) actual := x.Lsh(x, 5, -1) if expected.Eq(actual) != 1 { t.Errorf("%+v != %+v", expected, actual) } } func TestRshExamples(t *testing.T) { x := new(Nat).SetUint64(32) expected := new(Nat).SetUint64(1) actual := x.Rsh(x, 5, -1) if expected.Eq(actual) != 1 { t.Errorf("%+v != %+v", expected, actual) } }